]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-predcom.c
generalized IPA predicate on parameter
[thirdparty/gcc.git] / gcc / tree-predcom.c
CommitLineData
ad4a85ad 1/* Predictive commoning.
fbd26352 2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
48e1416a 3
ad4a85ad 4This file is part of GCC.
48e1416a 5
ad4a85ad 6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8c4c00c1 8Free Software Foundation; either version 3, or (at your option) any
ad4a85ad 9later version.
48e1416a 10
ad4a85ad 11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
48e1416a 15
ad4a85ad 16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
ad4a85ad 19
20/* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
48e1416a 32
ad4a85ad 33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
48e1416a 66
ad4a85ad 67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
48e1416a 79 single root reference of the chain (adjusting their distance
ad4a85ad 80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
9d75589a 102 of a reference reusing its value. Variables R0 up to RN are created,
ad4a85ad 103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
48e1416a 135
ad4a85ad 136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
a652753d 159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
ad4a85ad 168
a652753d 169 It can be replaced with:
ad4a85ad 170
a652753d 171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
ad4a85ad 182
a652753d 183 If the loop runs more than 1 iterations, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
83c06cf0 195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
a652753d 199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
ad4a85ad 202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207#include "config.h"
208#include "system.h"
209#include "coretypes.h"
9ef16211 210#include "backend.h"
7c29e30e 211#include "rtl.h"
ad4a85ad 212#include "tree.h"
9ef16211 213#include "gimple.h"
7c29e30e 214#include "predict.h"
215#include "tree-pass.h"
9ef16211 216#include "ssa.h"
7c29e30e 217#include "gimple-pretty-print.h"
9ef16211 218#include "alias.h"
b20a8bb4 219#include "fold-const.h"
ad4a85ad 220#include "cfgloop.h"
bc61cadb 221#include "tree-eh.h"
a8783bee 222#include "gimplify.h"
dcf1a1ec 223#include "gimple-iterator.h"
e795d6e1 224#include "gimplify-me.h"
05d9c18a 225#include "tree-ssa-loop-ivopts.h"
226#include "tree-ssa-loop-manip.h"
227#include "tree-ssa-loop-niter.h"
073c1fd5 228#include "tree-ssa-loop.h"
229#include "tree-into-ssa.h"
230#include "tree-dfa.h"
69ee5dbb 231#include "tree-ssa.h"
ad4a85ad 232#include "tree-data-ref.h"
233#include "tree-scalar-evolution.h"
ad4a85ad 234#include "params.h"
ad4a85ad 235#include "tree-affine.h"
40395983 236#include "builtins.h"
ad4a85ad 237
238/* The maximum number of iterations between the considered memory
239 references. */
240
241#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
48e1416a 242
75a70cf9 243/* Data references (or phi nodes that carry data reference values across
244 loop iterations). */
ad4a85ad 245
251317e4 246typedef class dref_d
ad4a85ad 247{
251317e4 248public:
ad4a85ad 249 /* The reference itself. */
250 struct data_reference *ref;
251
252 /* The statement in that the reference appears. */
42acab1c 253 gimple *stmt;
75a70cf9 254
255 /* In case that STMT is a phi node, this field is set to the SSA name
256 defined by it in replace_phis_by_defined_names (in order to avoid
257 pointing to phi node that got reallocated in the meantime). */
258 tree name_defined_by_phi;
ad4a85ad 259
260 /* Distance of the reference from the root of the chain (in number of
261 iterations of the loop). */
262 unsigned distance;
263
264 /* Number of iterations offset from the first reference in the component. */
5de9d3ed 265 widest_int offset;
ad4a85ad 266
267 /* Number of the reference in a component, in dominance ordering. */
268 unsigned pos;
269
270 /* True if the memory reference is always accessed when the loop is
271 entered. */
272 unsigned always_accessed : 1;
273} *dref;
274
ad4a85ad 275
276/* Type of the chain of the references. */
277
278enum chain_type
279{
280 /* The addresses of the references in the chain are constant. */
281 CT_INVARIANT,
282
283 /* There are only loads in the chain. */
284 CT_LOAD,
285
286 /* Root of the chain is store, the rest are loads. */
287 CT_STORE_LOAD,
288
a652753d 289 /* There are only stores in the chain. */
290 CT_STORE_STORE,
291
ad4a85ad 292 /* A combination of two chains. */
293 CT_COMBINATION
294};
295
296/* Chains of data references. */
297
298typedef struct chain
299{
300 /* Type of the chain. */
301 enum chain_type type;
302
303 /* For combination chains, the operator and the two chains that are
304 combined, and the type of the result. */
f4e36c33 305 enum tree_code op;
ad4a85ad 306 tree rslt_type;
307 struct chain *ch1, *ch2;
308
309 /* The references in the chain. */
f1f41a6c 310 vec<dref> refs;
ad4a85ad 311
312 /* The maximum distance of the reference in the chain from the root. */
313 unsigned length;
314
315 /* The variables used to copy the value throughout iterations. */
f1f41a6c 316 vec<tree> vars;
ad4a85ad 317
318 /* Initializers for the variables. */
f1f41a6c 319 vec<tree> inits;
ad4a85ad 320
a652753d 321 /* Finalizers for the eliminated stores. */
322 vec<tree> finis;
323
a481acc5 324 /* gimple stmts intializing the initial variables of the chain. */
325 gimple_seq init_seq;
326
a652753d 327 /* gimple stmts finalizing the eliminated stores of the chain. */
328 gimple_seq fini_seq;
329
ad4a85ad 330 /* True if there is a use of a variable with the maximal distance
331 that comes after the root in the loop. */
332 unsigned has_max_use_after : 1;
333
334 /* True if all the memory references in the chain are always accessed. */
335 unsigned all_always_accessed : 1;
336
337 /* True if this chain was combined together with some other chain. */
338 unsigned combined : 1;
e33b3a13 339
340 /* True if this is store elimination chain and eliminated stores store
341 loop invariant value into memory. */
342 unsigned inv_store_elimination : 1;
ad4a85ad 343} *chain_p;
344
ad4a85ad 345
346/* Describes the knowledge about the step of the memory references in
347 the component. */
348
349enum ref_step_type
350{
351 /* The step is zero. */
352 RS_INVARIANT,
353
354 /* The step is nonzero. */
355 RS_NONZERO,
356
357 /* The step may or may not be nonzero. */
358 RS_ANY
359};
360
361/* Components of the data dependence graph. */
362
363struct component
364{
365 /* The references in the component. */
f1f41a6c 366 vec<dref> refs;
ad4a85ad 367
368 /* What we know about the step of the references in the component. */
369 enum ref_step_type comp_step;
370
a652753d 371 /* True if all references in component are stores and we try to do
372 intra/inter loop iteration dead store elimination. */
373 bool eliminate_store_p;
374
ad4a85ad 375 /* Next component in the list. */
376 struct component *next;
377};
378
379/* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
380
381static bitmap looparound_phis;
382
383/* Cache used by tree_to_aff_combination_expand. */
384
5f8841a5 385static hash_map<tree, name_expansion *> *name_expansions;
ad4a85ad 386
387/* Dumps data reference REF to FILE. */
388
389extern void dump_dref (FILE *, dref);
390void
391dump_dref (FILE *file, dref ref)
392{
393 if (ref->ref)
394 {
395 fprintf (file, " ");
396 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
397 fprintf (file, " (id %u%s)\n", ref->pos,
398 DR_IS_READ (ref->ref) ? "" : ", write");
399
400 fprintf (file, " offset ");
e913b5cd 401 print_decs (ref->offset, file);
ad4a85ad 402 fprintf (file, "\n");
403
404 fprintf (file, " distance %u\n", ref->distance);
405 }
406 else
407 {
75a70cf9 408 if (gimple_code (ref->stmt) == GIMPLE_PHI)
ad4a85ad 409 fprintf (file, " looparound ref\n");
410 else
411 fprintf (file, " combination ref\n");
412 fprintf (file, " in statement ");
75a70cf9 413 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
ad4a85ad 414 fprintf (file, "\n");
415 fprintf (file, " distance %u\n", ref->distance);
416 }
417
418}
419
420/* Dumps CHAIN to FILE. */
421
422extern void dump_chain (FILE *, chain_p);
423void
424dump_chain (FILE *file, chain_p chain)
425{
426 dref a;
427 const char *chain_type;
428 unsigned i;
429 tree var;
430
431 switch (chain->type)
432 {
433 case CT_INVARIANT:
434 chain_type = "Load motion";
435 break;
436
437 case CT_LOAD:
438 chain_type = "Loads-only";
439 break;
440
441 case CT_STORE_LOAD:
442 chain_type = "Store-loads";
443 break;
444
a652753d 445 case CT_STORE_STORE:
446 chain_type = "Store-stores";
447 break;
448
ad4a85ad 449 case CT_COMBINATION:
450 chain_type = "Combination";
451 break;
452
453 default:
454 gcc_unreachable ();
455 }
456
457 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
458 chain->combined ? " (combined)" : "");
459 if (chain->type != CT_INVARIANT)
460 fprintf (file, " max distance %u%s\n", chain->length,
461 chain->has_max_use_after ? "" : ", may reuse first");
462
463 if (chain->type == CT_COMBINATION)
464 {
465 fprintf (file, " equal to %p %s %p in type ",
f4e36c33 466 (void *) chain->ch1, op_symbol_code (chain->op),
ad4a85ad 467 (void *) chain->ch2);
468 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
469 fprintf (file, "\n");
470 }
471
f1f41a6c 472 if (chain->vars.exists ())
ad4a85ad 473 {
474 fprintf (file, " vars");
f1f41a6c 475 FOR_EACH_VEC_ELT (chain->vars, i, var)
ad4a85ad 476 {
477 fprintf (file, " ");
478 print_generic_expr (file, var, TDF_SLIM);
479 }
480 fprintf (file, "\n");
481 }
482
f1f41a6c 483 if (chain->inits.exists ())
ad4a85ad 484 {
485 fprintf (file, " inits");
f1f41a6c 486 FOR_EACH_VEC_ELT (chain->inits, i, var)
ad4a85ad 487 {
488 fprintf (file, " ");
489 print_generic_expr (file, var, TDF_SLIM);
490 }
491 fprintf (file, "\n");
492 }
493
494 fprintf (file, " references:\n");
f1f41a6c 495 FOR_EACH_VEC_ELT (chain->refs, i, a)
ad4a85ad 496 dump_dref (file, a);
497
498 fprintf (file, "\n");
499}
500
501/* Dumps CHAINS to FILE. */
502
f1f41a6c 503extern void dump_chains (FILE *, vec<chain_p> );
ad4a85ad 504void
f1f41a6c 505dump_chains (FILE *file, vec<chain_p> chains)
ad4a85ad 506{
507 chain_p chain;
508 unsigned i;
509
f1f41a6c 510 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 511 dump_chain (file, chain);
512}
513
514/* Dumps COMP to FILE. */
515
516extern void dump_component (FILE *, struct component *);
517void
518dump_component (FILE *file, struct component *comp)
519{
520 dref a;
521 unsigned i;
522
523 fprintf (file, "Component%s:\n",
524 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
f1f41a6c 525 FOR_EACH_VEC_ELT (comp->refs, i, a)
ad4a85ad 526 dump_dref (file, a);
527 fprintf (file, "\n");
528}
529
530/* Dumps COMPS to FILE. */
531
532extern void dump_components (FILE *, struct component *);
533void
534dump_components (FILE *file, struct component *comps)
535{
536 struct component *comp;
537
538 for (comp = comps; comp; comp = comp->next)
539 dump_component (file, comp);
540}
541
542/* Frees a chain CHAIN. */
543
544static void
545release_chain (chain_p chain)
546{
547 dref ref;
548 unsigned i;
549
550 if (chain == NULL)
551 return;
552
f1f41a6c 553 FOR_EACH_VEC_ELT (chain->refs, i, ref)
ad4a85ad 554 free (ref);
555
f1f41a6c 556 chain->refs.release ();
557 chain->vars.release ();
558 chain->inits.release ();
a481acc5 559 if (chain->init_seq)
560 gimple_seq_discard (chain->init_seq);
ad4a85ad 561
a652753d 562 chain->finis.release ();
563 if (chain->fini_seq)
564 gimple_seq_discard (chain->fini_seq);
565
ad4a85ad 566 free (chain);
567}
568
569/* Frees CHAINS. */
570
571static void
f1f41a6c 572release_chains (vec<chain_p> chains)
ad4a85ad 573{
574 unsigned i;
575 chain_p chain;
576
f1f41a6c 577 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 578 release_chain (chain);
f1f41a6c 579 chains.release ();
ad4a85ad 580}
581
582/* Frees a component COMP. */
583
584static void
585release_component (struct component *comp)
586{
f1f41a6c 587 comp->refs.release ();
ad4a85ad 588 free (comp);
589}
590
591/* Frees list of components COMPS. */
592
593static void
594release_components (struct component *comps)
595{
596 struct component *act, *next;
597
598 for (act = comps; act; act = next)
599 {
600 next = act->next;
601 release_component (act);
602 }
603}
604
605/* Finds a root of tree given by FATHERS containing A, and performs path
606 shortening. */
607
608static unsigned
609component_of (unsigned fathers[], unsigned a)
610{
611 unsigned root, n;
612
613 for (root = a; root != fathers[root]; root = fathers[root])
614 continue;
615
616 for (; a != root; a = n)
617 {
618 n = fathers[a];
619 fathers[a] = root;
620 }
621
622 return root;
623}
624
625/* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
626 components, A and B are components to merge. */
627
628static void
629merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
630{
631 unsigned ca = component_of (fathers, a);
632 unsigned cb = component_of (fathers, b);
633
634 if (ca == cb)
635 return;
636
637 if (sizes[ca] < sizes[cb])
638 {
639 sizes[cb] += sizes[ca];
640 fathers[ca] = cb;
641 }
642 else
643 {
644 sizes[ca] += sizes[cb];
645 fathers[cb] = ca;
646 }
647}
648
649/* Returns true if A is a reference that is suitable for predictive commoning
650 in the innermost loop that contains it. REF_STEP is set according to the
651 step of the reference A. */
652
653static bool
654suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
655{
656 tree ref = DR_REF (a), step = DR_STEP (a);
657
658 if (!step
450c0971 659 || TREE_THIS_VOLATILE (ref)
154edec0 660 || !is_gimple_reg_type (TREE_TYPE (ref))
661 || tree_could_throw_p (ref))
ad4a85ad 662 return false;
663
664 if (integer_zerop (step))
665 *ref_step = RS_INVARIANT;
666 else if (integer_nonzerop (step))
667 *ref_step = RS_NONZERO;
668 else
669 *ref_step = RS_ANY;
670
671 return true;
672}
673
674/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
675
676static void
677aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
678{
a0553bff 679 tree type = TREE_TYPE (DR_OFFSET (dr));
ad4a85ad 680 aff_tree delta;
681
a0553bff 682 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
ad4a85ad 683 &name_expansions);
c4d25d8a 684 aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
ad4a85ad 685 aff_combination_add (offset, &delta);
686}
687
688/* Determines number of iterations of the innermost enclosing loop before B
689 refers to exactly the same location as A and stores it to OFF. If A and
690 B do not have the same step, they never meet, or anything else fails,
691 returns false, otherwise returns true. Both A and B are assumed to
692 satisfy suitable_reference_p. */
693
694static bool
695determine_offset (struct data_reference *a, struct data_reference *b,
1aeea61f 696 poly_widest_int *off)
ad4a85ad 697{
698 aff_tree diff, baseb, step;
ca4882a2 699 tree typea, typeb;
700
701 /* Check that both the references access the location in the same type. */
702 typea = TREE_TYPE (DR_REF (a));
703 typeb = TREE_TYPE (DR_REF (b));
548044d8 704 if (!useless_type_conversion_p (typeb, typea))
ca4882a2 705 return false;
ad4a85ad 706
707 /* Check whether the base address and the step of both references is the
708 same. */
709 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
710 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
711 return false;
712
713 if (integer_zerop (DR_STEP (a)))
714 {
715 /* If the references have loop invariant address, check that they access
716 exactly the same location. */
e913b5cd 717 *off = 0;
ad4a85ad 718 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
719 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
720 }
721
722 /* Compare the offsets of the addresses, and check whether the difference
723 is a multiple of step. */
724 aff_combination_dr_offset (a, &diff);
725 aff_combination_dr_offset (b, &baseb);
e913b5cd 726 aff_combination_scale (&baseb, -1);
ad4a85ad 727 aff_combination_add (&diff, &baseb);
728
a0553bff 729 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
ad4a85ad 730 &step, &name_expansions);
731 return aff_combination_constant_multiple_p (&diff, &step, off);
732}
733
734/* Returns the last basic block in LOOP for that we are sure that
735 it is executed whenever the loop is entered. */
736
737static basic_block
2e966e2a 738last_always_executed_block (class loop *loop)
ad4a85ad 739{
740 unsigned i;
f1f41a6c 741 vec<edge> exits = get_loop_exit_edges (loop);
ad4a85ad 742 edge ex;
743 basic_block last = loop->latch;
744
f1f41a6c 745 FOR_EACH_VEC_ELT (exits, i, ex)
ad4a85ad 746 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
f1f41a6c 747 exits.release ();
ad4a85ad 748
749 return last;
750}
751
752/* Splits dependence graph on DATAREFS described by DEPENDS to components. */
753
754static struct component *
2e966e2a 755split_data_refs_to_components (class loop *loop,
f1f41a6c 756 vec<data_reference_p> datarefs,
757 vec<ddr_p> depends)
ad4a85ad 758{
f1f41a6c 759 unsigned i, n = datarefs.length ();
ad4a85ad 760 unsigned ca, ia, ib, bad;
761 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
762 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
763 struct component **comps;
764 struct data_reference *dr, *dra, *drb;
765 struct data_dependence_relation *ddr;
766 struct component *comp_list = NULL, *comp;
767 dref dataref;
a652753d 768 /* Don't do store elimination if loop has multiple exit edges. */
769 bool eliminate_store_p = single_exit (loop) != NULL;
ad4a85ad 770 basic_block last_always_executed = last_always_executed_block (loop);
48e1416a 771
f1f41a6c 772 FOR_EACH_VEC_ELT (datarefs, i, dr)
ad4a85ad 773 {
774 if (!DR_REF (dr))
775 {
776 /* A fake reference for call or asm_expr that may clobber memory;
777 just fail. */
778 goto end;
779 }
c71d3c24 780 /* predcom pass isn't prepared to handle calls with data references. */
781 if (is_gimple_call (DR_STMT (dr)))
782 goto end;
5c205353 783 dr->aux = (void *) (size_t) i;
ad4a85ad 784 comp_father[i] = i;
785 comp_size[i] = 1;
786 }
787
788 /* A component reserved for the "bad" data references. */
789 comp_father[n] = n;
790 comp_size[n] = 1;
791
f1f41a6c 792 FOR_EACH_VEC_ELT (datarefs, i, dr)
ad4a85ad 793 {
794 enum ref_step_type dummy;
795
796 if (!suitable_reference_p (dr, &dummy))
797 {
5c205353 798 ia = (unsigned) (size_t) dr->aux;
ad4a85ad 799 merge_comps (comp_father, comp_size, n, ia);
800 }
801 }
802
f1f41a6c 803 FOR_EACH_VEC_ELT (depends, i, ddr)
ad4a85ad 804 {
1aeea61f 805 poly_widest_int dummy_off;
ad4a85ad 806
807 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
808 continue;
809
810 dra = DDR_A (ddr);
811 drb = DDR_B (ddr);
a652753d 812
813 /* Don't do store elimination if there is any unknown dependence for
814 any store data reference. */
815 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
816 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
817 || DDR_NUM_DIST_VECTS (ddr) == 0))
818 eliminate_store_p = false;
819
5c205353 820 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
821 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
ad4a85ad 822 if (ia == ib)
823 continue;
824
825 bad = component_of (comp_father, n);
826
827 /* If both A and B are reads, we may ignore unsuitable dependences. */
19f2ced2 828 if (DR_IS_READ (dra) && DR_IS_READ (drb))
829 {
830 if (ia == bad || ib == bad
831 || !determine_offset (dra, drb, &dummy_off))
832 continue;
833 }
834 /* If A is read and B write or vice versa and there is unsuitable
835 dependence, instead of merging both components into a component
836 that will certainly not pass suitable_component_p, just put the
837 read into bad component, perhaps at least the write together with
838 all the other data refs in it's component will be optimizable. */
839 else if (DR_IS_READ (dra) && ib != bad)
840 {
841 if (ia == bad)
842 continue;
843 else if (!determine_offset (dra, drb, &dummy_off))
844 {
845 merge_comps (comp_father, comp_size, bad, ia);
846 continue;
847 }
848 }
849 else if (DR_IS_READ (drb) && ia != bad)
850 {
851 if (ib == bad)
852 continue;
853 else if (!determine_offset (dra, drb, &dummy_off))
854 {
855 merge_comps (comp_father, comp_size, bad, ib);
856 continue;
857 }
858 }
a652753d 859 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
860 && ia != bad && ib != bad
861 && !determine_offset (dra, drb, &dummy_off))
862 {
863 merge_comps (comp_father, comp_size, bad, ia);
864 merge_comps (comp_father, comp_size, bad, ib);
865 continue;
866 }
48e1416a 867
ad4a85ad 868 merge_comps (comp_father, comp_size, ia, ib);
869 }
870
a652753d 871 if (eliminate_store_p)
872 {
873 tree niters = number_of_latch_executions (loop);
874
875 /* Don't do store elimination if niters info is unknown because stores
876 in the last iteration can't be eliminated and we need to recover it
877 after loop. */
878 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
879 }
880
ad4a85ad 881 comps = XCNEWVEC (struct component *, n);
882 bad = component_of (comp_father, n);
f1f41a6c 883 FOR_EACH_VEC_ELT (datarefs, i, dr)
ad4a85ad 884 {
5c205353 885 ia = (unsigned) (size_t) dr->aux;
ad4a85ad 886 ca = component_of (comp_father, ia);
887 if (ca == bad)
888 continue;
889
890 comp = comps[ca];
891 if (!comp)
892 {
893 comp = XCNEW (struct component);
f1f41a6c 894 comp->refs.create (comp_size[ca]);
a652753d 895 comp->eliminate_store_p = eliminate_store_p;
ad4a85ad 896 comps[ca] = comp;
897 }
898
2e966e2a 899 dataref = XCNEW (class dref_d);
ad4a85ad 900 dataref->ref = dr;
901 dataref->stmt = DR_STMT (dr);
e913b5cd 902 dataref->offset = 0;
ad4a85ad 903 dataref->distance = 0;
904
905 dataref->always_accessed
906 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
75a70cf9 907 gimple_bb (dataref->stmt));
f1f41a6c 908 dataref->pos = comp->refs.length ();
909 comp->refs.quick_push (dataref);
ad4a85ad 910 }
911
912 for (i = 0; i < n; i++)
913 {
914 comp = comps[i];
915 if (comp)
916 {
917 comp->next = comp_list;
918 comp_list = comp;
919 }
920 }
921 free (comps);
922
923end:
924 free (comp_father);
925 free (comp_size);
926 return comp_list;
927}
928
929/* Returns true if the component COMP satisfies the conditions
310d2511 930 described in 2) at the beginning of this file. LOOP is the current
ad4a85ad 931 loop. */
48e1416a 932
ad4a85ad 933static bool
2e966e2a 934suitable_component_p (class loop *loop, struct component *comp)
ad4a85ad 935{
936 unsigned i;
937 dref a, first;
938 basic_block ba, bp = loop->header;
939 bool ok, has_write = false;
940
f1f41a6c 941 FOR_EACH_VEC_ELT (comp->refs, i, a)
ad4a85ad 942 {
75a70cf9 943 ba = gimple_bb (a->stmt);
ad4a85ad 944
945 if (!just_once_each_iteration_p (loop, ba))
946 return false;
947
948 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
949 bp = ba;
950
9ff25603 951 if (DR_IS_WRITE (a->ref))
ad4a85ad 952 has_write = true;
953 }
954
f1f41a6c 955 first = comp->refs[0];
ad4a85ad 956 ok = suitable_reference_p (first->ref, &comp->comp_step);
957 gcc_assert (ok);
e913b5cd 958 first->offset = 0;
ad4a85ad 959
f1f41a6c 960 for (i = 1; comp->refs.iterate (i, &a); i++)
ad4a85ad 961 {
1aeea61f 962 /* Polynomial offsets are no use, since we need to know the
963 gap between iteration numbers at compile time. */
964 poly_widest_int offset;
965 if (!determine_offset (first->ref, a->ref, &offset)
966 || !offset.is_constant (&a->offset))
ad4a85ad 967 return false;
968
382ecba7 969 enum ref_step_type a_step;
970 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
971 && a_step == comp->comp_step);
ad4a85ad 972 }
973
974 /* If there is a write inside the component, we must know whether the
975 step is nonzero or not -- we would not otherwise be able to recognize
976 whether the value accessed by reads comes from the OFFSET-th iteration
977 or the previous one. */
978 if (has_write && comp->comp_step == RS_ANY)
979 return false;
980
981 return true;
982}
48e1416a 983
ad4a85ad 984/* Check the conditions on references inside each of components COMPS,
985 and remove the unsuitable components from the list. The new list
986 of components is returned. The conditions are described in 2) at
310d2511 987 the beginning of this file. LOOP is the current loop. */
ad4a85ad 988
989static struct component *
2e966e2a 990filter_suitable_components (class loop *loop, struct component *comps)
ad4a85ad 991{
992 struct component **comp, *act;
993
994 for (comp = &comps; *comp; )
995 {
996 act = *comp;
997 if (suitable_component_p (loop, act))
998 comp = &act->next;
999 else
1000 {
19af51e2 1001 dref ref;
1002 unsigned i;
1003
ad4a85ad 1004 *comp = act->next;
f1f41a6c 1005 FOR_EACH_VEC_ELT (act->refs, i, ref)
19af51e2 1006 free (ref);
ad4a85ad 1007 release_component (act);
1008 }
1009 }
1010
1011 return comps;
1012}
1013
1014/* Compares two drefs A and B by their offset and position. Callback for
1015 qsort. */
1016
1017static int
1018order_drefs (const void *a, const void *b)
1019{
45ba1503 1020 const dref *const da = (const dref *) a;
1021 const dref *const db = (const dref *) b;
796b6678 1022 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
ad4a85ad 1023
1024 if (offcmp != 0)
1025 return offcmp;
1026
1027 return (*da)->pos - (*db)->pos;
1028}
1029
e02e28c4 1030/* Compares two drefs A and B by their position. Callback for qsort. */
1031
1032static int
1033order_drefs_by_pos (const void *a, const void *b)
1034{
1035 const dref *const da = (const dref *) a;
1036 const dref *const db = (const dref *) b;
1037
1038 return (*da)->pos - (*db)->pos;
1039}
1040
ad4a85ad 1041/* Returns root of the CHAIN. */
1042
1043static inline dref
1044get_chain_root (chain_p chain)
1045{
f1f41a6c 1046 return chain->refs[0];
ad4a85ad 1047}
1048
83c06cf0 1049/* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
a652753d 1050 exist. */
1051
1052static inline dref
83c06cf0 1053get_chain_last_write_at (chain_p chain, unsigned distance)
a652753d 1054{
83c06cf0 1055 for (unsigned i = chain->refs.length (); i > 0; i--)
1056 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1057 && distance == chain->refs[i - 1]->distance)
1058 return chain->refs[i - 1];
a652753d 1059
83c06cf0 1060 return NULL;
1061}
1062
1063/* Given CHAIN, returns the last write ref with the same distance before load
1064 at index LOAD_IDX, or NULL if it doesn't exist. */
1065
1066static inline dref
1067get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1068{
1069 gcc_assert (load_idx < chain->refs.length ());
1070
1071 unsigned distance = chain->refs[load_idx]->distance;
a652753d 1072
83c06cf0 1073 for (unsigned i = load_idx; i > 0; i--)
1074 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1075 && distance == chain->refs[i - 1]->distance)
1076 return chain->refs[i - 1];
1077
1078 return NULL;
a652753d 1079}
1080
ad4a85ad 1081/* Adds REF to the chain CHAIN. */
1082
1083static void
1084add_ref_to_chain (chain_p chain, dref ref)
1085{
1086 dref root = get_chain_root (chain);
ad4a85ad 1087
796b6678 1088 gcc_assert (wi::les_p (root->offset, ref->offset));
ab2c1de8 1089 widest_int dist = ref->offset - root->offset;
796b6678 1090 gcc_assert (wi::fits_uhwi_p (dist));
ad4a85ad 1091
f1f41a6c 1092 chain->refs.safe_push (ref);
ad4a85ad 1093
cf8f0e63 1094 ref->distance = dist.to_uhwi ();
ad4a85ad 1095
1096 if (ref->distance >= chain->length)
1097 {
1098 chain->length = ref->distance;
1099 chain->has_max_use_after = false;
1100 }
1101
83c06cf0 1102 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1103 if (DR_IS_WRITE (ref->ref))
1104 chain->type = CT_STORE_STORE;
1105
c12979d1 1106 /* Don't set the flag for store-store chain since there is no use. */
1107 if (chain->type != CT_STORE_STORE
1108 && ref->distance == chain->length
ad4a85ad 1109 && ref->pos > root->pos)
1110 chain->has_max_use_after = true;
1111
1112 chain->all_always_accessed &= ref->always_accessed;
1113}
1114
1115/* Returns the chain for invariant component COMP. */
1116
1117static chain_p
1118make_invariant_chain (struct component *comp)
1119{
1120 chain_p chain = XCNEW (struct chain);
1121 unsigned i;
1122 dref ref;
1123
1124 chain->type = CT_INVARIANT;
1125
1126 chain->all_always_accessed = true;
1127
f1f41a6c 1128 FOR_EACH_VEC_ELT (comp->refs, i, ref)
ad4a85ad 1129 {
f1f41a6c 1130 chain->refs.safe_push (ref);
ad4a85ad 1131 chain->all_always_accessed &= ref->always_accessed;
1132 }
1133
a652753d 1134 chain->inits = vNULL;
1135 chain->finis = vNULL;
1136
ad4a85ad 1137 return chain;
1138}
1139
a652753d 1140/* Make a new chain of type TYPE rooted at REF. */
ad4a85ad 1141
1142static chain_p
a652753d 1143make_rooted_chain (dref ref, enum chain_type type)
ad4a85ad 1144{
1145 chain_p chain = XCNEW (struct chain);
1146
a652753d 1147 chain->type = type;
f1f41a6c 1148 chain->refs.safe_push (ref);
ad4a85ad 1149 chain->all_always_accessed = ref->always_accessed;
ad4a85ad 1150 ref->distance = 0;
1151
a652753d 1152 chain->inits = vNULL;
1153 chain->finis = vNULL;
1154
ad4a85ad 1155 return chain;
1156}
1157
1158/* Returns true if CHAIN is not trivial. */
1159
1160static bool
1161nontrivial_chain_p (chain_p chain)
1162{
f1f41a6c 1163 return chain != NULL && chain->refs.length () > 1;
ad4a85ad 1164}
1165
1166/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1167 is no such name. */
1168
1169static tree
1170name_for_ref (dref ref)
1171{
1172 tree name;
1173
75a70cf9 1174 if (is_gimple_assign (ref->stmt))
ad4a85ad 1175 {
1176 if (!ref->ref || DR_IS_READ (ref->ref))
75a70cf9 1177 name = gimple_assign_lhs (ref->stmt);
ad4a85ad 1178 else
75a70cf9 1179 name = gimple_assign_rhs1 (ref->stmt);
ad4a85ad 1180 }
1181 else
1182 name = PHI_RESULT (ref->stmt);
1183
1184 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1185}
1186
1187/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1188 iterations of the innermost enclosing loop). */
1189
1190static bool
1191valid_initializer_p (struct data_reference *ref,
1192 unsigned distance, struct data_reference *root)
1193{
1194 aff_tree diff, base, step;
1aeea61f 1195 poly_widest_int off;
ad4a85ad 1196
ad4a85ad 1197 /* Both REF and ROOT must be accessing the same object. */
1198 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1199 return false;
1200
1201 /* The initializer is defined outside of loop, hence its address must be
1202 invariant inside the loop. */
1203 gcc_assert (integer_zerop (DR_STEP (ref)));
1204
1205 /* If the address of the reference is invariant, initializer must access
1206 exactly the same location. */
1207 if (integer_zerop (DR_STEP (root)))
1208 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1209 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1210
1211 /* Verify that this index of REF is equal to the root's index at
1212 -DISTANCE-th iteration. */
1213 aff_combination_dr_offset (root, &diff);
1214 aff_combination_dr_offset (ref, &base);
e913b5cd 1215 aff_combination_scale (&base, -1);
ad4a85ad 1216 aff_combination_add (&diff, &base);
1217
a0553bff 1218 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1219 &step, &name_expansions);
ad4a85ad 1220 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1221 return false;
1222
1aeea61f 1223 if (maybe_ne (off, distance))
ad4a85ad 1224 return false;
1225
1226 return true;
1227}
1228
1229/* Finds looparound phi node of LOOP that copies the value of REF, and if its
1230 initial value is correct (equal to initial value of REF shifted by one
1231 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1232 is the root of the current chain. */
1233
1a91d914 1234static gphi *
2e966e2a 1235find_looparound_phi (class loop *loop, dref ref, dref root)
ad4a85ad 1236{
75a70cf9 1237 tree name, init, init_ref;
1a91d914 1238 gphi *phi = NULL;
42acab1c 1239 gimple *init_stmt;
ad4a85ad 1240 edge latch = loop_latch_edge (loop);
1241 struct data_reference init_dr;
1a91d914 1242 gphi_iterator psi;
ad4a85ad 1243
75a70cf9 1244 if (is_gimple_assign (ref->stmt))
ad4a85ad 1245 {
1246 if (DR_IS_READ (ref->ref))
75a70cf9 1247 name = gimple_assign_lhs (ref->stmt);
ad4a85ad 1248 else
75a70cf9 1249 name = gimple_assign_rhs1 (ref->stmt);
ad4a85ad 1250 }
1251 else
1252 name = PHI_RESULT (ref->stmt);
1253 if (!name)
75a70cf9 1254 return NULL;
ad4a85ad 1255
75a70cf9 1256 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1257 {
1a91d914 1258 phi = psi.phi ();
75a70cf9 1259 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1260 break;
1261 }
ad4a85ad 1262
75a70cf9 1263 if (gsi_end_p (psi))
1264 return NULL;
ad4a85ad 1265
1266 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1267 if (TREE_CODE (init) != SSA_NAME)
75a70cf9 1268 return NULL;
ad4a85ad 1269 init_stmt = SSA_NAME_DEF_STMT (init);
75a70cf9 1270 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1271 return NULL;
1272 gcc_assert (gimple_assign_lhs (init_stmt) == init);
ad4a85ad 1273
75a70cf9 1274 init_ref = gimple_assign_rhs1 (init_stmt);
ad4a85ad 1275 if (!REFERENCE_CLASS_P (init_ref)
1276 && !DECL_P (init_ref))
75a70cf9 1277 return NULL;
ad4a85ad 1278
1279 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1280 loop enclosing PHI). */
1281 memset (&init_dr, 0, sizeof (struct data_reference));
1282 DR_REF (&init_dr) = init_ref;
1283 DR_STMT (&init_dr) = phi;
ed9370cc 1284 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
1285 init_stmt))
880734c8 1286 return NULL;
ad4a85ad 1287
1288 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
75a70cf9 1289 return NULL;
ad4a85ad 1290
1291 return phi;
1292}
1293
1294/* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1295
1296static void
1a91d914 1297insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
ad4a85ad 1298{
2e966e2a 1299 dref nw = XCNEW (class dref_d), aref;
ad4a85ad 1300 unsigned i;
1301
1302 nw->stmt = phi;
1303 nw->distance = ref->distance + 1;
1304 nw->always_accessed = 1;
1305
f1f41a6c 1306 FOR_EACH_VEC_ELT (chain->refs, i, aref)
ad4a85ad 1307 if (aref->distance >= nw->distance)
1308 break;
f1f41a6c 1309 chain->refs.safe_insert (i, nw);
ad4a85ad 1310
1311 if (nw->distance > chain->length)
1312 {
1313 chain->length = nw->distance;
1314 chain->has_max_use_after = false;
1315 }
1316}
1317
1318/* For references in CHAIN that are copied around the LOOP (created previously
1319 by PRE, or by user), add the results of such copies to the chain. This
1320 enables us to remove the copies by unrolling, and may need less registers
1321 (also, it may allow us to combine chains together). */
1322
1323static void
2e966e2a 1324add_looparound_copies (class loop *loop, chain_p chain)
ad4a85ad 1325{
1326 unsigned i;
1327 dref ref, root = get_chain_root (chain);
1a91d914 1328 gphi *phi;
ad4a85ad 1329
a652753d 1330 if (chain->type == CT_STORE_STORE)
1331 return;
1332
f1f41a6c 1333 FOR_EACH_VEC_ELT (chain->refs, i, ref)
ad4a85ad 1334 {
1335 phi = find_looparound_phi (loop, ref, root);
1336 if (!phi)
1337 continue;
1338
1339 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1340 insert_looparound_copy (chain, ref, phi);
1341 }
1342}
1343
1344/* Find roots of the values and determine distances in the component COMP.
1345 The references are redistributed into CHAINS. LOOP is the current
1346 loop. */
1347
1348static void
2e966e2a 1349determine_roots_comp (class loop *loop,
ad4a85ad 1350 struct component *comp,
f1f41a6c 1351 vec<chain_p> *chains)
ad4a85ad 1352{
1353 unsigned i;
1354 dref a;
1355 chain_p chain = NULL;
5de9d3ed 1356 widest_int last_ofs = 0;
a652753d 1357 enum chain_type type;
ad4a85ad 1358
1359 /* Invariants are handled specially. */
1360 if (comp->comp_step == RS_INVARIANT)
1361 {
1362 chain = make_invariant_chain (comp);
f1f41a6c 1363 chains->safe_push (chain);
ad4a85ad 1364 return;
1365 }
1366
8a0e25c9 1367 /* Trivial component. */
1368 if (comp->refs.length () <= 1)
f7b2503a 1369 {
1370 if (comp->refs.length () == 1)
1371 {
1372 free (comp->refs[0]);
1373 comp->refs.truncate (0);
1374 }
1375 return;
1376 }
ad4a85ad 1377
8a0e25c9 1378 comp->refs.qsort (order_drefs);
83c06cf0 1379
1380 /* For Store-Store chain, we only support load if it is dominated by a
1381 store statement in the same iteration of loop. */
1382 if (comp->eliminate_store_p)
1383 for (a = NULL, i = 0; i < comp->refs.length (); i++)
1384 {
1385 if (DR_IS_WRITE (comp->refs[i]->ref))
1386 a = comp->refs[i];
1387 else if (a == NULL || a->offset != comp->refs[i]->offset)
1388 {
1389 /* If there is load that is not dominated by a store in the
1390 same iteration of loop, clear the flag so no Store-Store
1391 chain is generated for this component. */
1392 comp->eliminate_store_p = false;
1393 break;
1394 }
1395 }
1396
1397 /* Determine roots and create chains for components. */
f1f41a6c 1398 FOR_EACH_VEC_ELT (comp->refs, i, a)
ad4a85ad 1399 {
a652753d 1400 if (!chain
83c06cf0 1401 || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
a652753d 1402 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
796b6678 1403 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
ad4a85ad 1404 {
1405 if (nontrivial_chain_p (chain))
be2e5c02 1406 {
1407 add_looparound_copies (loop, chain);
f1f41a6c 1408 chains->safe_push (chain);
be2e5c02 1409 }
ad4a85ad 1410 else
1411 release_chain (chain);
a652753d 1412
83c06cf0 1413 /* Determine type of the chain. If the root reference is a load,
1414 this can only be a CT_LOAD chain; other chains are intialized
1415 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1416 new reference is added. */
1417 type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
a652753d 1418 chain = make_rooted_chain (a, type);
be2e5c02 1419 last_ofs = a->offset;
ad4a85ad 1420 continue;
1421 }
1422
1423 add_ref_to_chain (chain, a);
1424 }
1425
1426 if (nontrivial_chain_p (chain))
1427 {
1428 add_looparound_copies (loop, chain);
f1f41a6c 1429 chains->safe_push (chain);
ad4a85ad 1430 }
1431 else
1432 release_chain (chain);
1433}
1434
1435/* Find roots of the values and determine distances in components COMPS, and
1436 separates the references to CHAINS. LOOP is the current loop. */
1437
1438static void
2e966e2a 1439determine_roots (class loop *loop,
f1f41a6c 1440 struct component *comps, vec<chain_p> *chains)
ad4a85ad 1441{
1442 struct component *comp;
1443
1444 for (comp = comps; comp; comp = comp->next)
1445 determine_roots_comp (loop, comp, chains);
1446}
1447
1448/* Replace the reference in statement STMT with temporary variable
f4e36c33 1449 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
ad4a85ad 1450 the reference in the statement. IN_LHS is true if the reference
1451 is in the lhs of STMT, false if it is in rhs. */
1452
1453static void
42acab1c 1454replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
ad4a85ad 1455{
75a70cf9 1456 tree val;
1a91d914 1457 gassign *new_stmt;
75a70cf9 1458 gimple_stmt_iterator bsi, psi;
ad4a85ad 1459
75a70cf9 1460 if (gimple_code (stmt) == GIMPLE_PHI)
ad4a85ad 1461 {
1462 gcc_assert (!in_lhs && !set);
1463
1464 val = PHI_RESULT (stmt);
75a70cf9 1465 bsi = gsi_after_labels (gimple_bb (stmt));
1466 psi = gsi_for_stmt (stmt);
1467 remove_phi_node (&psi, false);
ad4a85ad 1468
75a70cf9 1469 /* Turn the phi node into GIMPLE_ASSIGN. */
f4e36c33 1470 new_stmt = gimple_build_assign (val, new_tree);
75a70cf9 1471 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
ad4a85ad 1472 return;
1473 }
48e1416a 1474
ad4a85ad 1475 /* Since the reference is of gimple_reg type, it should only
1476 appear as lhs or rhs of modify statement. */
75a70cf9 1477 gcc_assert (is_gimple_assign (stmt));
1478
1479 bsi = gsi_for_stmt (stmt);
ad4a85ad 1480
f4e36c33 1481 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
ad4a85ad 1482 if (!set)
1483 {
1484 gcc_assert (!in_lhs);
f4e36c33 1485 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
75a70cf9 1486 stmt = gsi_stmt (bsi);
ad4a85ad 1487 update_stmt (stmt);
1488 return;
1489 }
1490
ad4a85ad 1491 if (in_lhs)
1492 {
75a70cf9 1493 /* We have statement
48e1416a 1494
75a70cf9 1495 OLD = VAL
ad4a85ad 1496
75a70cf9 1497 If OLD is a memory reference, then VAL is gimple_val, and we transform
1498 this to
ad4a85ad 1499
1500 OLD = VAL
1501 NEW = VAL
1502
48e1416a 1503 Otherwise, we are replacing a combination chain,
75a70cf9 1504 VAL is the expression that performs the combination, and OLD is an
1505 SSA name. In this case, we transform the assignment to
1506
1507 OLD = VAL
1508 NEW = OLD
1509
1510 */
1511
1512 val = gimple_assign_lhs (stmt);
1513 if (TREE_CODE (val) != SSA_NAME)
1514 {
75a70cf9 1515 val = gimple_assign_rhs1 (stmt);
bbfbb5ba 1516 gcc_assert (gimple_assign_single_p (stmt));
1517 if (TREE_CLOBBER_P (val))
c6dfe037 1518 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
bbfbb5ba 1519 else
1520 gcc_assert (gimple_assign_copy_p (stmt));
75a70cf9 1521 }
ad4a85ad 1522 }
1523 else
1524 {
ad4a85ad 1525 /* VAL = OLD
1526
1527 is transformed to
1528
1529 VAL = OLD
1530 NEW = VAL */
75a70cf9 1531
1532 val = gimple_assign_lhs (stmt);
ad4a85ad 1533 }
1534
f4e36c33 1535 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
75a70cf9 1536 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
ad4a85ad 1537}
1538
3e59c7a7 1539/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1540 of the loop it was analyzed in. Append init stmts to STMTS. */
99f6be4b 1541
b1abc234 1542static tree
3e59c7a7 1543ref_at_iteration (data_reference_p dr, int iter,
1544 gimple_seq *stmts, tree niters = NULL_TREE)
99f6be4b 1545{
1546 tree off = DR_OFFSET (dr);
1547 tree coff = DR_INIT (dr);
b1abc234 1548 tree ref = DR_REF (dr);
1549 enum tree_code ref_code = ERROR_MARK;
1550 tree ref_type = NULL_TREE;
1551 tree ref_op1 = NULL_TREE;
1552 tree ref_op2 = NULL_TREE;
3e59c7a7 1553 tree new_offset;
1554
1555 if (iter != 0)
1556 {
1557 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1558 if (TREE_CODE (new_offset) == INTEGER_CST)
1559 coff = size_binop (PLUS_EXPR, coff, new_offset);
1560 else
1561 off = size_binop (PLUS_EXPR, off, new_offset);
1562 }
1563
1564 if (niters != NULL_TREE)
1565 {
1566 niters = fold_convert (ssizetype, niters);
1567 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1568 if (TREE_CODE (niters) == INTEGER_CST)
1569 coff = size_binop (PLUS_EXPR, coff, new_offset);
1570 else
1571 off = size_binop (PLUS_EXPR, off, new_offset);
1572 }
1573
e5104bca 1574 /* While data-ref analysis punts on bit offsets it still handles
1575 bitfield accesses at byte boundaries. Cope with that. Note that
b1abc234 1576 if the bitfield object also starts at a byte-boundary we can simply
1577 replicate the COMPONENT_REF, but we have to subtract the component's
1578 byte-offset from the MEM_REF address first.
1579 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
e5104bca 1580 start at offset zero. */
b1abc234 1581 if (TREE_CODE (ref) == COMPONENT_REF
1582 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
e5104bca 1583 {
b1abc234 1584 unsigned HOST_WIDE_INT boff;
1585 tree field = TREE_OPERAND (ref, 1);
1586 tree offset = component_ref_field_offset (ref);
1587 ref_type = TREE_TYPE (ref);
1588 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1589 /* This can occur in Ada. See the comment in get_bit_range. */
1590 if (boff % BITS_PER_UNIT != 0
1591 || !tree_fits_uhwi_p (offset))
1592 {
1593 ref_code = BIT_FIELD_REF;
1594 ref_op1 = DECL_SIZE (field);
1595 ref_op2 = bitsize_zero_node;
1596 }
1597 else
1598 {
1599 boff >>= LOG2_BITS_PER_UNIT;
1600 boff += tree_to_uhwi (offset);
1601 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1602 ref_code = COMPONENT_REF;
1603 ref_op1 = field;
1604 ref_op2 = TREE_OPERAND (ref, 2);
1605 ref = TREE_OPERAND (ref, 0);
1606 }
ad4a85ad 1607 }
b1abc234 1608 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1609 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1610 is_gimple_mem_ref_addr, NULL_TREE);
1611 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1612 tree type = build_aligned_type (TREE_TYPE (ref),
1613 get_object_alignment (ref));
1614 ref = build2 (MEM_REF, type, addr, alias_ptr);
1615 if (ref_type)
1616 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1617 return ref;
ad4a85ad 1618}
1619
1620/* Get the initialization expression for the INDEX-th temporary variable
1621 of CHAIN. */
1622
1623static tree
1624get_init_expr (chain_p chain, unsigned index)
1625{
1626 if (chain->type == CT_COMBINATION)
1627 {
1628 tree e1 = get_init_expr (chain->ch1, index);
1629 tree e2 = get_init_expr (chain->ch2, index);
1630
f4e36c33 1631 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
ad4a85ad 1632 }
1633 else
f1f41a6c 1634 return chain->inits[index];
ad4a85ad 1635}
1636
a4c3242a 1637/* Returns a new temporary variable used for the I-th variable carrying
1638 value of REF. The variable's uid is marked in TMP_VARS. */
1639
1640static tree
1641predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1642{
1643 tree type = TREE_TYPE (ref);
a4c3242a 1644 /* We never access the components of the temporary variable in predictive
1645 commoning. */
2ac51e48 1646 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
a4c3242a 1647 bitmap_set_bit (tmp_vars, DECL_UID (var));
1648 return var;
1649}
1650
ad4a85ad 1651/* Creates the variables for CHAIN, as well as phi nodes for them and
1652 initialization on entry to LOOP. Uids of the newly created
1653 temporary variables are marked in TMP_VARS. */
1654
1655static void
2e966e2a 1656initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
ad4a85ad 1657{
1658 unsigned i;
1659 unsigned n = chain->length;
1660 dref root = get_chain_root (chain);
1661 bool reuse_first = !chain->has_max_use_after;
75a70cf9 1662 tree ref, init, var, next;
1a91d914 1663 gphi *phi;
75a70cf9 1664 gimple_seq stmts;
ad4a85ad 1665 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1666
1667 /* If N == 0, then all the references are within the single iteration. And
1668 since this is an nonempty chain, reuse_first cannot be true. */
1669 gcc_assert (n > 0 || !reuse_first);
1670
f1f41a6c 1671 chain->vars.create (n + 1);
ad4a85ad 1672
1673 if (chain->type == CT_COMBINATION)
75a70cf9 1674 ref = gimple_assign_lhs (root->stmt);
ad4a85ad 1675 else
1676 ref = DR_REF (root->ref);
1677
1678 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1679 {
a4c3242a 1680 var = predcom_tmp_var (ref, i, tmp_vars);
f1f41a6c 1681 chain->vars.quick_push (var);
ad4a85ad 1682 }
1683 if (reuse_first)
f1f41a6c 1684 chain->vars.quick_push (chain->vars[0]);
48e1416a 1685
f1f41a6c 1686 FOR_EACH_VEC_ELT (chain->vars, i, var)
f9e245b2 1687 chain->vars[i] = make_ssa_name (var);
ad4a85ad 1688
1689 for (i = 0; i < n; i++)
1690 {
f1f41a6c 1691 var = chain->vars[i];
1692 next = chain->vars[i + 1];
ad4a85ad 1693 init = get_init_expr (chain, i);
1694
1695 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1696 if (stmts)
dd277d48 1697 gsi_insert_seq_on_edge_immediate (entry, stmts);
ad4a85ad 1698
1699 phi = create_phi_node (var, loop->header);
60d535d2 1700 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1701 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
ad4a85ad 1702 }
1703}
1704
e33b3a13 1705/* For inter-iteration store elimination CHAIN in LOOP, returns true if
1706 all stores to be eliminated store loop invariant values into memory.
1707 In this case, we can use these invariant values directly after LOOP. */
1708
1709static bool
2e966e2a 1710is_inv_store_elimination_chain (class loop *loop, chain_p chain)
e33b3a13 1711{
1712 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1713 return false;
1714
1715 gcc_assert (!chain->has_max_use_after);
1716
8537594f 1717 /* If loop iterates for unknown times or fewer times than chain->length,
e33b3a13 1718 we still need to setup root variable and propagate it with PHI node. */
1719 tree niters = number_of_latch_executions (loop);
e3d0f65c 1720 if (TREE_CODE (niters) != INTEGER_CST
1721 || wi::leu_p (wi::to_wide (niters), chain->length))
e33b3a13 1722 return false;
1723
1724 /* Check stores in chain for elimination if they only store loop invariant
1725 values. */
1726 for (unsigned i = 0; i < chain->length; i++)
1727 {
83c06cf0 1728 dref a = get_chain_last_write_at (chain, i);
e33b3a13 1729 if (a == NULL)
1730 continue;
1731
1732 gimple *def_stmt, *stmt = a->stmt;
1733 if (!gimple_assign_single_p (stmt))
1734 return false;
1735
1736 tree val = gimple_assign_rhs1 (stmt);
1737 if (TREE_CLOBBER_P (val))
1738 return false;
1739
1740 if (CONSTANT_CLASS_P (val))
1741 continue;
1742
1743 if (TREE_CODE (val) != SSA_NAME)
1744 return false;
1745
1746 def_stmt = SSA_NAME_DEF_STMT (val);
1747 if (gimple_nop_p (def_stmt))
1748 continue;
1749
1750 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1751 return false;
1752 }
1753 return true;
1754}
1755
1756/* Creates root variables for store elimination CHAIN in which stores for
1757 elimination only store loop invariant values. In this case, we neither
1758 need to load root variables before loop nor propagate it with PHI nodes. */
1759
1760static void
1761initialize_root_vars_store_elim_1 (chain_p chain)
1762{
1763 tree var;
1764 unsigned i, n = chain->length;
1765
1766 chain->vars.create (n);
1767 chain->vars.safe_grow_cleared (n);
1768
1769 /* Initialize root value for eliminated stores at each distance. */
1770 for (i = 0; i < n; i++)
1771 {
83c06cf0 1772 dref a = get_chain_last_write_at (chain, i);
e33b3a13 1773 if (a == NULL)
1774 continue;
1775
1776 var = gimple_assign_rhs1 (a->stmt);
1777 chain->vars[a->distance] = var;
1778 }
1779
1780 /* We don't propagate values with PHI nodes, so manually propagate value
1781 to bubble positions. */
1782 var = chain->vars[0];
1783 for (i = 1; i < n; i++)
1784 {
1785 if (chain->vars[i] != NULL_TREE)
1786 {
1787 var = chain->vars[i];
1788 continue;
1789 }
1790 chain->vars[i] = var;
1791 }
1792
1793 /* Revert the vector. */
1794 for (i = 0; i < n / 2; i++)
1795 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1796}
1797
a652753d 1798/* Creates root variables for store elimination CHAIN in which stores for
1799 elimination store loop variant values. In this case, we may need to
1800 load root variables before LOOP and propagate it with PHI nodes. Uids
1801 of the newly created root variables are marked in TMP_VARS. */
1802
1803static void
2e966e2a 1804initialize_root_vars_store_elim_2 (class loop *loop,
a652753d 1805 chain_p chain, bitmap tmp_vars)
1806{
1807 unsigned i, n = chain->length;
1808 tree ref, init, var, next, val, phi_result;
1809 gimple *stmt;
1810 gimple_seq stmts;
1811
1812 chain->vars.create (n);
1813
1814 ref = DR_REF (get_chain_root (chain)->ref);
1815 for (i = 0; i < n; i++)
1816 {
1817 var = predcom_tmp_var (ref, i, tmp_vars);
1818 chain->vars.quick_push (var);
1819 }
1820
1821 FOR_EACH_VEC_ELT (chain->vars, i, var)
1822 chain->vars[i] = make_ssa_name (var);
1823
1824 /* Root values are either rhs operand of stores to be eliminated, or
1825 loaded from memory before loop. */
1826 auto_vec<tree> vtemps;
1827 vtemps.safe_grow_cleared (n);
1828 for (i = 0; i < n; i++)
1829 {
1830 init = get_init_expr (chain, i);
1831 if (init == NULL_TREE)
1832 {
1833 /* Root value is rhs operand of the store to be eliminated if
1834 it isn't loaded from memory before loop. */
83c06cf0 1835 dref a = get_chain_last_write_at (chain, i);
a652753d 1836 val = gimple_assign_rhs1 (a->stmt);
1837 if (TREE_CLOBBER_P (val))
83c06cf0 1838 {
1839 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1840 gimple_assign_set_rhs1 (a->stmt, val);
1841 }
a652753d 1842
1843 vtemps[n - i - 1] = val;
1844 }
1845 else
1846 {
1847 edge latch = loop_latch_edge (loop);
1848 edge entry = loop_preheader_edge (loop);
1849
1850 /* Root value is loaded from memory before loop, we also need
1851 to add PHI nodes to propagate the value across iterations. */
1852 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1853 if (stmts)
1854 gsi_insert_seq_on_edge_immediate (entry, stmts);
1855
1856 next = chain->vars[n - i];
1857 phi_result = copy_ssa_name (next);
1858 gphi *phi = create_phi_node (phi_result, loop->header);
1859 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1860 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1861 vtemps[n - i - 1] = phi_result;
1862 }
1863 }
1864
1865 /* Find the insertion position. */
1866 dref last = get_chain_root (chain);
1867 for (i = 0; i < chain->refs.length (); i++)
1868 {
1869 if (chain->refs[i]->pos > last->pos)
1870 last = chain->refs[i];
1871 }
1872
1873 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1874
1875 /* Insert statements copying root value to root variable. */
1876 for (i = 0; i < n; i++)
1877 {
1878 var = chain->vars[i];
1879 val = vtemps[i];
1880 stmt = gimple_build_assign (var, val);
1881 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1882 }
1883}
1884
1885/* Generates stores for CHAIN's eliminated stores in LOOP's last
1886 (CHAIN->length - 1) iterations. */
1887
1888static void
2e966e2a 1889finalize_eliminated_stores (class loop *loop, chain_p chain)
a652753d 1890{
1891 unsigned i, n = chain->length;
1892
1893 for (i = 0; i < n; i++)
1894 {
1895 tree var = chain->vars[i];
1896 tree fini = chain->finis[n - i - 1];
1897 gimple *stmt = gimple_build_assign (fini, var);
1898
1899 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1900 }
1901
1902 if (chain->fini_seq)
1903 {
1904 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1905 chain->fini_seq = NULL;
1906 }
1907}
1908
ad4a85ad 1909/* Initializes a variable for load motion for ROOT and prepares phi nodes and
1910 initialization on entry to LOOP if necessary. The ssa name for the variable
1911 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1912 around the loop is created. Uid of the newly created temporary variable
1913 is marked in TMP_VARS. INITS is the list containing the (single)
1914 initializer. */
1915
1916static void
2e966e2a 1917initialize_root_vars_lm (class loop *loop, dref root, bool written,
f1f41a6c 1918 vec<tree> *vars, vec<tree> inits,
ad4a85ad 1919 bitmap tmp_vars)
1920{
1921 unsigned i;
75a70cf9 1922 tree ref = DR_REF (root->ref), init, var, next;
1923 gimple_seq stmts;
1a91d914 1924 gphi *phi;
ad4a85ad 1925 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1926
1927 /* Find the initializer for the variable, and check that it cannot
1928 trap. */
f1f41a6c 1929 init = inits[0];
ad4a85ad 1930
f1f41a6c 1931 vars->create (written ? 2 : 1);
a4c3242a 1932 var = predcom_tmp_var (ref, 0, tmp_vars);
f1f41a6c 1933 vars->quick_push (var);
ad4a85ad 1934 if (written)
f1f41a6c 1935 vars->quick_push ((*vars)[0]);
48e1416a 1936
f1f41a6c 1937 FOR_EACH_VEC_ELT (*vars, i, var)
f9e245b2 1938 (*vars)[i] = make_ssa_name (var);
ad4a85ad 1939
f1f41a6c 1940 var = (*vars)[0];
48e1416a 1941
ad4a85ad 1942 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1943 if (stmts)
dd277d48 1944 gsi_insert_seq_on_edge_immediate (entry, stmts);
ad4a85ad 1945
1946 if (written)
1947 {
f1f41a6c 1948 next = (*vars)[1];
ad4a85ad 1949 phi = create_phi_node (var, loop->header);
60d535d2 1950 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1951 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
ad4a85ad 1952 }
1953 else
1954 {
1a91d914 1955 gassign *init_stmt = gimple_build_assign (var, init);
75a70cf9 1956 gsi_insert_on_edge_immediate (entry, init_stmt);
ad4a85ad 1957 }
1958}
1959
1960
1961/* Execute load motion for references in chain CHAIN. Uids of the newly
1962 created temporary variables are marked in TMP_VARS. */
1963
1964static void
2e966e2a 1965execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
ad4a85ad 1966{
c2078b80 1967 auto_vec<tree> vars;
ad4a85ad 1968 dref a;
1969 unsigned n_writes = 0, ridx, i;
1970 tree var;
1971
1972 gcc_assert (chain->type == CT_INVARIANT);
1973 gcc_assert (!chain->combined);
f1f41a6c 1974 FOR_EACH_VEC_ELT (chain->refs, i, a)
9ff25603 1975 if (DR_IS_WRITE (a->ref))
ad4a85ad 1976 n_writes++;
48e1416a 1977
ad4a85ad 1978 /* If there are no reads in the loop, there is nothing to do. */
f1f41a6c 1979 if (n_writes == chain->refs.length ())
ad4a85ad 1980 return;
1981
1982 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1983 &vars, chain->inits, tmp_vars);
1984
1985 ridx = 0;
f1f41a6c 1986 FOR_EACH_VEC_ELT (chain->refs, i, a)
ad4a85ad 1987 {
1988 bool is_read = DR_IS_READ (a->ref);
ad4a85ad 1989
9ff25603 1990 if (DR_IS_WRITE (a->ref))
ad4a85ad 1991 {
1992 n_writes--;
1993 if (n_writes)
1994 {
f1f41a6c 1995 var = vars[0];
f9e245b2 1996 var = make_ssa_name (SSA_NAME_VAR (var));
f1f41a6c 1997 vars[0] = var;
ad4a85ad 1998 }
1999 else
2000 ridx = 1;
2001 }
48e1416a 2002
f1f41a6c 2003 replace_ref_with (a->stmt, vars[ridx],
ad4a85ad 2004 !is_read, !is_read);
2005 }
ad4a85ad 2006}
2007
2008/* Returns the single statement in that NAME is used, excepting
2009 the looparound phi nodes contained in one of the chains. If there is no
75a70cf9 2010 such statement, or more statements, NULL is returned. */
ad4a85ad 2011
42acab1c 2012static gimple *
ad4a85ad 2013single_nonlooparound_use (tree name)
2014{
2015 use_operand_p use;
2016 imm_use_iterator it;
42acab1c 2017 gimple *stmt, *ret = NULL;
ad4a85ad 2018
2019 FOR_EACH_IMM_USE_FAST (use, it, name)
2020 {
2021 stmt = USE_STMT (use);
2022
75a70cf9 2023 if (gimple_code (stmt) == GIMPLE_PHI)
ad4a85ad 2024 {
2025 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2026 could not be processed anyway, so just fail for them. */
2027 if (bitmap_bit_p (looparound_phis,
2028 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2029 continue;
2030
75a70cf9 2031 return NULL;
ad4a85ad 2032 }
db16e52d 2033 else if (is_gimple_debug (stmt))
2034 continue;
75a70cf9 2035 else if (ret != NULL)
2036 return NULL;
ad4a85ad 2037 else
2038 ret = stmt;
2039 }
2040
2041 return ret;
2042}
2043
2044/* Remove statement STMT, as well as the chain of assignments in that it is
2045 used. */
2046
2047static void
42acab1c 2048remove_stmt (gimple *stmt)
ad4a85ad 2049{
75a70cf9 2050 tree name;
42acab1c 2051 gimple *next;
75a70cf9 2052 gimple_stmt_iterator psi;
ad4a85ad 2053
75a70cf9 2054 if (gimple_code (stmt) == GIMPLE_PHI)
ad4a85ad 2055 {
2056 name = PHI_RESULT (stmt);
2057 next = single_nonlooparound_use (name);
b6d5efe6 2058 reset_debug_uses (stmt);
75a70cf9 2059 psi = gsi_for_stmt (stmt);
2060 remove_phi_node (&psi, true);
ad4a85ad 2061
2062 if (!next
fb2d5860 2063 || !gimple_assign_ssa_name_copy_p (next)
75a70cf9 2064 || gimple_assign_rhs1 (next) != name)
ad4a85ad 2065 return;
2066
2067 stmt = next;
2068 }
2069
2070 while (1)
2071 {
75a70cf9 2072 gimple_stmt_iterator bsi;
48e1416a 2073
75a70cf9 2074 bsi = gsi_for_stmt (stmt);
ad4a85ad 2075
75a70cf9 2076 name = gimple_assign_lhs (stmt);
a652753d 2077 if (TREE_CODE (name) == SSA_NAME)
2078 {
2079 next = single_nonlooparound_use (name);
2080 reset_debug_uses (stmt);
2081 }
2082 else
2083 {
2084 /* This is a store to be eliminated. */
2085 gcc_assert (gimple_vdef (stmt) != NULL);
2086 next = NULL;
2087 }
ad4a85ad 2088
e70e8b13 2089 unlink_stmt_vdef (stmt);
75a70cf9 2090 gsi_remove (&bsi, true);
fb2d5860 2091 release_defs (stmt);
ad4a85ad 2092
2093 if (!next
fb2d5860 2094 || !gimple_assign_ssa_name_copy_p (next)
75a70cf9 2095 || gimple_assign_rhs1 (next) != name)
ad4a85ad 2096 return;
2097
2098 stmt = next;
2099 }
2100}
2101
2102/* Perform the predictive commoning optimization for a chain CHAIN.
2103 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2104
2105static void
2e966e2a 2106execute_pred_commoning_chain (class loop *loop, chain_p chain,
a652753d 2107 bitmap tmp_vars)
ad4a85ad 2108{
83c06cf0 2109 unsigned i;
e70e8b13 2110 dref a;
ad4a85ad 2111 tree var;
fbab3016 2112 bool in_lhs;
ad4a85ad 2113
2114 if (chain->combined)
2115 {
2116 /* For combined chains, just remove the statements that are used to
182624fa 2117 compute the values of the expression (except for the root one).
2118 We delay this until after all chains are processed. */
ad4a85ad 2119 }
a652753d 2120 else if (chain->type == CT_STORE_STORE)
2121 {
2122 if (chain->length > 0)
2123 {
e33b3a13 2124 if (chain->inv_store_elimination)
2125 {
2126 /* If dead stores in this chain only store loop invariant
2127 values, we can simply record the invariant value and use
2128 it directly after loop. */
2129 initialize_root_vars_store_elim_1 (chain);
2130 }
2131 else
2132 {
2133 /* If dead stores in this chain store loop variant values,
2134 we need to set up the variables by loading from memory
2135 before loop and propagating it with PHI nodes. */
2136 initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2137 }
a652753d 2138
2139 /* For inter-iteration store elimination chain, stores at each
2140 distance in loop's last (chain->length - 1) iterations can't
2141 be eliminated, because there is no following killing store.
2142 We need to generate these stores after loop. */
2143 finalize_eliminated_stores (loop, chain);
2144 }
2145
83c06cf0 2146 bool last_store_p = true;
2147 for (i = chain->refs.length (); i > 0; i--)
2148 {
2149 a = chain->refs[i - 1];
2150 /* Preserve the last store of the chain. Eliminate other stores
2151 which are killed by the last one. */
2152 if (DR_IS_WRITE (a->ref))
2153 {
2154 if (last_store_p)
2155 last_store_p = false;
2156 else
2157 remove_stmt (a->stmt);
2158
2159 continue;
2160 }
2161
2162 /* Any load in Store-Store chain must be dominated by a previous
2163 store, we replace the load reference with rhs of the store. */
2164 dref b = get_chain_last_write_before_load (chain, i - 1);
2165 gcc_assert (b != NULL);
2166 var = gimple_assign_rhs1 (b->stmt);
2167 replace_ref_with (a->stmt, var, false, false);
2168 }
a652753d 2169 }
ad4a85ad 2170 else
2171 {
fbab3016 2172 /* For non-combined chains, set up the variables that hold its value. */
2173 initialize_root_vars (loop, chain, tmp_vars);
2174 a = get_chain_root (chain);
2175 in_lhs = (chain->type == CT_STORE_LOAD
2176 || chain->type == CT_COMBINATION);
2177 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2178
2179 /* Replace the uses of the original references by these variables. */
f1f41a6c 2180 for (i = 1; chain->refs.iterate (i, &a); i++)
ad4a85ad 2181 {
f1f41a6c 2182 var = chain->vars[chain->length - a->distance];
ad4a85ad 2183 replace_ref_with (a->stmt, var, false, false);
2184 }
2185 }
2186}
2187
2188/* Determines the unroll factor necessary to remove as many temporary variable
2189 copies as possible. CHAINS is the list of chains that will be
2190 optimized. */
2191
2192static unsigned
f1f41a6c 2193determine_unroll_factor (vec<chain_p> chains)
ad4a85ad 2194{
2195 chain_p chain;
2196 unsigned factor = 1, af, nfactor, i;
2197 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
2198
f1f41a6c 2199 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 2200 {
67a97bd8 2201 if (chain->type == CT_INVARIANT)
ad4a85ad 2202 continue;
a652753d 2203 /* For now we can't handle unrolling when eliminating stores. */
2204 else if (chain->type == CT_STORE_STORE)
2205 return 1;
ad4a85ad 2206
67a97bd8 2207 if (chain->combined)
2208 {
2209 /* For combined chains, we can't handle unrolling if we replace
2210 looparound PHIs. */
2211 dref a;
2212 unsigned j;
2213 for (j = 1; chain->refs.iterate (j, &a); j++)
2214 if (gimple_code (a->stmt) == GIMPLE_PHI)
2215 return 1;
2216 continue;
2217 }
2218
ad4a85ad 2219 /* The best unroll factor for this chain is equal to the number of
2220 temporary variables that we create for it. */
2221 af = chain->length;
2222 if (chain->has_max_use_after)
2223 af++;
2224
2225 nfactor = factor * af / gcd (factor, af);
2226 if (nfactor <= max)
2227 factor = nfactor;
2228 }
2229
2230 return factor;
2231}
2232
2233/* Perform the predictive commoning optimization for CHAINS.
2234 Uids of the newly created temporary variables are marked in TMP_VARS. */
2235
2236static void
2e966e2a 2237execute_pred_commoning (class loop *loop, vec<chain_p> chains,
ad4a85ad 2238 bitmap tmp_vars)
2239{
2240 chain_p chain;
2241 unsigned i;
2242
f1f41a6c 2243 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 2244 {
2245 if (chain->type == CT_INVARIANT)
2246 execute_load_motion (loop, chain, tmp_vars);
2247 else
2248 execute_pred_commoning_chain (loop, chain, tmp_vars);
2249 }
48e1416a 2250
182624fa 2251 FOR_EACH_VEC_ELT (chains, i, chain)
2252 {
2253 if (chain->type == CT_INVARIANT)
2254 ;
2255 else if (chain->combined)
2256 {
2257 /* For combined chains, just remove the statements that are used to
2258 compute the values of the expression (except for the root one). */
2259 dref a;
2260 unsigned j;
2261 for (j = 1; chain->refs.iterate (j, &a); j++)
2262 remove_stmt (a->stmt);
2263 }
2264 }
2265
ad4a85ad 2266 update_ssa (TODO_update_ssa_only_virtuals);
2267}
2268
310d2511 2269/* For each reference in CHAINS, if its defining statement is
75a70cf9 2270 phi node, record the ssa name that is defined by it. */
ad4a85ad 2271
2272static void
f1f41a6c 2273replace_phis_by_defined_names (vec<chain_p> chains)
ad4a85ad 2274{
2275 chain_p chain;
2276 dref a;
2277 unsigned i, j;
2278
f1f41a6c 2279 FOR_EACH_VEC_ELT (chains, i, chain)
2280 FOR_EACH_VEC_ELT (chain->refs, j, a)
ad4a85ad 2281 {
75a70cf9 2282 if (gimple_code (a->stmt) == GIMPLE_PHI)
2283 {
2284 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2285 a->stmt = NULL;
2286 }
ad4a85ad 2287 }
2288}
2289
75a70cf9 2290/* For each reference in CHAINS, if name_defined_by_phi is not
2291 NULL, use it to set the stmt field. */
ad4a85ad 2292
2293static void
f1f41a6c 2294replace_names_by_phis (vec<chain_p> chains)
ad4a85ad 2295{
2296 chain_p chain;
2297 dref a;
2298 unsigned i, j;
2299
f1f41a6c 2300 FOR_EACH_VEC_ELT (chains, i, chain)
2301 FOR_EACH_VEC_ELT (chain->refs, j, a)
75a70cf9 2302 if (a->stmt == NULL)
ad4a85ad 2303 {
75a70cf9 2304 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2305 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2306 a->name_defined_by_phi = NULL_TREE;
ad4a85ad 2307 }
2308}
2309
2310/* Wrapper over execute_pred_commoning, to pass it as a callback
2311 to tree_transform_and_unroll_loop. */
2312
2313struct epcc_data
2314{
f1f41a6c 2315 vec<chain_p> chains;
ad4a85ad 2316 bitmap tmp_vars;
2317};
2318
2319static void
2e966e2a 2320execute_pred_commoning_cbck (class loop *loop, void *data)
ad4a85ad 2321{
45ba1503 2322 struct epcc_data *const dta = (struct epcc_data *) data;
ad4a85ad 2323
2324 /* Restore phi nodes that were replaced by ssa names before
2325 tree_transform_and_unroll_loop (see detailed description in
2326 tree_predictive_commoning_loop). */
2327 replace_names_by_phis (dta->chains);
2328 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2329}
2330
ad4a85ad 2331/* Base NAME and all the names in the chain of phi nodes that use it
2332 on variable VAR. The phi nodes are recognized by being in the copies of
2333 the header of the LOOP. */
2334
2335static void
2e966e2a 2336base_names_in_chain_on (class loop *loop, tree name, tree var)
ad4a85ad 2337{
42acab1c 2338 gimple *stmt, *phi;
ad4a85ad 2339 imm_use_iterator iter;
ad4a85ad 2340
3b652cc1 2341 replace_ssa_name_symbol (name, var);
ad4a85ad 2342
2343 while (1)
2344 {
2345 phi = NULL;
2346 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2347 {
75a70cf9 2348 if (gimple_code (stmt) == GIMPLE_PHI
2349 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
ad4a85ad 2350 {
2351 phi = stmt;
2352 BREAK_FROM_IMM_USE_STMT (iter);
2353 }
2354 }
2355 if (!phi)
2356 return;
2357
ad4a85ad 2358 name = PHI_RESULT (phi);
3b652cc1 2359 replace_ssa_name_symbol (name, var);
ad4a85ad 2360 }
2361}
2362
2363/* Given an unrolled LOOP after predictive commoning, remove the
2364 register copies arising from phi nodes by changing the base
2365 variables of SSA names. TMP_VARS is the set of the temporary variables
2366 for those we want to perform this. */
2367
2368static void
2e966e2a 2369eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
ad4a85ad 2370{
2371 edge e;
1a91d914 2372 gphi *phi;
42acab1c 2373 gimple *stmt;
75a70cf9 2374 tree name, use, var;
1a91d914 2375 gphi_iterator psi;
ad4a85ad 2376
2377 e = loop_latch_edge (loop);
75a70cf9 2378 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
ad4a85ad 2379 {
1a91d914 2380 phi = psi.phi ();
ad4a85ad 2381 name = PHI_RESULT (phi);
2382 var = SSA_NAME_VAR (name);
ec11736b 2383 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
ad4a85ad 2384 continue;
2385 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2386 gcc_assert (TREE_CODE (use) == SSA_NAME);
2387
2388 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2389 stmt = SSA_NAME_DEF_STMT (use);
75a70cf9 2390 while (gimple_code (stmt) == GIMPLE_PHI
3fa1e4f2 2391 /* In case we could not unroll the loop enough to eliminate
2392 all copies, we may reach the loop header before the defining
2393 statement (in that case, some register copies will be present
2394 in loop latch in the final code, corresponding to the newly
2395 created looparound phi nodes). */
75a70cf9 2396 && gimple_bb (stmt) != loop->header)
ad4a85ad 2397 {
75a70cf9 2398 gcc_assert (single_pred_p (gimple_bb (stmt)));
ad4a85ad 2399 use = PHI_ARG_DEF (stmt, 0);
2400 stmt = SSA_NAME_DEF_STMT (use);
2401 }
2402
2403 base_names_in_chain_on (loop, use, var);
2404 }
2405}
2406
2407/* Returns true if CHAIN is suitable to be combined. */
2408
2409static bool
2410chain_can_be_combined_p (chain_p chain)
2411{
2412 return (!chain->combined
2413 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2414}
2415
2416/* Returns the modify statement that uses NAME. Skips over assignment
2417 statements, NAME is replaced with the actual name used in the returned
2418 statement. */
2419
42acab1c 2420static gimple *
ad4a85ad 2421find_use_stmt (tree *name)
2422{
42acab1c 2423 gimple *stmt;
75a70cf9 2424 tree rhs, lhs;
ad4a85ad 2425
2426 /* Skip over assignments. */
2427 while (1)
2428 {
2429 stmt = single_nonlooparound_use (*name);
2430 if (!stmt)
75a70cf9 2431 return NULL;
ad4a85ad 2432
75a70cf9 2433 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2434 return NULL;
ad4a85ad 2435
75a70cf9 2436 lhs = gimple_assign_lhs (stmt);
ad4a85ad 2437 if (TREE_CODE (lhs) != SSA_NAME)
75a70cf9 2438 return NULL;
ad4a85ad 2439
75a70cf9 2440 if (gimple_assign_copy_p (stmt))
2441 {
2442 rhs = gimple_assign_rhs1 (stmt);
2443 if (rhs != *name)
2444 return NULL;
ad4a85ad 2445
75a70cf9 2446 *name = lhs;
2447 }
2448 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2449 == GIMPLE_BINARY_RHS)
2450 return stmt;
2451 else
2452 return NULL;
ad4a85ad 2453 }
ad4a85ad 2454}
2455
2456/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2457
2458static bool
2459may_reassociate_p (tree type, enum tree_code code)
2460{
2461 if (FLOAT_TYPE_P (type)
2462 && !flag_unsafe_math_optimizations)
2463 return false;
2464
2465 return (commutative_tree_code (code)
2466 && associative_tree_code (code));
2467}
2468
2469/* If the operation used in STMT is associative and commutative, go through the
2470 tree of the same operations and returns its root. Distance to the root
2471 is stored in DISTANCE. */
2472
42acab1c 2473static gimple *
2474find_associative_operation_root (gimple *stmt, unsigned *distance)
ad4a85ad 2475{
75a70cf9 2476 tree lhs;
42acab1c 2477 gimple *next;
75a70cf9 2478 enum tree_code code = gimple_assign_rhs_code (stmt);
2479 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
ad4a85ad 2480 unsigned dist = 0;
2481
75a70cf9 2482 if (!may_reassociate_p (type, code))
2483 return NULL;
ad4a85ad 2484
2485 while (1)
2486 {
75a70cf9 2487 lhs = gimple_assign_lhs (stmt);
ad4a85ad 2488 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2489
2490 next = find_use_stmt (&lhs);
75a70cf9 2491 if (!next
2492 || gimple_assign_rhs_code (next) != code)
ad4a85ad 2493 break;
2494
2495 stmt = next;
2496 dist++;
2497 }
2498
2499 if (distance)
2500 *distance = dist;
2501 return stmt;
2502}
2503
2504/* Returns the common statement in that NAME1 and NAME2 have a use. If there
2505 is no such statement, returns NULL_TREE. In case the operation used on
310d2511 2506 NAME1 and NAME2 is associative and commutative, returns the root of the
ad4a85ad 2507 tree formed by this operation instead of the statement that uses NAME1 or
2508 NAME2. */
2509
42acab1c 2510static gimple *
ad4a85ad 2511find_common_use_stmt (tree *name1, tree *name2)
2512{
42acab1c 2513 gimple *stmt1, *stmt2;
ad4a85ad 2514
2515 stmt1 = find_use_stmt (name1);
2516 if (!stmt1)
75a70cf9 2517 return NULL;
ad4a85ad 2518
2519 stmt2 = find_use_stmt (name2);
2520 if (!stmt2)
75a70cf9 2521 return NULL;
ad4a85ad 2522
2523 if (stmt1 == stmt2)
2524 return stmt1;
2525
2526 stmt1 = find_associative_operation_root (stmt1, NULL);
2527 if (!stmt1)
75a70cf9 2528 return NULL;
ad4a85ad 2529 stmt2 = find_associative_operation_root (stmt2, NULL);
2530 if (!stmt2)
75a70cf9 2531 return NULL;
ad4a85ad 2532
75a70cf9 2533 return (stmt1 == stmt2 ? stmt1 : NULL);
ad4a85ad 2534}
2535
2536/* Checks whether R1 and R2 are combined together using CODE, with the result
2537 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2538 if it is true. If CODE is ERROR_MARK, set these values instead. */
2539
2540static bool
2541combinable_refs_p (dref r1, dref r2,
2542 enum tree_code *code, bool *swap, tree *rslt_type)
2543{
2544 enum tree_code acode;
2545 bool aswap;
2546 tree atype;
75a70cf9 2547 tree name1, name2;
42acab1c 2548 gimple *stmt;
ad4a85ad 2549
2550 name1 = name_for_ref (r1);
2551 name2 = name_for_ref (r2);
2552 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2553
2554 stmt = find_common_use_stmt (&name1, &name2);
2555
f8d7b728 2556 if (!stmt
2557 /* A simple post-dominance check - make sure the combination
2558 is executed under the same condition as the references. */
2559 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2560 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
ad4a85ad 2561 return false;
2562
75a70cf9 2563 acode = gimple_assign_rhs_code (stmt);
ad4a85ad 2564 aswap = (!commutative_tree_code (acode)
75a70cf9 2565 && gimple_assign_rhs1 (stmt) != name1);
2566 atype = TREE_TYPE (gimple_assign_lhs (stmt));
ad4a85ad 2567
2568 if (*code == ERROR_MARK)
2569 {
2570 *code = acode;
2571 *swap = aswap;
2572 *rslt_type = atype;
2573 return true;
2574 }
2575
2576 return (*code == acode
2577 && *swap == aswap
2578 && *rslt_type == atype);
2579}
2580
2581/* Remove OP from the operation on rhs of STMT, and replace STMT with
2582 an assignment of the remaining operand. */
2583
2584static void
42acab1c 2585remove_name_from_operation (gimple *stmt, tree op)
ad4a85ad 2586{
75a70cf9 2587 tree other_op;
2588 gimple_stmt_iterator si;
ad4a85ad 2589
75a70cf9 2590 gcc_assert (is_gimple_assign (stmt));
ad4a85ad 2591
75a70cf9 2592 if (gimple_assign_rhs1 (stmt) == op)
2593 other_op = gimple_assign_rhs2 (stmt);
ad4a85ad 2594 else
75a70cf9 2595 other_op = gimple_assign_rhs1 (stmt);
2596
2597 si = gsi_for_stmt (stmt);
2598 gimple_assign_set_rhs_from_tree (&si, other_op);
2599
2600 /* We should not have reallocated STMT. */
2601 gcc_assert (gsi_stmt (si) == stmt);
2602
ad4a85ad 2603 update_stmt (stmt);
2604}
2605
2606/* Reassociates the expression in that NAME1 and NAME2 are used so that they
b63803a8 2607 are combined in a single statement, and returns this statement. */
ad4a85ad 2608
42acab1c 2609static gimple *
b63803a8 2610reassociate_to_the_same_stmt (tree name1, tree name2)
ad4a85ad 2611{
42acab1c 2612 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
1a91d914 2613 gassign *new_stmt, *tmp_stmt;
75a70cf9 2614 tree new_name, tmp_name, var, r1, r2;
ad4a85ad 2615 unsigned dist1, dist2;
2616 enum tree_code code;
2617 tree type = TREE_TYPE (name1);
75a70cf9 2618 gimple_stmt_iterator bsi;
ad4a85ad 2619
2620 stmt1 = find_use_stmt (&name1);
2621 stmt2 = find_use_stmt (&name2);
2622 root1 = find_associative_operation_root (stmt1, &dist1);
2623 root2 = find_associative_operation_root (stmt2, &dist2);
75a70cf9 2624 code = gimple_assign_rhs_code (stmt1);
ad4a85ad 2625
2626 gcc_assert (root1 && root2 && root1 == root2
75a70cf9 2627 && code == gimple_assign_rhs_code (stmt2));
ad4a85ad 2628
2629 /* Find the root of the nearest expression in that both NAME1 and NAME2
2630 are used. */
2631 r1 = name1;
2632 s1 = stmt1;
2633 r2 = name2;
2634 s2 = stmt2;
2635
2636 while (dist1 > dist2)
2637 {
2638 s1 = find_use_stmt (&r1);
75a70cf9 2639 r1 = gimple_assign_lhs (s1);
ad4a85ad 2640 dist1--;
2641 }
2642 while (dist2 > dist1)
2643 {
2644 s2 = find_use_stmt (&r2);
75a70cf9 2645 r2 = gimple_assign_lhs (s2);
ad4a85ad 2646 dist2--;
2647 }
2648
2649 while (s1 != s2)
2650 {
2651 s1 = find_use_stmt (&r1);
75a70cf9 2652 r1 = gimple_assign_lhs (s1);
ad4a85ad 2653 s2 = find_use_stmt (&r2);
75a70cf9 2654 r2 = gimple_assign_lhs (s2);
ad4a85ad 2655 }
2656
2657 /* Remove NAME1 and NAME2 from the statements in that they are used
2658 currently. */
2659 remove_name_from_operation (stmt1, name1);
2660 remove_name_from_operation (stmt2, name2);
2661
2662 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2663 combine it with the rhs of S1. */
2ac51e48 2664 var = create_tmp_reg (type, "predreastmp");
f9e245b2 2665 new_name = make_ssa_name (var);
e9cf809e 2666 new_stmt = gimple_build_assign (new_name, code, name1, name2);
ad4a85ad 2667
2ac51e48 2668 var = create_tmp_reg (type, "predreastmp");
f9e245b2 2669 tmp_name = make_ssa_name (var);
75a70cf9 2670
2671 /* Rhs of S1 may now be either a binary expression with operation
2672 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2673 so that name1 or name2 was removed from it). */
e9cf809e 2674 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2675 gimple_assign_rhs1 (s1),
2676 gimple_assign_rhs2 (s1));
75a70cf9 2677
2678 bsi = gsi_for_stmt (s1);
2679 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2680 s1 = gsi_stmt (bsi);
ad4a85ad 2681 update_stmt (s1);
2682
b63803a8 2683 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
75a70cf9 2684 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
ad4a85ad 2685
2686 return new_stmt;
2687}
2688
2689/* Returns the statement that combines references R1 and R2. In case R1
2690 and R2 are not used in the same statement, but they are used with an
2691 associative and commutative operation in the same expression, reassociate
b63803a8 2692 the expression so that they are used in the same statement. */
ad4a85ad 2693
42acab1c 2694static gimple *
b63803a8 2695stmt_combining_refs (dref r1, dref r2)
ad4a85ad 2696{
42acab1c 2697 gimple *stmt1, *stmt2;
ad4a85ad 2698 tree name1 = name_for_ref (r1);
2699 tree name2 = name_for_ref (r2);
2700
2701 stmt1 = find_use_stmt (&name1);
2702 stmt2 = find_use_stmt (&name2);
2703 if (stmt1 == stmt2)
2704 return stmt1;
2705
b63803a8 2706 return reassociate_to_the_same_stmt (name1, name2);
ad4a85ad 2707}
2708
2709/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2710 description of the new chain is returned, otherwise we return NULL. */
2711
2712static chain_p
2713combine_chains (chain_p ch1, chain_p ch2)
2714{
2715 dref r1, r2, nw;
2716 enum tree_code op = ERROR_MARK;
2717 bool swap = false;
2718 chain_p new_chain;
b63803a8 2719 unsigned i;
ad4a85ad 2720 tree rslt_type = NULL_TREE;
2721
2722 if (ch1 == ch2)
c84b1d32 2723 return NULL;
ad4a85ad 2724 if (ch1->length != ch2->length)
2725 return NULL;
2726
f1f41a6c 2727 if (ch1->refs.length () != ch2->refs.length ())
ad4a85ad 2728 return NULL;
2729
f1f41a6c 2730 for (i = 0; (ch1->refs.iterate (i, &r1)
2731 && ch2->refs.iterate (i, &r2)); i++)
ad4a85ad 2732 {
2733 if (r1->distance != r2->distance)
2734 return NULL;
2735
2736 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2737 return NULL;
2738 }
2739
2740 if (swap)
a4f59596 2741 std::swap (ch1, ch2);
ad4a85ad 2742
2743 new_chain = XCNEW (struct chain);
2744 new_chain->type = CT_COMBINATION;
f4e36c33 2745 new_chain->op = op;
ad4a85ad 2746 new_chain->ch1 = ch1;
2747 new_chain->ch2 = ch2;
2748 new_chain->rslt_type = rslt_type;
2749 new_chain->length = ch1->length;
2750
b63803a8 2751 for (i = 0; (ch1->refs.iterate (i, &r1)
2752 && ch2->refs.iterate (i, &r2)); i++)
2753 {
2e966e2a 2754 nw = XCNEW (class dref_d);
b63803a8 2755 nw->stmt = stmt_combining_refs (r1, r2);
ad4a85ad 2756 nw->distance = r1->distance;
7a31ada4 2757
42230610 2758 new_chain->refs.safe_push (nw);
7a31ada4 2759 }
ad4a85ad 2760
b63803a8 2761 ch1->combined = true;
2762 ch2->combined = true;
ad4a85ad 2763 return new_chain;
2764}
2765
e02e28c4 2766/* Recursively update position information of all offspring chains to ROOT
2767 chain's position information. */
2768
2769static void
2770update_pos_for_combined_chains (chain_p root)
2771{
2772 chain_p ch1 = root->ch1, ch2 = root->ch2;
2773 dref ref, ref1, ref2;
2774 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2775 && ch1->refs.iterate (j, &ref1)
2776 && ch2->refs.iterate (j, &ref2)); ++j)
2777 ref1->pos = ref2->pos = ref->pos;
2778
2779 if (ch1->type == CT_COMBINATION)
2780 update_pos_for_combined_chains (ch1);
2781 if (ch2->type == CT_COMBINATION)
2782 update_pos_for_combined_chains (ch2);
2783}
2784
2785/* Returns true if statement S1 dominates statement S2. */
2786
2787static bool
2788pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2789{
2790 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2791
2792 if (!bb1 || s1 == s2)
2793 return true;
2794
2795 if (bb1 == bb2)
2796 return gimple_uid (s1) < gimple_uid (s2);
2797
2798 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2799}
2800
2801/* Try to combine the CHAINS in LOOP. */
ad4a85ad 2802
2803static void
2e966e2a 2804try_combine_chains (class loop *loop, vec<chain_p> *chains)
ad4a85ad 2805{
2806 unsigned i, j;
2807 chain_p ch1, ch2, cch;
c2078b80 2808 auto_vec<chain_p> worklist;
e02e28c4 2809 bool combined_p = false;
ad4a85ad 2810
f1f41a6c 2811 FOR_EACH_VEC_ELT (*chains, i, ch1)
ad4a85ad 2812 if (chain_can_be_combined_p (ch1))
f1f41a6c 2813 worklist.safe_push (ch1);
ad4a85ad 2814
f1f41a6c 2815 while (!worklist.is_empty ())
ad4a85ad 2816 {
f1f41a6c 2817 ch1 = worklist.pop ();
ad4a85ad 2818 if (!chain_can_be_combined_p (ch1))
2819 continue;
2820
f1f41a6c 2821 FOR_EACH_VEC_ELT (*chains, j, ch2)
ad4a85ad 2822 {
2823 if (!chain_can_be_combined_p (ch2))
2824 continue;
2825
2826 cch = combine_chains (ch1, ch2);
2827 if (cch)
2828 {
f1f41a6c 2829 worklist.safe_push (cch);
2830 chains->safe_push (cch);
e02e28c4 2831 combined_p = true;
2832 break;
2833 }
2834 }
2835 }
2836 if (!combined_p)
2837 return;
2838
2839 /* Setup UID for all statements in dominance order. */
886345e6 2840 basic_block *bbs = get_loop_body_in_dom_order (loop);
e02e28c4 2841 renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2842 free (bbs);
2843
2844 /* Re-association in combined chains may generate statements different to
2845 order of references of the original chain. We need to keep references
2846 of combined chain in dominance order so that all uses will be inserted
2847 after definitions. Note:
2848 A) This is necessary for all combined chains.
2849 B) This is only necessary for ZERO distance references because other
2850 references inherit value from loop carried PHIs.
2851
2852 We first update position information for all combined chains. */
2853 dref ref;
2854 for (i = 0; chains->iterate (i, &ch1); ++i)
2855 {
2856 if (ch1->type != CT_COMBINATION || ch1->combined)
2857 continue;
2858
2859 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2860 ref->pos = gimple_uid (ref->stmt);
2861
2862 update_pos_for_combined_chains (ch1);
2863 }
2864 /* Then sort references according to newly updated position information. */
2865 for (i = 0; chains->iterate (i, &ch1); ++i)
2866 {
2867 if (ch1->type != CT_COMBINATION && !ch1->combined)
2868 continue;
2869
2870 /* Find the first reference with non-ZERO distance. */
2871 if (ch1->length == 0)
2872 j = ch1->refs.length();
2873 else
2874 {
2875 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2876 if (ref->distance != 0)
2877 break;
2878 }
2879
2880 /* Sort all ZERO distance references by position. */
2881 qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2882
2883 if (ch1->combined)
2884 continue;
2885
2886 /* For ZERO length chain, has_max_use_after must be true since root
2887 combined stmt must dominates others. */
2888 if (ch1->length == 0)
2889 {
2890 ch1->has_max_use_after = true;
2891 continue;
2892 }
2893 /* Check if there is use at max distance after root for combined chains
2894 and set flag accordingly. */
2895 ch1->has_max_use_after = false;
2896 gimple *root_stmt = get_chain_root (ch1)->stmt;
2897 for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2898 {
2899 if (ref->distance == ch1->length
2900 && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2901 {
2902 ch1->has_max_use_after = true;
ad4a85ad 2903 break;
2904 }
2905 }
2906 }
2907}
2908
a652753d 2909/* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2910 if this is impossible because one of these initializers may trap, true
2911 otherwise. */
2912
2913static bool
2e966e2a 2914prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
a652753d 2915{
2916 unsigned i, n = chain->length;
2917
2918 /* For now we can't eliminate stores if some of them are conditional
2919 executed. */
2920 if (!chain->all_always_accessed)
2921 return false;
2922
2923 /* Nothing to intialize for intra-iteration store elimination. */
2924 if (n == 0 && chain->type == CT_STORE_STORE)
2925 return true;
2926
e33b3a13 2927 /* For store elimination chain, there is nothing to initialize if stores
2928 to be eliminated only store loop invariant values into memory. */
2929 if (chain->type == CT_STORE_STORE
2930 && is_inv_store_elimination_chain (loop, chain))
2931 {
2932 chain->inv_store_elimination = true;
2933 return true;
2934 }
2935
a652753d 2936 chain->inits.create (n);
2937 chain->inits.safe_grow_cleared (n);
2938
2939 /* For store eliminatin chain like below:
2940
2941 for (i = 0; i < len; i++)
2942 {
2943 a[i] = 1;
2944 // a[i + 1] = ...
2945 a[i + 2] = 3;
2946 }
2947
2948 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2949 content of a[i + 1] remain the same if the loop iterates fewer times
2950 than chain->length. We need to set up root variables for such stores
2951 by loading from memory before loop. Note we only need to load bubble
2952 elements because loop body is guaranteed to be executed at least once
2953 after loop's preheader edge. */
2954 auto_vec<bool> bubbles;
2955 bubbles.safe_grow_cleared (n + 1);
2956 for (i = 0; i < chain->refs.length (); i++)
2957 bubbles[chain->refs[i]->distance] = true;
2958
2959 struct data_reference *dr = get_chain_root (chain)->ref;
2960 for (i = 0; i < n; i++)
2961 {
2962 if (bubbles[i])
2963 continue;
2964
2965 gimple_seq stmts = NULL;
2966
2967 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2968 if (stmts)
2969 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2970
2971 chain->inits[i] = init;
2972 }
2973
2974 return true;
2975}
2976
ad4a85ad 2977/* Prepare initializers for CHAIN in LOOP. Returns false if this is
2978 impossible because one of these initializers may trap, true otherwise. */
2979
2980static bool
2e966e2a 2981prepare_initializers_chain (class loop *loop, chain_p chain)
ad4a85ad 2982{
2983 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2984 struct data_reference *dr = get_chain_root (chain)->ref;
75a70cf9 2985 tree init;
ad4a85ad 2986 dref laref;
2987 edge entry = loop_preheader_edge (loop);
2988
a652753d 2989 if (chain->type == CT_STORE_STORE)
2990 return prepare_initializers_chain_store_elim (loop, chain);
2991
ad4a85ad 2992 /* Find the initializers for the variables, and check that they cannot
2993 trap. */
f1f41a6c 2994 chain->inits.create (n);
ad4a85ad 2995 for (i = 0; i < n; i++)
f1f41a6c 2996 chain->inits.quick_push (NULL_TREE);
ad4a85ad 2997
2998 /* If we have replaced some looparound phi nodes, use their initializers
2999 instead of creating our own. */
f1f41a6c 3000 FOR_EACH_VEC_ELT (chain->refs, i, laref)
ad4a85ad 3001 {
75a70cf9 3002 if (gimple_code (laref->stmt) != GIMPLE_PHI)
ad4a85ad 3003 continue;
3004
3005 gcc_assert (laref->distance > 0);
f1f41a6c 3006 chain->inits[n - laref->distance]
3007 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
ad4a85ad 3008 }
3009
3010 for (i = 0; i < n; i++)
3011 {
6968257f 3012 gimple_seq stmts = NULL;
3013
f1f41a6c 3014 if (chain->inits[i] != NULL_TREE)
ad4a85ad 3015 continue;
3016
99f6be4b 3017 init = ref_at_iteration (dr, (int) i - n, &stmts);
ad4a85ad 3018 if (!chain->all_always_accessed && tree_could_trap_p (init))
6968257f 3019 {
3020 gimple_seq_discard (stmts);
3021 return false;
3022 }
ad4a85ad 3023
ad4a85ad 3024 if (stmts)
a481acc5 3025 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
ad4a85ad 3026
f1f41a6c 3027 chain->inits[i] = init;
ad4a85ad 3028 }
3029
3030 return true;
3031}
3032
3033/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3034 be used because the initializers might trap. */
3035
3036static void
2e966e2a 3037prepare_initializers (class loop *loop, vec<chain_p> chains)
ad4a85ad 3038{
3039 chain_p chain;
3040 unsigned i;
3041
f1f41a6c 3042 for (i = 0; i < chains.length (); )
ad4a85ad 3043 {
f1f41a6c 3044 chain = chains[i];
ad4a85ad 3045 if (prepare_initializers_chain (loop, chain))
3046 i++;
3047 else
3048 {
3049 release_chain (chain);
f1f41a6c 3050 chains.unordered_remove (i);
ad4a85ad 3051 }
3052 }
3053}
3054
a652753d 3055/* Generates finalizer memory references for CHAIN in LOOP. Returns true
3056 if finalizer code for CHAIN can be generated, otherwise false. */
3057
3058static bool
2e966e2a 3059prepare_finalizers_chain (class loop *loop, chain_p chain)
a652753d 3060{
3061 unsigned i, n = chain->length;
3062 struct data_reference *dr = get_chain_root (chain)->ref;
3063 tree fini, niters = number_of_latch_executions (loop);
3064
3065 /* For now we can't eliminate stores if some of them are conditional
3066 executed. */
3067 if (!chain->all_always_accessed)
3068 return false;
3069
3070 chain->finis.create (n);
3071 for (i = 0; i < n; i++)
3072 chain->finis.quick_push (NULL_TREE);
3073
3074 /* We never use looparound phi node for store elimination chains. */
3075
3076 /* Find the finalizers for the variables, and check that they cannot
3077 trap. */
3078 for (i = 0; i < n; i++)
3079 {
3080 gimple_seq stmts = NULL;
3081 gcc_assert (chain->finis[i] == NULL_TREE);
3082
3083 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3084 {
cf071e9e 3085 niters = unshare_expr (niters);
a652753d 3086 niters = force_gimple_operand (niters, &stmts, true, NULL);
3087 if (stmts)
3088 {
3089 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3090 stmts = NULL;
3091 }
3092 }
3093 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3094 if (stmts)
3095 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3096
3097 chain->finis[i] = fini;
3098 }
3099
3100 return true;
3101}
3102
3103/* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3104 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3105
3106static bool
2e966e2a 3107prepare_finalizers (class loop *loop, vec<chain_p> chains)
a652753d 3108{
3109 chain_p chain;
3110 unsigned i;
3111 bool loop_closed_ssa = false;
3112
3113 for (i = 0; i < chains.length ();)
3114 {
3115 chain = chains[i];
3116
3117 /* Finalizer is only necessary for inter-iteration store elimination
3118 chains. */
3119 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3120 {
3121 i++;
3122 continue;
3123 }
3124
3125 if (prepare_finalizers_chain (loop, chain))
3126 {
3127 i++;
89c72921 3128 /* Be conservative, assume loop closed ssa form is corrupted
3129 by store-store chain. Though it's not always the case if
3130 eliminated stores only store loop invariant values into
3131 memory. */
3132 loop_closed_ssa = true;
a652753d 3133 }
3134 else
3135 {
3136 release_chain (chain);
3137 chains.unordered_remove (i);
3138 }
3139 }
3140 return loop_closed_ssa;
3141}
3142
a481acc5 3143/* Insert all initializing gimple stmts into loop's entry edge. */
3144
3145static void
2e966e2a 3146insert_init_seqs (class loop *loop, vec<chain_p> chains)
a481acc5 3147{
3148 unsigned i;
3149 edge entry = loop_preheader_edge (loop);
3150
3151 for (i = 0; i < chains.length (); ++i)
3152 if (chains[i]->init_seq)
3153 {
3154 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3155 chains[i]->init_seq = NULL;
3156 }
3157}
3158
a652753d 3159/* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3160 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3161 form was corrupted. */
ad4a85ad 3162
a652753d 3163static unsigned
2e966e2a 3164tree_predictive_commoning_loop (class loop *loop)
ad4a85ad 3165{
f1f41a6c 3166 vec<data_reference_p> datarefs;
3167 vec<ddr_p> dependences;
ad4a85ad 3168 struct component *components;
1e094109 3169 vec<chain_p> chains = vNULL;
ad4a85ad 3170 unsigned unroll_factor;
2e966e2a 3171 class tree_niter_desc desc;
a652753d 3172 bool unroll = false, loop_closed_ssa = false;
ad4a85ad 3173 edge exit;
ad4a85ad 3174
3175 if (dump_file && (dump_flags & TDF_DETAILS))
3176 fprintf (dump_file, "Processing loop %d\n", loop->num);
3177
c4dd22f7 3178 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3179 if (get_max_loop_iterations_int (loop) == 0)
3180 {
3181 if (dump_file && (dump_flags & TDF_DETAILS))
3182 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3183
a652753d 3184 return 0;
c4dd22f7 3185 }
3186
ad4a85ad 3187 /* Find the data references and split them into components according to their
3188 dependence relations. */
4997014d 3189 auto_vec<loop_p, 3> loop_nest;
f1f41a6c 3190 dependences.create (10);
e85cf4e5 3191 datarefs.create (10);
713f1f14 3192 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3193 &dependences))
3194 {
3195 if (dump_file && (dump_flags & TDF_DETAILS))
3196 fprintf (dump_file, "Cannot analyze data dependencies\n");
713f1f14 3197 free_data_refs (datarefs);
3198 free_dependence_relations (dependences);
a652753d 3199 return 0;
713f1f14 3200 }
3201
ad4a85ad 3202 if (dump_file && (dump_flags & TDF_DETAILS))
3203 dump_data_dependence_relations (dump_file, dependences);
3204
3205 components = split_data_refs_to_components (loop, datarefs, dependences);
f1f41a6c 3206 loop_nest.release ();
ad4a85ad 3207 free_dependence_relations (dependences);
3208 if (!components)
3209 {
3210 free_data_refs (datarefs);
f037f351 3211 free_affine_expand_cache (&name_expansions);
a652753d 3212 return 0;
ad4a85ad 3213 }
3214
3215 if (dump_file && (dump_flags & TDF_DETAILS))
3216 {
3217 fprintf (dump_file, "Initial state:\n\n");
3218 dump_components (dump_file, components);
3219 }
3220
3221 /* Find the suitable components and split them into chains. */
3222 components = filter_suitable_components (loop, components);
3223
035def86 3224 auto_bitmap tmp_vars;
ad4a85ad 3225 looparound_phis = BITMAP_ALLOC (NULL);
3226 determine_roots (loop, components, &chains);
3227 release_components (components);
3228
f1f41a6c 3229 if (!chains.exists ())
ad4a85ad 3230 {
3231 if (dump_file && (dump_flags & TDF_DETAILS))
3232 fprintf (dump_file,
3233 "Predictive commoning failed: no suitable chains\n");
3234 goto end;
3235 }
3236 prepare_initializers (loop, chains);
a652753d 3237 loop_closed_ssa = prepare_finalizers (loop, chains);
ad4a85ad 3238
3239 /* Try to combine the chains that are always worked with together. */
e02e28c4 3240 try_combine_chains (loop, &chains);
ad4a85ad 3241
a481acc5 3242 insert_init_seqs (loop, chains);
3243
ad4a85ad 3244 if (dump_file && (dump_flags & TDF_DETAILS))
3245 {
3246 fprintf (dump_file, "Before commoning:\n\n");
3247 dump_chains (dump_file, chains);
3248 }
3249
3250 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3251 that its number of iterations is divisible by the factor. */
3252 unroll_factor = determine_unroll_factor (chains);
3253 scev_reset ();
286fa508 3254 unroll = (unroll_factor > 1
3255 && can_unroll_loop_p (loop, unroll_factor, &desc));
ad4a85ad 3256 exit = single_dom_exit (loop);
3257
3258 /* Execute the predictive commoning transformations, and possibly unroll the
3259 loop. */
3260 if (unroll)
3261 {
3262 struct epcc_data dta;
3263
3264 if (dump_file && (dump_flags & TDF_DETAILS))
3265 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3266
3267 dta.chains = chains;
3268 dta.tmp_vars = tmp_vars;
48e1416a 3269
ad4a85ad 3270 update_ssa (TODO_update_ssa_only_virtuals);
3271
3272 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3273 execute_pred_commoning_cbck is called may cause phi nodes to be
3274 reallocated, which is a problem since CHAINS may point to these
3275 statements. To fix this, we store the ssa names defined by the
3276 phi nodes here instead of the phi nodes themselves, and restore
3277 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3278 replace_phis_by_defined_names (chains);
3279
3280 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3281 execute_pred_commoning_cbck, &dta);
3282 eliminate_temp_copies (loop, tmp_vars);
3283 }
3284 else
3285 {
3286 if (dump_file && (dump_flags & TDF_DETAILS))
3287 fprintf (dump_file,
3288 "Executing predictive commoning without unrolling.\n");
3289 execute_pred_commoning (loop, chains, tmp_vars);
3290 }
3291
3292end: ;
3293 release_chains (chains);
3294 free_data_refs (datarefs);
ad4a85ad 3295 BITMAP_FREE (looparound_phis);
3296
3297 free_affine_expand_cache (&name_expansions);
3298
a652753d 3299 return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
ad4a85ad 3300}
3301
3302/* Runs predictive commoning. */
3303
eb2a640e 3304unsigned
ad4a85ad 3305tree_predictive_commoning (void)
3306{
2e966e2a 3307 class loop *loop;
a652753d 3308 unsigned ret = 0, changed = 0;
ad4a85ad 3309
3310 initialize_original_copy_tables ();
f21d4d00 3311 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
7baffbd3 3312 if (optimize_loop_for_speed_p (loop))
3313 {
a652753d 3314 changed |= tree_predictive_commoning_loop (loop);
7baffbd3 3315 }
a652753d 3316 free_original_copy_tables ();
ad4a85ad 3317
a652753d 3318 if (changed > 0)
ad4a85ad 3319 {
3320 scev_reset ();
a652753d 3321
3322 if (changed > 1)
3323 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3324
eb2a640e 3325 ret = TODO_cleanup_cfg;
ad4a85ad 3326 }
eb2a640e 3327
3328 return ret;
ad4a85ad 3329}
64641360 3330
3331/* Predictive commoning Pass. */
3332
3333static unsigned
b3083327 3334run_tree_predictive_commoning (struct function *fun)
64641360 3335{
b3083327 3336 if (number_of_loops (fun) <= 1)
64641360 3337 return 0;
3338
3339 return tree_predictive_commoning ();
3340}
3341
64641360 3342namespace {
3343
3344const pass_data pass_data_predcom =
3345{
3346 GIMPLE_PASS, /* type */
3347 "pcom", /* name */
3348 OPTGROUP_LOOP, /* optinfo_flags */
64641360 3349 TV_PREDCOM, /* tv_id */
3350 PROP_cfg, /* properties_required */
3351 0, /* properties_provided */
3352 0, /* properties_destroyed */
3353 0, /* todo_flags_start */
3354 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3355};
3356
3357class pass_predcom : public gimple_opt_pass
3358{
3359public:
3360 pass_predcom (gcc::context *ctxt)
3361 : gimple_opt_pass (pass_data_predcom, ctxt)
3362 {}
3363
3364 /* opt_pass methods: */
31315c24 3365 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
b3083327 3366 virtual unsigned int execute (function *fun)
65b0537f 3367 {
b3083327 3368 return run_tree_predictive_commoning (fun);
65b0537f 3369 }
64641360 3370
3371}; // class pass_predcom
3372
3373} // anon namespace
3374
3375gimple_opt_pass *
3376make_pass_predcom (gcc::context *ctxt)
3377{
3378 return new pass_predcom (ctxt);
3379}
3380
3381