]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-predcom.c
[Ada] Update headers
[thirdparty/gcc.git] / gcc / tree-predcom.c
CommitLineData
bbc8a8dc 1/* Predictive commoning.
8d9254fc 2 Copyright (C) 2005-2020 Free Software Foundation, Inc.
b8698a0f 3
bbc8a8dc 4This file is part of GCC.
b8698a0f 5
bbc8a8dc
ZD
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
9dcd6f09 8Free Software Foundation; either version 3, or (at your option) any
bbc8a8dc 9later version.
b8698a0f 10
bbc8a8dc
ZD
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
b8698a0f 15
bbc8a8dc 16You should have received a copy of the GNU General Public License
9dcd6f09
NC
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
bbc8a8dc
ZD
19
20/* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
b8698a0f 32
bbc8a8dc
ZD
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
b8698a0f 66
bbc8a8dc
ZD
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
b8698a0f 79 single root reference of the chain (adjusting their distance
bbc8a8dc
ZD
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
073a8998 102 of a reference reusing its value. Variables R0 up to RN are created,
bbc8a8dc
ZD
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
b8698a0f 135
bbc8a8dc
ZD
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
d9c259ef
BC
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
bbc8a8dc 168
d9c259ef 169 It can be replaced with:
bbc8a8dc 170
d9c259ef
BC
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
bbc8a8dc 182
d9c259ef
BC
183 If the loop runs more than 1 iterations, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
9cdcebf9
BC
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
d9c259ef
BC
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
bbc8a8dc
ZD
202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207#include "config.h"
208#include "system.h"
209#include "coretypes.h"
c7131fb2 210#include "backend.h"
957060b5 211#include "rtl.h"
bbc8a8dc 212#include "tree.h"
c7131fb2 213#include "gimple.h"
957060b5
AM
214#include "predict.h"
215#include "tree-pass.h"
c7131fb2 216#include "ssa.h"
957060b5 217#include "gimple-pretty-print.h"
c7131fb2 218#include "alias.h"
40e23961 219#include "fold-const.h"
bbc8a8dc 220#include "cfgloop.h"
2fb9a547 221#include "tree-eh.h"
45b0be94 222#include "gimplify.h"
5be5c238 223#include "gimple-iterator.h"
18f429e2 224#include "gimplify-me.h"
e28030cf
AM
225#include "tree-ssa-loop-ivopts.h"
226#include "tree-ssa-loop-manip.h"
227#include "tree-ssa-loop-niter.h"
442b4905
AM
228#include "tree-ssa-loop.h"
229#include "tree-into-ssa.h"
230#include "tree-dfa.h"
7a300452 231#include "tree-ssa.h"
bbc8a8dc
ZD
232#include "tree-data-ref.h"
233#include "tree-scalar-evolution.h"
bbc8a8dc 234#include "tree-affine.h"
59537744 235#include "builtins.h"
bbc8a8dc
ZD
236
237/* The maximum number of iterations between the considered memory
238 references. */
239
240#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
b8698a0f 241
726a989a
RB
242/* Data references (or phi nodes that carry data reference values across
243 loop iterations). */
bbc8a8dc 244
6c1dae73 245typedef class dref_d
bbc8a8dc 246{
6c1dae73 247public:
bbc8a8dc
ZD
248 /* The reference itself. */
249 struct data_reference *ref;
250
251 /* The statement in that the reference appears. */
355fe088 252 gimple *stmt;
726a989a
RB
253
254 /* In case that STMT is a phi node, this field is set to the SSA name
255 defined by it in replace_phis_by_defined_names (in order to avoid
256 pointing to phi node that got reallocated in the meantime). */
257 tree name_defined_by_phi;
bbc8a8dc
ZD
258
259 /* Distance of the reference from the root of the chain (in number of
260 iterations of the loop). */
261 unsigned distance;
262
263 /* Number of iterations offset from the first reference in the component. */
807e902e 264 widest_int offset;
bbc8a8dc
ZD
265
266 /* Number of the reference in a component, in dominance ordering. */
267 unsigned pos;
268
269 /* True if the memory reference is always accessed when the loop is
270 entered. */
271 unsigned always_accessed : 1;
272} *dref;
273
bbc8a8dc
ZD
274
275/* Type of the chain of the references. */
276
277enum chain_type
278{
279 /* The addresses of the references in the chain are constant. */
280 CT_INVARIANT,
281
282 /* There are only loads in the chain. */
283 CT_LOAD,
284
285 /* Root of the chain is store, the rest are loads. */
286 CT_STORE_LOAD,
287
d9c259ef
BC
288 /* There are only stores in the chain. */
289 CT_STORE_STORE,
290
bbc8a8dc
ZD
291 /* A combination of two chains. */
292 CT_COMBINATION
293};
294
295/* Chains of data references. */
296
297typedef struct chain
298{
299 /* Type of the chain. */
300 enum chain_type type;
301
302 /* For combination chains, the operator and the two chains that are
303 combined, and the type of the result. */
82d6e6fc 304 enum tree_code op;
bbc8a8dc
ZD
305 tree rslt_type;
306 struct chain *ch1, *ch2;
307
308 /* The references in the chain. */
9771b263 309 vec<dref> refs;
bbc8a8dc
ZD
310
311 /* The maximum distance of the reference in the chain from the root. */
312 unsigned length;
313
314 /* The variables used to copy the value throughout iterations. */
9771b263 315 vec<tree> vars;
bbc8a8dc
ZD
316
317 /* Initializers for the variables. */
9771b263 318 vec<tree> inits;
bbc8a8dc 319
d9c259ef
BC
320 /* Finalizers for the eliminated stores. */
321 vec<tree> finis;
322
aa4de160
BC
323 /* gimple stmts intializing the initial variables of the chain. */
324 gimple_seq init_seq;
325
d9c259ef
BC
326 /* gimple stmts finalizing the eliminated stores of the chain. */
327 gimple_seq fini_seq;
328
bbc8a8dc
ZD
329 /* True if there is a use of a variable with the maximal distance
330 that comes after the root in the loop. */
331 unsigned has_max_use_after : 1;
332
333 /* True if all the memory references in the chain are always accessed. */
334 unsigned all_always_accessed : 1;
335
336 /* True if this chain was combined together with some other chain. */
337 unsigned combined : 1;
39ff50ce
BC
338
339 /* True if this is store elimination chain and eliminated stores store
340 loop invariant value into memory. */
341 unsigned inv_store_elimination : 1;
bbc8a8dc
ZD
342} *chain_p;
343
bbc8a8dc
ZD
344
345/* Describes the knowledge about the step of the memory references in
346 the component. */
347
348enum ref_step_type
349{
350 /* The step is zero. */
351 RS_INVARIANT,
352
353 /* The step is nonzero. */
354 RS_NONZERO,
355
356 /* The step may or may not be nonzero. */
357 RS_ANY
358};
359
360/* Components of the data dependence graph. */
361
362struct component
363{
364 /* The references in the component. */
9771b263 365 vec<dref> refs;
bbc8a8dc
ZD
366
367 /* What we know about the step of the references in the component. */
368 enum ref_step_type comp_step;
369
d9c259ef
BC
370 /* True if all references in component are stores and we try to do
371 intra/inter loop iteration dead store elimination. */
372 bool eliminate_store_p;
373
bbc8a8dc
ZD
374 /* Next component in the list. */
375 struct component *next;
376};
377
378/* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
379
380static bitmap looparound_phis;
381
382/* Cache used by tree_to_aff_combination_expand. */
383
39c8aaa4 384static hash_map<tree, name_expansion *> *name_expansions;
bbc8a8dc
ZD
385
386/* Dumps data reference REF to FILE. */
387
388extern void dump_dref (FILE *, dref);
389void
390dump_dref (FILE *file, dref ref)
391{
392 if (ref->ref)
393 {
394 fprintf (file, " ");
395 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396 fprintf (file, " (id %u%s)\n", ref->pos,
397 DR_IS_READ (ref->ref) ? "" : ", write");
398
399 fprintf (file, " offset ");
807e902e 400 print_decs (ref->offset, file);
bbc8a8dc
ZD
401 fprintf (file, "\n");
402
403 fprintf (file, " distance %u\n", ref->distance);
404 }
405 else
406 {
726a989a 407 if (gimple_code (ref->stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
408 fprintf (file, " looparound ref\n");
409 else
410 fprintf (file, " combination ref\n");
411 fprintf (file, " in statement ");
726a989a 412 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
bbc8a8dc
ZD
413 fprintf (file, "\n");
414 fprintf (file, " distance %u\n", ref->distance);
415 }
416
417}
418
419/* Dumps CHAIN to FILE. */
420
421extern void dump_chain (FILE *, chain_p);
422void
423dump_chain (FILE *file, chain_p chain)
424{
425 dref a;
426 const char *chain_type;
427 unsigned i;
428 tree var;
429
430 switch (chain->type)
431 {
432 case CT_INVARIANT:
433 chain_type = "Load motion";
434 break;
435
436 case CT_LOAD:
437 chain_type = "Loads-only";
438 break;
439
440 case CT_STORE_LOAD:
441 chain_type = "Store-loads";
442 break;
443
d9c259ef
BC
444 case CT_STORE_STORE:
445 chain_type = "Store-stores";
446 break;
447
bbc8a8dc
ZD
448 case CT_COMBINATION:
449 chain_type = "Combination";
450 break;
451
452 default:
453 gcc_unreachable ();
454 }
455
456 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 chain->combined ? " (combined)" : "");
458 if (chain->type != CT_INVARIANT)
459 fprintf (file, " max distance %u%s\n", chain->length,
460 chain->has_max_use_after ? "" : ", may reuse first");
461
462 if (chain->type == CT_COMBINATION)
463 {
464 fprintf (file, " equal to %p %s %p in type ",
82d6e6fc 465 (void *) chain->ch1, op_symbol_code (chain->op),
bbc8a8dc
ZD
466 (void *) chain->ch2);
467 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468 fprintf (file, "\n");
469 }
470
9771b263 471 if (chain->vars.exists ())
bbc8a8dc
ZD
472 {
473 fprintf (file, " vars");
9771b263 474 FOR_EACH_VEC_ELT (chain->vars, i, var)
bbc8a8dc
ZD
475 {
476 fprintf (file, " ");
477 print_generic_expr (file, var, TDF_SLIM);
478 }
479 fprintf (file, "\n");
480 }
481
9771b263 482 if (chain->inits.exists ())
bbc8a8dc
ZD
483 {
484 fprintf (file, " inits");
9771b263 485 FOR_EACH_VEC_ELT (chain->inits, i, var)
bbc8a8dc
ZD
486 {
487 fprintf (file, " ");
488 print_generic_expr (file, var, TDF_SLIM);
489 }
490 fprintf (file, "\n");
491 }
492
493 fprintf (file, " references:\n");
9771b263 494 FOR_EACH_VEC_ELT (chain->refs, i, a)
bbc8a8dc
ZD
495 dump_dref (file, a);
496
497 fprintf (file, "\n");
498}
499
500/* Dumps CHAINS to FILE. */
501
9771b263 502extern void dump_chains (FILE *, vec<chain_p> );
bbc8a8dc 503void
9771b263 504dump_chains (FILE *file, vec<chain_p> chains)
bbc8a8dc
ZD
505{
506 chain_p chain;
507 unsigned i;
508
9771b263 509 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc
ZD
510 dump_chain (file, chain);
511}
512
513/* Dumps COMP to FILE. */
514
515extern void dump_component (FILE *, struct component *);
516void
517dump_component (FILE *file, struct component *comp)
518{
519 dref a;
520 unsigned i;
521
522 fprintf (file, "Component%s:\n",
523 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
9771b263 524 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc
ZD
525 dump_dref (file, a);
526 fprintf (file, "\n");
527}
528
529/* Dumps COMPS to FILE. */
530
531extern void dump_components (FILE *, struct component *);
532void
533dump_components (FILE *file, struct component *comps)
534{
535 struct component *comp;
536
537 for (comp = comps; comp; comp = comp->next)
538 dump_component (file, comp);
539}
540
541/* Frees a chain CHAIN. */
542
543static void
544release_chain (chain_p chain)
545{
546 dref ref;
547 unsigned i;
548
549 if (chain == NULL)
550 return;
551
9771b263 552 FOR_EACH_VEC_ELT (chain->refs, i, ref)
bbc8a8dc
ZD
553 free (ref);
554
9771b263
DN
555 chain->refs.release ();
556 chain->vars.release ();
557 chain->inits.release ();
aa4de160
BC
558 if (chain->init_seq)
559 gimple_seq_discard (chain->init_seq);
bbc8a8dc 560
d9c259ef
BC
561 chain->finis.release ();
562 if (chain->fini_seq)
563 gimple_seq_discard (chain->fini_seq);
564
bbc8a8dc
ZD
565 free (chain);
566}
567
568/* Frees CHAINS. */
569
570static void
9771b263 571release_chains (vec<chain_p> chains)
bbc8a8dc
ZD
572{
573 unsigned i;
574 chain_p chain;
575
9771b263 576 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc 577 release_chain (chain);
9771b263 578 chains.release ();
bbc8a8dc
ZD
579}
580
581/* Frees a component COMP. */
582
583static void
584release_component (struct component *comp)
585{
9771b263 586 comp->refs.release ();
bbc8a8dc
ZD
587 free (comp);
588}
589
590/* Frees list of components COMPS. */
591
592static void
593release_components (struct component *comps)
594{
595 struct component *act, *next;
596
597 for (act = comps; act; act = next)
598 {
599 next = act->next;
600 release_component (act);
601 }
602}
603
604/* Finds a root of tree given by FATHERS containing A, and performs path
605 shortening. */
606
607static unsigned
608component_of (unsigned fathers[], unsigned a)
609{
610 unsigned root, n;
611
612 for (root = a; root != fathers[root]; root = fathers[root])
613 continue;
614
615 for (; a != root; a = n)
616 {
617 n = fathers[a];
618 fathers[a] = root;
619 }
620
621 return root;
622}
623
624/* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
625 components, A and B are components to merge. */
626
627static void
628merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629{
630 unsigned ca = component_of (fathers, a);
631 unsigned cb = component_of (fathers, b);
632
633 if (ca == cb)
634 return;
635
636 if (sizes[ca] < sizes[cb])
637 {
638 sizes[cb] += sizes[ca];
639 fathers[ca] = cb;
640 }
641 else
642 {
643 sizes[ca] += sizes[cb];
644 fathers[cb] = ca;
645 }
646}
647
648/* Returns true if A is a reference that is suitable for predictive commoning
649 in the innermost loop that contains it. REF_STEP is set according to the
650 step of the reference A. */
651
652static bool
653suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654{
655 tree ref = DR_REF (a), step = DR_STEP (a);
656
657 if (!step
64fb0d3a 658 || TREE_THIS_VOLATILE (ref)
7e80c6bf
EB
659 || !is_gimple_reg_type (TREE_TYPE (ref))
660 || tree_could_throw_p (ref))
bbc8a8dc
ZD
661 return false;
662
663 if (integer_zerop (step))
664 *ref_step = RS_INVARIANT;
665 else if (integer_nonzerop (step))
666 *ref_step = RS_NONZERO;
667 else
668 *ref_step = RS_ANY;
669
670 return true;
671}
672
673/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
674
675static void
676aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677{
0d82a1c8 678 tree type = TREE_TYPE (DR_OFFSET (dr));
bbc8a8dc
ZD
679 aff_tree delta;
680
0d82a1c8 681 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
bbc8a8dc 682 &name_expansions);
c0a46545 683 aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
bbc8a8dc
ZD
684 aff_combination_add (offset, &delta);
685}
686
687/* Determines number of iterations of the innermost enclosing loop before B
688 refers to exactly the same location as A and stores it to OFF. If A and
689 B do not have the same step, they never meet, or anything else fails,
690 returns false, otherwise returns true. Both A and B are assumed to
691 satisfy suitable_reference_p. */
692
693static bool
694determine_offset (struct data_reference *a, struct data_reference *b,
cc8bea09 695 poly_widest_int *off)
bbc8a8dc
ZD
696{
697 aff_tree diff, baseb, step;
49379cb1
ZD
698 tree typea, typeb;
699
700 /* Check that both the references access the location in the same type. */
701 typea = TREE_TYPE (DR_REF (a));
702 typeb = TREE_TYPE (DR_REF (b));
36618b93 703 if (!useless_type_conversion_p (typeb, typea))
49379cb1 704 return false;
bbc8a8dc
ZD
705
706 /* Check whether the base address and the step of both references is the
707 same. */
708 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710 return false;
711
712 if (integer_zerop (DR_STEP (a)))
713 {
714 /* If the references have loop invariant address, check that they access
715 exactly the same location. */
807e902e 716 *off = 0;
bbc8a8dc
ZD
717 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719 }
720
721 /* Compare the offsets of the addresses, and check whether the difference
722 is a multiple of step. */
723 aff_combination_dr_offset (a, &diff);
724 aff_combination_dr_offset (b, &baseb);
807e902e 725 aff_combination_scale (&baseb, -1);
bbc8a8dc
ZD
726 aff_combination_add (&diff, &baseb);
727
0d82a1c8 728 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
bbc8a8dc
ZD
729 &step, &name_expansions);
730 return aff_combination_constant_multiple_p (&diff, &step, off);
731}
732
733/* Returns the last basic block in LOOP for that we are sure that
734 it is executed whenever the loop is entered. */
735
736static basic_block
99b1c316 737last_always_executed_block (class loop *loop)
bbc8a8dc
ZD
738{
739 unsigned i;
9771b263 740 vec<edge> exits = get_loop_exit_edges (loop);
bbc8a8dc
ZD
741 edge ex;
742 basic_block last = loop->latch;
743
9771b263 744 FOR_EACH_VEC_ELT (exits, i, ex)
bbc8a8dc 745 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
9771b263 746 exits.release ();
bbc8a8dc
ZD
747
748 return last;
749}
750
751/* Splits dependence graph on DATAREFS described by DEPENDS to components. */
752
753static struct component *
99b1c316 754split_data_refs_to_components (class loop *loop,
9771b263
DN
755 vec<data_reference_p> datarefs,
756 vec<ddr_p> depends)
bbc8a8dc 757{
9771b263 758 unsigned i, n = datarefs.length ();
bbc8a8dc
ZD
759 unsigned ca, ia, ib, bad;
760 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
761 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
762 struct component **comps;
763 struct data_reference *dr, *dra, *drb;
764 struct data_dependence_relation *ddr;
765 struct component *comp_list = NULL, *comp;
766 dref dataref;
d9c259ef
BC
767 /* Don't do store elimination if loop has multiple exit edges. */
768 bool eliminate_store_p = single_exit (loop) != NULL;
bbc8a8dc 769 basic_block last_always_executed = last_always_executed_block (loop);
6cedf5d8 770 auto_bitmap no_store_store_comps;
b8698a0f 771
9771b263 772 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc
ZD
773 {
774 if (!DR_REF (dr))
775 {
776 /* A fake reference for call or asm_expr that may clobber memory;
777 just fail. */
778 goto end;
779 }
5ce9450f
JJ
780 /* predcom pass isn't prepared to handle calls with data references. */
781 if (is_gimple_call (DR_STMT (dr)))
782 goto end;
5417e022 783 dr->aux = (void *) (size_t) i;
bbc8a8dc
ZD
784 comp_father[i] = i;
785 comp_size[i] = 1;
786 }
787
788 /* A component reserved for the "bad" data references. */
789 comp_father[n] = n;
790 comp_size[n] = 1;
791
9771b263 792 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc
ZD
793 {
794 enum ref_step_type dummy;
795
796 if (!suitable_reference_p (dr, &dummy))
797 {
5417e022 798 ia = (unsigned) (size_t) dr->aux;
bbc8a8dc
ZD
799 merge_comps (comp_father, comp_size, n, ia);
800 }
801 }
802
9771b263 803 FOR_EACH_VEC_ELT (depends, i, ddr)
bbc8a8dc 804 {
cc8bea09 805 poly_widest_int dummy_off;
bbc8a8dc
ZD
806
807 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
808 continue;
809
810 dra = DDR_A (ddr);
811 drb = DDR_B (ddr);
d9c259ef
BC
812
813 /* Don't do store elimination if there is any unknown dependence for
814 any store data reference. */
815 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
816 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
817 || DDR_NUM_DIST_VECTS (ddr) == 0))
818 eliminate_store_p = false;
819
5417e022
ZD
820 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
821 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
bbc8a8dc
ZD
822 if (ia == ib)
823 continue;
824
825 bad = component_of (comp_father, n);
826
827 /* If both A and B are reads, we may ignore unsuitable dependences. */
41626746
JJ
828 if (DR_IS_READ (dra) && DR_IS_READ (drb))
829 {
830 if (ia == bad || ib == bad
831 || !determine_offset (dra, drb, &dummy_off))
832 continue;
833 }
834 /* If A is read and B write or vice versa and there is unsuitable
835 dependence, instead of merging both components into a component
836 that will certainly not pass suitable_component_p, just put the
837 read into bad component, perhaps at least the write together with
838 all the other data refs in it's component will be optimizable. */
839 else if (DR_IS_READ (dra) && ib != bad)
840 {
841 if (ia == bad)
6cedf5d8
RS
842 {
843 bitmap_set_bit (no_store_store_comps, ib);
844 continue;
845 }
41626746
JJ
846 else if (!determine_offset (dra, drb, &dummy_off))
847 {
6cedf5d8 848 bitmap_set_bit (no_store_store_comps, ib);
41626746
JJ
849 merge_comps (comp_father, comp_size, bad, ia);
850 continue;
851 }
852 }
853 else if (DR_IS_READ (drb) && ia != bad)
854 {
855 if (ib == bad)
6cedf5d8
RS
856 {
857 bitmap_set_bit (no_store_store_comps, ia);
858 continue;
859 }
41626746
JJ
860 else if (!determine_offset (dra, drb, &dummy_off))
861 {
6cedf5d8 862 bitmap_set_bit (no_store_store_comps, ia);
41626746
JJ
863 merge_comps (comp_father, comp_size, bad, ib);
864 continue;
865 }
866 }
d9c259ef
BC
867 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
868 && ia != bad && ib != bad
869 && !determine_offset (dra, drb, &dummy_off))
870 {
871 merge_comps (comp_father, comp_size, bad, ia);
872 merge_comps (comp_father, comp_size, bad, ib);
873 continue;
874 }
b8698a0f 875
bbc8a8dc
ZD
876 merge_comps (comp_father, comp_size, ia, ib);
877 }
878
d9c259ef
BC
879 if (eliminate_store_p)
880 {
881 tree niters = number_of_latch_executions (loop);
882
883 /* Don't do store elimination if niters info is unknown because stores
884 in the last iteration can't be eliminated and we need to recover it
885 after loop. */
886 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
887 }
888
bbc8a8dc
ZD
889 comps = XCNEWVEC (struct component *, n);
890 bad = component_of (comp_father, n);
9771b263 891 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc 892 {
5417e022 893 ia = (unsigned) (size_t) dr->aux;
bbc8a8dc
ZD
894 ca = component_of (comp_father, ia);
895 if (ca == bad)
896 continue;
897
898 comp = comps[ca];
899 if (!comp)
900 {
901 comp = XCNEW (struct component);
9771b263 902 comp->refs.create (comp_size[ca]);
d9c259ef 903 comp->eliminate_store_p = eliminate_store_p;
bbc8a8dc
ZD
904 comps[ca] = comp;
905 }
906
99b1c316 907 dataref = XCNEW (class dref_d);
bbc8a8dc
ZD
908 dataref->ref = dr;
909 dataref->stmt = DR_STMT (dr);
807e902e 910 dataref->offset = 0;
bbc8a8dc
ZD
911 dataref->distance = 0;
912
913 dataref->always_accessed
914 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
726a989a 915 gimple_bb (dataref->stmt));
9771b263
DN
916 dataref->pos = comp->refs.length ();
917 comp->refs.quick_push (dataref);
bbc8a8dc
ZD
918 }
919
6cedf5d8
RS
920 if (eliminate_store_p)
921 {
922 bitmap_iterator bi;
923 EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
924 {
925 ca = component_of (comp_father, ia);
926 if (ca != bad)
927 comps[ca]->eliminate_store_p = false;
928 }
929 }
930
bbc8a8dc
ZD
931 for (i = 0; i < n; i++)
932 {
933 comp = comps[i];
934 if (comp)
935 {
936 comp->next = comp_list;
937 comp_list = comp;
938 }
939 }
940 free (comps);
941
942end:
943 free (comp_father);
944 free (comp_size);
945 return comp_list;
946}
947
948/* Returns true if the component COMP satisfies the conditions
c80b4100 949 described in 2) at the beginning of this file. LOOP is the current
bbc8a8dc 950 loop. */
b8698a0f 951
bbc8a8dc 952static bool
99b1c316 953suitable_component_p (class loop *loop, struct component *comp)
bbc8a8dc
ZD
954{
955 unsigned i;
956 dref a, first;
957 basic_block ba, bp = loop->header;
958 bool ok, has_write = false;
959
9771b263 960 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc 961 {
726a989a 962 ba = gimple_bb (a->stmt);
bbc8a8dc
ZD
963
964 if (!just_once_each_iteration_p (loop, ba))
965 return false;
966
967 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
968 bp = ba;
969
b0af49c4 970 if (DR_IS_WRITE (a->ref))
bbc8a8dc
ZD
971 has_write = true;
972 }
973
9771b263 974 first = comp->refs[0];
bbc8a8dc
ZD
975 ok = suitable_reference_p (first->ref, &comp->comp_step);
976 gcc_assert (ok);
807e902e 977 first->offset = 0;
bbc8a8dc 978
9771b263 979 for (i = 1; comp->refs.iterate (i, &a); i++)
bbc8a8dc 980 {
cc8bea09
RS
981 /* Polynomial offsets are no use, since we need to know the
982 gap between iteration numbers at compile time. */
983 poly_widest_int offset;
984 if (!determine_offset (first->ref, a->ref, &offset)
985 || !offset.is_constant (&a->offset))
bbc8a8dc
ZD
986 return false;
987
b2b29377
MM
988 enum ref_step_type a_step;
989 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
990 && a_step == comp->comp_step);
bbc8a8dc
ZD
991 }
992
993 /* If there is a write inside the component, we must know whether the
994 step is nonzero or not -- we would not otherwise be able to recognize
995 whether the value accessed by reads comes from the OFFSET-th iteration
996 or the previous one. */
997 if (has_write && comp->comp_step == RS_ANY)
998 return false;
999
1000 return true;
1001}
b8698a0f 1002
bbc8a8dc
ZD
1003/* Check the conditions on references inside each of components COMPS,
1004 and remove the unsuitable components from the list. The new list
1005 of components is returned. The conditions are described in 2) at
c80b4100 1006 the beginning of this file. LOOP is the current loop. */
bbc8a8dc
ZD
1007
1008static struct component *
99b1c316 1009filter_suitable_components (class loop *loop, struct component *comps)
bbc8a8dc
ZD
1010{
1011 struct component **comp, *act;
1012
1013 for (comp = &comps; *comp; )
1014 {
1015 act = *comp;
1016 if (suitable_component_p (loop, act))
1017 comp = &act->next;
1018 else
1019 {
a0044be5
JJ
1020 dref ref;
1021 unsigned i;
1022
bbc8a8dc 1023 *comp = act->next;
9771b263 1024 FOR_EACH_VEC_ELT (act->refs, i, ref)
a0044be5 1025 free (ref);
bbc8a8dc
ZD
1026 release_component (act);
1027 }
1028 }
1029
1030 return comps;
1031}
1032
1033/* Compares two drefs A and B by their offset and position. Callback for
1034 qsort. */
1035
1036static int
1037order_drefs (const void *a, const void *b)
1038{
3d9a9f94
KG
1039 const dref *const da = (const dref *) a;
1040 const dref *const db = (const dref *) b;
807e902e 1041 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
bbc8a8dc
ZD
1042
1043 if (offcmp != 0)
1044 return offcmp;
1045
1046 return (*da)->pos - (*db)->pos;
1047}
1048
bd9cc42b
BC
1049/* Compares two drefs A and B by their position. Callback for qsort. */
1050
1051static int
1052order_drefs_by_pos (const void *a, const void *b)
1053{
1054 const dref *const da = (const dref *) a;
1055 const dref *const db = (const dref *) b;
1056
1057 return (*da)->pos - (*db)->pos;
1058}
1059
bbc8a8dc
ZD
1060/* Returns root of the CHAIN. */
1061
1062static inline dref
1063get_chain_root (chain_p chain)
1064{
9771b263 1065 return chain->refs[0];
bbc8a8dc
ZD
1066}
1067
9cdcebf9 1068/* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
d9c259ef
BC
1069 exist. */
1070
1071static inline dref
9cdcebf9 1072get_chain_last_write_at (chain_p chain, unsigned distance)
d9c259ef 1073{
9cdcebf9
BC
1074 for (unsigned i = chain->refs.length (); i > 0; i--)
1075 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1076 && distance == chain->refs[i - 1]->distance)
1077 return chain->refs[i - 1];
d9c259ef 1078
9cdcebf9
BC
1079 return NULL;
1080}
1081
1082/* Given CHAIN, returns the last write ref with the same distance before load
1083 at index LOAD_IDX, or NULL if it doesn't exist. */
1084
1085static inline dref
1086get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1087{
1088 gcc_assert (load_idx < chain->refs.length ());
1089
1090 unsigned distance = chain->refs[load_idx]->distance;
d9c259ef 1091
9cdcebf9
BC
1092 for (unsigned i = load_idx; i > 0; i--)
1093 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1094 && distance == chain->refs[i - 1]->distance)
1095 return chain->refs[i - 1];
1096
1097 return NULL;
d9c259ef
BC
1098}
1099
bbc8a8dc
ZD
1100/* Adds REF to the chain CHAIN. */
1101
1102static void
1103add_ref_to_chain (chain_p chain, dref ref)
1104{
1105 dref root = get_chain_root (chain);
bbc8a8dc 1106
807e902e
KZ
1107 gcc_assert (wi::les_p (root->offset, ref->offset));
1108 widest_int dist = ref->offset - root->offset;
807e902e 1109 gcc_assert (wi::fits_uhwi_p (dist));
bbc8a8dc 1110
9771b263 1111 chain->refs.safe_push (ref);
bbc8a8dc 1112
27bcd47c 1113 ref->distance = dist.to_uhwi ();
bbc8a8dc
ZD
1114
1115 if (ref->distance >= chain->length)
1116 {
1117 chain->length = ref->distance;
1118 chain->has_max_use_after = false;
1119 }
1120
9cdcebf9
BC
1121 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1122 if (DR_IS_WRITE (ref->ref))
1123 chain->type = CT_STORE_STORE;
1124
39637a44
BC
1125 /* Don't set the flag for store-store chain since there is no use. */
1126 if (chain->type != CT_STORE_STORE
1127 && ref->distance == chain->length
bbc8a8dc
ZD
1128 && ref->pos > root->pos)
1129 chain->has_max_use_after = true;
1130
1131 chain->all_always_accessed &= ref->always_accessed;
1132}
1133
1134/* Returns the chain for invariant component COMP. */
1135
1136static chain_p
1137make_invariant_chain (struct component *comp)
1138{
1139 chain_p chain = XCNEW (struct chain);
1140 unsigned i;
1141 dref ref;
1142
1143 chain->type = CT_INVARIANT;
1144
1145 chain->all_always_accessed = true;
1146
9771b263 1147 FOR_EACH_VEC_ELT (comp->refs, i, ref)
bbc8a8dc 1148 {
9771b263 1149 chain->refs.safe_push (ref);
bbc8a8dc
ZD
1150 chain->all_always_accessed &= ref->always_accessed;
1151 }
1152
d9c259ef
BC
1153 chain->inits = vNULL;
1154 chain->finis = vNULL;
1155
bbc8a8dc
ZD
1156 return chain;
1157}
1158
d9c259ef 1159/* Make a new chain of type TYPE rooted at REF. */
bbc8a8dc
ZD
1160
1161static chain_p
d9c259ef 1162make_rooted_chain (dref ref, enum chain_type type)
bbc8a8dc
ZD
1163{
1164 chain_p chain = XCNEW (struct chain);
1165
d9c259ef 1166 chain->type = type;
9771b263 1167 chain->refs.safe_push (ref);
bbc8a8dc 1168 chain->all_always_accessed = ref->always_accessed;
bbc8a8dc
ZD
1169 ref->distance = 0;
1170
d9c259ef
BC
1171 chain->inits = vNULL;
1172 chain->finis = vNULL;
1173
bbc8a8dc
ZD
1174 return chain;
1175}
1176
1177/* Returns true if CHAIN is not trivial. */
1178
1179static bool
1180nontrivial_chain_p (chain_p chain)
1181{
9771b263 1182 return chain != NULL && chain->refs.length () > 1;
bbc8a8dc
ZD
1183}
1184
1185/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1186 is no such name. */
1187
1188static tree
1189name_for_ref (dref ref)
1190{
1191 tree name;
1192
726a989a 1193 if (is_gimple_assign (ref->stmt))
bbc8a8dc
ZD
1194 {
1195 if (!ref->ref || DR_IS_READ (ref->ref))
726a989a 1196 name = gimple_assign_lhs (ref->stmt);
bbc8a8dc 1197 else
726a989a 1198 name = gimple_assign_rhs1 (ref->stmt);
bbc8a8dc
ZD
1199 }
1200 else
1201 name = PHI_RESULT (ref->stmt);
1202
1203 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1204}
1205
1206/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1207 iterations of the innermost enclosing loop). */
1208
1209static bool
1210valid_initializer_p (struct data_reference *ref,
1211 unsigned distance, struct data_reference *root)
1212{
1213 aff_tree diff, base, step;
cc8bea09 1214 poly_widest_int off;
bbc8a8dc 1215
bbc8a8dc
ZD
1216 /* Both REF and ROOT must be accessing the same object. */
1217 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1218 return false;
1219
1220 /* The initializer is defined outside of loop, hence its address must be
1221 invariant inside the loop. */
1222 gcc_assert (integer_zerop (DR_STEP (ref)));
1223
1224 /* If the address of the reference is invariant, initializer must access
1225 exactly the same location. */
1226 if (integer_zerop (DR_STEP (root)))
1227 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1228 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1229
1230 /* Verify that this index of REF is equal to the root's index at
1231 -DISTANCE-th iteration. */
1232 aff_combination_dr_offset (root, &diff);
1233 aff_combination_dr_offset (ref, &base);
807e902e 1234 aff_combination_scale (&base, -1);
bbc8a8dc
ZD
1235 aff_combination_add (&diff, &base);
1236
0d82a1c8
RG
1237 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1238 &step, &name_expansions);
bbc8a8dc
ZD
1239 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1240 return false;
1241
cc8bea09 1242 if (maybe_ne (off, distance))
bbc8a8dc
ZD
1243 return false;
1244
1245 return true;
1246}
1247
1248/* Finds looparound phi node of LOOP that copies the value of REF, and if its
1249 initial value is correct (equal to initial value of REF shifted by one
1250 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1251 is the root of the current chain. */
1252
538dd0b7 1253static gphi *
99b1c316 1254find_looparound_phi (class loop *loop, dref ref, dref root)
bbc8a8dc 1255{
726a989a 1256 tree name, init, init_ref;
538dd0b7 1257 gphi *phi = NULL;
355fe088 1258 gimple *init_stmt;
bbc8a8dc
ZD
1259 edge latch = loop_latch_edge (loop);
1260 struct data_reference init_dr;
538dd0b7 1261 gphi_iterator psi;
bbc8a8dc 1262
726a989a 1263 if (is_gimple_assign (ref->stmt))
bbc8a8dc
ZD
1264 {
1265 if (DR_IS_READ (ref->ref))
726a989a 1266 name = gimple_assign_lhs (ref->stmt);
bbc8a8dc 1267 else
726a989a 1268 name = gimple_assign_rhs1 (ref->stmt);
bbc8a8dc
ZD
1269 }
1270 else
1271 name = PHI_RESULT (ref->stmt);
1272 if (!name)
726a989a 1273 return NULL;
bbc8a8dc 1274
726a989a
RB
1275 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1276 {
538dd0b7 1277 phi = psi.phi ();
726a989a
RB
1278 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1279 break;
1280 }
bbc8a8dc 1281
726a989a
RB
1282 if (gsi_end_p (psi))
1283 return NULL;
bbc8a8dc
ZD
1284
1285 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1286 if (TREE_CODE (init) != SSA_NAME)
726a989a 1287 return NULL;
bbc8a8dc 1288 init_stmt = SSA_NAME_DEF_STMT (init);
726a989a
RB
1289 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1290 return NULL;
1291 gcc_assert (gimple_assign_lhs (init_stmt) == init);
bbc8a8dc 1292
726a989a 1293 init_ref = gimple_assign_rhs1 (init_stmt);
bbc8a8dc
ZD
1294 if (!REFERENCE_CLASS_P (init_ref)
1295 && !DECL_P (init_ref))
726a989a 1296 return NULL;
bbc8a8dc
ZD
1297
1298 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1299 loop enclosing PHI). */
1300 memset (&init_dr, 0, sizeof (struct data_reference));
1301 DR_REF (&init_dr) = init_ref;
1302 DR_STMT (&init_dr) = phi;
f4ebbd24
DM
1303 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
1304 init_stmt))
3661e899 1305 return NULL;
bbc8a8dc
ZD
1306
1307 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
726a989a 1308 return NULL;
bbc8a8dc
ZD
1309
1310 return phi;
1311}
1312
1313/* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1314
1315static void
538dd0b7 1316insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
bbc8a8dc 1317{
99b1c316 1318 dref nw = XCNEW (class dref_d), aref;
bbc8a8dc
ZD
1319 unsigned i;
1320
1321 nw->stmt = phi;
1322 nw->distance = ref->distance + 1;
1323 nw->always_accessed = 1;
1324
9771b263 1325 FOR_EACH_VEC_ELT (chain->refs, i, aref)
bbc8a8dc
ZD
1326 if (aref->distance >= nw->distance)
1327 break;
9771b263 1328 chain->refs.safe_insert (i, nw);
bbc8a8dc
ZD
1329
1330 if (nw->distance > chain->length)
1331 {
1332 chain->length = nw->distance;
1333 chain->has_max_use_after = false;
1334 }
1335}
1336
1337/* For references in CHAIN that are copied around the LOOP (created previously
1338 by PRE, or by user), add the results of such copies to the chain. This
1339 enables us to remove the copies by unrolling, and may need less registers
1340 (also, it may allow us to combine chains together). */
1341
1342static void
99b1c316 1343add_looparound_copies (class loop *loop, chain_p chain)
bbc8a8dc
ZD
1344{
1345 unsigned i;
1346 dref ref, root = get_chain_root (chain);
538dd0b7 1347 gphi *phi;
bbc8a8dc 1348
d9c259ef
BC
1349 if (chain->type == CT_STORE_STORE)
1350 return;
1351
9771b263 1352 FOR_EACH_VEC_ELT (chain->refs, i, ref)
bbc8a8dc
ZD
1353 {
1354 phi = find_looparound_phi (loop, ref, root);
1355 if (!phi)
1356 continue;
1357
1358 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1359 insert_looparound_copy (chain, ref, phi);
1360 }
1361}
1362
1363/* Find roots of the values and determine distances in the component COMP.
1364 The references are redistributed into CHAINS. LOOP is the current
1365 loop. */
1366
1367static void
99b1c316 1368determine_roots_comp (class loop *loop,
bbc8a8dc 1369 struct component *comp,
9771b263 1370 vec<chain_p> *chains)
bbc8a8dc
ZD
1371{
1372 unsigned i;
1373 dref a;
1374 chain_p chain = NULL;
807e902e 1375 widest_int last_ofs = 0;
d9c259ef 1376 enum chain_type type;
bbc8a8dc
ZD
1377
1378 /* Invariants are handled specially. */
1379 if (comp->comp_step == RS_INVARIANT)
1380 {
1381 chain = make_invariant_chain (comp);
9771b263 1382 chains->safe_push (chain);
bbc8a8dc
ZD
1383 return;
1384 }
1385
67bb451d
BC
1386 /* Trivial component. */
1387 if (comp->refs.length () <= 1)
df3c2945
BC
1388 {
1389 if (comp->refs.length () == 1)
1390 {
1391 free (comp->refs[0]);
1392 comp->refs.truncate (0);
1393 }
1394 return;
1395 }
bbc8a8dc 1396
67bb451d 1397 comp->refs.qsort (order_drefs);
9cdcebf9
BC
1398
1399 /* For Store-Store chain, we only support load if it is dominated by a
1400 store statement in the same iteration of loop. */
1401 if (comp->eliminate_store_p)
1402 for (a = NULL, i = 0; i < comp->refs.length (); i++)
1403 {
1404 if (DR_IS_WRITE (comp->refs[i]->ref))
1405 a = comp->refs[i];
1406 else if (a == NULL || a->offset != comp->refs[i]->offset)
1407 {
1408 /* If there is load that is not dominated by a store in the
1409 same iteration of loop, clear the flag so no Store-Store
1410 chain is generated for this component. */
1411 comp->eliminate_store_p = false;
1412 break;
1413 }
1414 }
1415
1416 /* Determine roots and create chains for components. */
9771b263 1417 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc 1418 {
d9c259ef 1419 if (!chain
9cdcebf9 1420 || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
d9c259ef 1421 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
807e902e 1422 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
bbc8a8dc
ZD
1423 {
1424 if (nontrivial_chain_p (chain))
b61b1f17
MM
1425 {
1426 add_looparound_copies (loop, chain);
9771b263 1427 chains->safe_push (chain);
b61b1f17 1428 }
bbc8a8dc
ZD
1429 else
1430 release_chain (chain);
d9c259ef 1431
9cdcebf9
BC
1432 /* Determine type of the chain. If the root reference is a load,
1433 this can only be a CT_LOAD chain; other chains are intialized
1434 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1435 new reference is added. */
1436 type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
d9c259ef 1437 chain = make_rooted_chain (a, type);
b61b1f17 1438 last_ofs = a->offset;
bbc8a8dc
ZD
1439 continue;
1440 }
1441
1442 add_ref_to_chain (chain, a);
1443 }
1444
1445 if (nontrivial_chain_p (chain))
1446 {
1447 add_looparound_copies (loop, chain);
9771b263 1448 chains->safe_push (chain);
bbc8a8dc
ZD
1449 }
1450 else
1451 release_chain (chain);
1452}
1453
1454/* Find roots of the values and determine distances in components COMPS, and
1455 separates the references to CHAINS. LOOP is the current loop. */
1456
1457static void
99b1c316 1458determine_roots (class loop *loop,
9771b263 1459 struct component *comps, vec<chain_p> *chains)
bbc8a8dc
ZD
1460{
1461 struct component *comp;
1462
1463 for (comp = comps; comp; comp = comp->next)
1464 determine_roots_comp (loop, comp, chains);
1465}
1466
1467/* Replace the reference in statement STMT with temporary variable
82d6e6fc 1468 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
bbc8a8dc
ZD
1469 the reference in the statement. IN_LHS is true if the reference
1470 is in the lhs of STMT, false if it is in rhs. */
1471
1472static void
355fe088 1473replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
bbc8a8dc 1474{
726a989a 1475 tree val;
538dd0b7 1476 gassign *new_stmt;
726a989a 1477 gimple_stmt_iterator bsi, psi;
bbc8a8dc 1478
726a989a 1479 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
1480 {
1481 gcc_assert (!in_lhs && !set);
1482
1483 val = PHI_RESULT (stmt);
726a989a
RB
1484 bsi = gsi_after_labels (gimple_bb (stmt));
1485 psi = gsi_for_stmt (stmt);
1486 remove_phi_node (&psi, false);
bbc8a8dc 1487
726a989a 1488 /* Turn the phi node into GIMPLE_ASSIGN. */
82d6e6fc 1489 new_stmt = gimple_build_assign (val, new_tree);
726a989a 1490 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
bbc8a8dc
ZD
1491 return;
1492 }
b8698a0f 1493
bbc8a8dc
ZD
1494 /* Since the reference is of gimple_reg type, it should only
1495 appear as lhs or rhs of modify statement. */
726a989a
RB
1496 gcc_assert (is_gimple_assign (stmt));
1497
1498 bsi = gsi_for_stmt (stmt);
bbc8a8dc 1499
82d6e6fc 1500 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
bbc8a8dc
ZD
1501 if (!set)
1502 {
1503 gcc_assert (!in_lhs);
82d6e6fc 1504 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
726a989a 1505 stmt = gsi_stmt (bsi);
bbc8a8dc
ZD
1506 update_stmt (stmt);
1507 return;
1508 }
1509
bbc8a8dc
ZD
1510 if (in_lhs)
1511 {
726a989a 1512 /* We have statement
b8698a0f 1513
726a989a 1514 OLD = VAL
bbc8a8dc 1515
726a989a
RB
1516 If OLD is a memory reference, then VAL is gimple_val, and we transform
1517 this to
bbc8a8dc
ZD
1518
1519 OLD = VAL
1520 NEW = VAL
1521
b8698a0f 1522 Otherwise, we are replacing a combination chain,
726a989a
RB
1523 VAL is the expression that performs the combination, and OLD is an
1524 SSA name. In this case, we transform the assignment to
1525
1526 OLD = VAL
1527 NEW = OLD
1528
1529 */
1530
1531 val = gimple_assign_lhs (stmt);
1532 if (TREE_CODE (val) != SSA_NAME)
1533 {
726a989a 1534 val = gimple_assign_rhs1 (stmt);
7e8b1c4b
JJ
1535 gcc_assert (gimple_assign_single_p (stmt));
1536 if (TREE_CLOBBER_P (val))
32244553 1537 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
7e8b1c4b
JJ
1538 else
1539 gcc_assert (gimple_assign_copy_p (stmt));
726a989a 1540 }
bbc8a8dc
ZD
1541 }
1542 else
1543 {
bbc8a8dc
ZD
1544 /* VAL = OLD
1545
1546 is transformed to
1547
1548 VAL = OLD
1549 NEW = VAL */
726a989a
RB
1550
1551 val = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
1552 }
1553
82d6e6fc 1554 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
726a989a 1555 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
bbc8a8dc
ZD
1556}
1557
3c62a7fb
BC
1558/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1559 of the loop it was analyzed in. Append init stmts to STMTS. */
9f2b860b 1560
b1ad9be2 1561static tree
3c62a7fb
BC
1562ref_at_iteration (data_reference_p dr, int iter,
1563 gimple_seq *stmts, tree niters = NULL_TREE)
9f2b860b
RB
1564{
1565 tree off = DR_OFFSET (dr);
1566 tree coff = DR_INIT (dr);
b1ad9be2
BE
1567 tree ref = DR_REF (dr);
1568 enum tree_code ref_code = ERROR_MARK;
1569 tree ref_type = NULL_TREE;
1570 tree ref_op1 = NULL_TREE;
1571 tree ref_op2 = NULL_TREE;
3c62a7fb
BC
1572 tree new_offset;
1573
1574 if (iter != 0)
1575 {
1576 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1577 if (TREE_CODE (new_offset) == INTEGER_CST)
1578 coff = size_binop (PLUS_EXPR, coff, new_offset);
1579 else
1580 off = size_binop (PLUS_EXPR, off, new_offset);
1581 }
1582
1583 if (niters != NULL_TREE)
1584 {
1585 niters = fold_convert (ssizetype, niters);
1586 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1587 if (TREE_CODE (niters) == INTEGER_CST)
1588 coff = size_binop (PLUS_EXPR, coff, new_offset);
1589 else
1590 off = size_binop (PLUS_EXPR, off, new_offset);
1591 }
1592
cb3d1e3e
RB
1593 /* While data-ref analysis punts on bit offsets it still handles
1594 bitfield accesses at byte boundaries. Cope with that. Note that
b1ad9be2
BE
1595 if the bitfield object also starts at a byte-boundary we can simply
1596 replicate the COMPONENT_REF, but we have to subtract the component's
1597 byte-offset from the MEM_REF address first.
1598 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
cb3d1e3e 1599 start at offset zero. */
b1ad9be2
BE
1600 if (TREE_CODE (ref) == COMPONENT_REF
1601 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
cb3d1e3e 1602 {
b1ad9be2
BE
1603 unsigned HOST_WIDE_INT boff;
1604 tree field = TREE_OPERAND (ref, 1);
1605 tree offset = component_ref_field_offset (ref);
1606 ref_type = TREE_TYPE (ref);
1607 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1608 /* This can occur in Ada. See the comment in get_bit_range. */
1609 if (boff % BITS_PER_UNIT != 0
1610 || !tree_fits_uhwi_p (offset))
1611 {
1612 ref_code = BIT_FIELD_REF;
1613 ref_op1 = DECL_SIZE (field);
1614 ref_op2 = bitsize_zero_node;
1615 }
1616 else
1617 {
1618 boff >>= LOG2_BITS_PER_UNIT;
1619 boff += tree_to_uhwi (offset);
1620 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1621 ref_code = COMPONENT_REF;
1622 ref_op1 = field;
1623 ref_op2 = TREE_OPERAND (ref, 2);
1624 ref = TREE_OPERAND (ref, 0);
1625 }
cb3d1e3e 1626 }
b1ad9be2
BE
1627 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1628 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1629 is_gimple_mem_ref_addr, NULL_TREE);
1630 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1631 tree type = build_aligned_type (TREE_TYPE (ref),
1632 get_object_alignment (ref));
1633 ref = build2 (MEM_REF, type, addr, alias_ptr);
1634 if (ref_type)
1635 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1636 return ref;
bbc8a8dc
ZD
1637}
1638
1639/* Get the initialization expression for the INDEX-th temporary variable
1640 of CHAIN. */
1641
1642static tree
1643get_init_expr (chain_p chain, unsigned index)
1644{
1645 if (chain->type == CT_COMBINATION)
1646 {
1647 tree e1 = get_init_expr (chain->ch1, index);
1648 tree e2 = get_init_expr (chain->ch2, index);
1649
82d6e6fc 1650 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
bbc8a8dc
ZD
1651 }
1652 else
9771b263 1653 return chain->inits[index];
bbc8a8dc
ZD
1654}
1655
2664efb6
ZD
1656/* Returns a new temporary variable used for the I-th variable carrying
1657 value of REF. The variable's uid is marked in TMP_VARS. */
1658
1659static tree
1660predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1661{
1662 tree type = TREE_TYPE (ref);
2664efb6
ZD
1663 /* We never access the components of the temporary variable in predictive
1664 commoning. */
acd63801 1665 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
2664efb6
ZD
1666 bitmap_set_bit (tmp_vars, DECL_UID (var));
1667 return var;
1668}
1669
bbc8a8dc
ZD
1670/* Creates the variables for CHAIN, as well as phi nodes for them and
1671 initialization on entry to LOOP. Uids of the newly created
1672 temporary variables are marked in TMP_VARS. */
1673
1674static void
99b1c316 1675initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
bbc8a8dc
ZD
1676{
1677 unsigned i;
1678 unsigned n = chain->length;
1679 dref root = get_chain_root (chain);
1680 bool reuse_first = !chain->has_max_use_after;
726a989a 1681 tree ref, init, var, next;
538dd0b7 1682 gphi *phi;
726a989a 1683 gimple_seq stmts;
bbc8a8dc
ZD
1684 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1685
1686 /* If N == 0, then all the references are within the single iteration. And
1687 since this is an nonempty chain, reuse_first cannot be true. */
1688 gcc_assert (n > 0 || !reuse_first);
1689
9771b263 1690 chain->vars.create (n + 1);
bbc8a8dc
ZD
1691
1692 if (chain->type == CT_COMBINATION)
726a989a 1693 ref = gimple_assign_lhs (root->stmt);
bbc8a8dc
ZD
1694 else
1695 ref = DR_REF (root->ref);
1696
1697 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1698 {
2664efb6 1699 var = predcom_tmp_var (ref, i, tmp_vars);
9771b263 1700 chain->vars.quick_push (var);
bbc8a8dc
ZD
1701 }
1702 if (reuse_first)
9771b263 1703 chain->vars.quick_push (chain->vars[0]);
b8698a0f 1704
9771b263 1705 FOR_EACH_VEC_ELT (chain->vars, i, var)
b731b390 1706 chain->vars[i] = make_ssa_name (var);
bbc8a8dc
ZD
1707
1708 for (i = 0; i < n; i++)
1709 {
9771b263
DN
1710 var = chain->vars[i];
1711 next = chain->vars[i + 1];
bbc8a8dc
ZD
1712 init = get_init_expr (chain, i);
1713
1714 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1715 if (stmts)
5006671f 1716 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc
ZD
1717
1718 phi = create_phi_node (var, loop->header);
9e227d60
DC
1719 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1720 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
bbc8a8dc
ZD
1721 }
1722}
1723
39ff50ce
BC
1724/* For inter-iteration store elimination CHAIN in LOOP, returns true if
1725 all stores to be eliminated store loop invariant values into memory.
1726 In this case, we can use these invariant values directly after LOOP. */
1727
1728static bool
99b1c316 1729is_inv_store_elimination_chain (class loop *loop, chain_p chain)
39ff50ce
BC
1730{
1731 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1732 return false;
1733
1734 gcc_assert (!chain->has_max_use_after);
1735
02fac244 1736 /* If loop iterates for unknown times or fewer times than chain->length,
39ff50ce
BC
1737 we still need to setup root variable and propagate it with PHI node. */
1738 tree niters = number_of_latch_executions (loop);
8e6cdc90
RS
1739 if (TREE_CODE (niters) != INTEGER_CST
1740 || wi::leu_p (wi::to_wide (niters), chain->length))
39ff50ce
BC
1741 return false;
1742
1743 /* Check stores in chain for elimination if they only store loop invariant
1744 values. */
1745 for (unsigned i = 0; i < chain->length; i++)
1746 {
9cdcebf9 1747 dref a = get_chain_last_write_at (chain, i);
39ff50ce
BC
1748 if (a == NULL)
1749 continue;
1750
1751 gimple *def_stmt, *stmt = a->stmt;
1752 if (!gimple_assign_single_p (stmt))
1753 return false;
1754
1755 tree val = gimple_assign_rhs1 (stmt);
1756 if (TREE_CLOBBER_P (val))
1757 return false;
1758
1759 if (CONSTANT_CLASS_P (val))
1760 continue;
1761
1762 if (TREE_CODE (val) != SSA_NAME)
1763 return false;
1764
1765 def_stmt = SSA_NAME_DEF_STMT (val);
1766 if (gimple_nop_p (def_stmt))
1767 continue;
1768
1769 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1770 return false;
1771 }
1772 return true;
1773}
1774
1775/* Creates root variables for store elimination CHAIN in which stores for
1776 elimination only store loop invariant values. In this case, we neither
1777 need to load root variables before loop nor propagate it with PHI nodes. */
1778
1779static void
1780initialize_root_vars_store_elim_1 (chain_p chain)
1781{
1782 tree var;
1783 unsigned i, n = chain->length;
1784
1785 chain->vars.create (n);
1786 chain->vars.safe_grow_cleared (n);
1787
1788 /* Initialize root value for eliminated stores at each distance. */
1789 for (i = 0; i < n; i++)
1790 {
9cdcebf9 1791 dref a = get_chain_last_write_at (chain, i);
39ff50ce
BC
1792 if (a == NULL)
1793 continue;
1794
1795 var = gimple_assign_rhs1 (a->stmt);
1796 chain->vars[a->distance] = var;
1797 }
1798
1799 /* We don't propagate values with PHI nodes, so manually propagate value
1800 to bubble positions. */
1801 var = chain->vars[0];
1802 for (i = 1; i < n; i++)
1803 {
1804 if (chain->vars[i] != NULL_TREE)
1805 {
1806 var = chain->vars[i];
1807 continue;
1808 }
1809 chain->vars[i] = var;
1810 }
1811
1812 /* Revert the vector. */
1813 for (i = 0; i < n / 2; i++)
1814 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1815}
1816
d9c259ef
BC
1817/* Creates root variables for store elimination CHAIN in which stores for
1818 elimination store loop variant values. In this case, we may need to
1819 load root variables before LOOP and propagate it with PHI nodes. Uids
1820 of the newly created root variables are marked in TMP_VARS. */
1821
1822static void
99b1c316 1823initialize_root_vars_store_elim_2 (class loop *loop,
d9c259ef
BC
1824 chain_p chain, bitmap tmp_vars)
1825{
1826 unsigned i, n = chain->length;
1827 tree ref, init, var, next, val, phi_result;
1828 gimple *stmt;
1829 gimple_seq stmts;
1830
1831 chain->vars.create (n);
1832
1833 ref = DR_REF (get_chain_root (chain)->ref);
1834 for (i = 0; i < n; i++)
1835 {
1836 var = predcom_tmp_var (ref, i, tmp_vars);
1837 chain->vars.quick_push (var);
1838 }
1839
1840 FOR_EACH_VEC_ELT (chain->vars, i, var)
1841 chain->vars[i] = make_ssa_name (var);
1842
1843 /* Root values are either rhs operand of stores to be eliminated, or
1844 loaded from memory before loop. */
1845 auto_vec<tree> vtemps;
1846 vtemps.safe_grow_cleared (n);
1847 for (i = 0; i < n; i++)
1848 {
1849 init = get_init_expr (chain, i);
1850 if (init == NULL_TREE)
1851 {
1852 /* Root value is rhs operand of the store to be eliminated if
1853 it isn't loaded from memory before loop. */
9cdcebf9 1854 dref a = get_chain_last_write_at (chain, i);
d9c259ef
BC
1855 val = gimple_assign_rhs1 (a->stmt);
1856 if (TREE_CLOBBER_P (val))
9cdcebf9
BC
1857 {
1858 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1859 gimple_assign_set_rhs1 (a->stmt, val);
1860 }
d9c259ef
BC
1861
1862 vtemps[n - i - 1] = val;
1863 }
1864 else
1865 {
1866 edge latch = loop_latch_edge (loop);
1867 edge entry = loop_preheader_edge (loop);
1868
1869 /* Root value is loaded from memory before loop, we also need
1870 to add PHI nodes to propagate the value across iterations. */
1871 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1872 if (stmts)
1873 gsi_insert_seq_on_edge_immediate (entry, stmts);
1874
1875 next = chain->vars[n - i];
1876 phi_result = copy_ssa_name (next);
1877 gphi *phi = create_phi_node (phi_result, loop->header);
1878 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1879 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1880 vtemps[n - i - 1] = phi_result;
1881 }
1882 }
1883
1884 /* Find the insertion position. */
1885 dref last = get_chain_root (chain);
1886 for (i = 0; i < chain->refs.length (); i++)
1887 {
1888 if (chain->refs[i]->pos > last->pos)
1889 last = chain->refs[i];
1890 }
1891
1892 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1893
1894 /* Insert statements copying root value to root variable. */
1895 for (i = 0; i < n; i++)
1896 {
1897 var = chain->vars[i];
1898 val = vtemps[i];
1899 stmt = gimple_build_assign (var, val);
1900 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1901 }
1902}
1903
1904/* Generates stores for CHAIN's eliminated stores in LOOP's last
1905 (CHAIN->length - 1) iterations. */
1906
1907static void
99b1c316 1908finalize_eliminated_stores (class loop *loop, chain_p chain)
d9c259ef
BC
1909{
1910 unsigned i, n = chain->length;
1911
1912 for (i = 0; i < n; i++)
1913 {
1914 tree var = chain->vars[i];
1915 tree fini = chain->finis[n - i - 1];
1916 gimple *stmt = gimple_build_assign (fini, var);
1917
1918 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1919 }
1920
1921 if (chain->fini_seq)
1922 {
1923 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1924 chain->fini_seq = NULL;
1925 }
1926}
1927
bbc8a8dc
ZD
1928/* Initializes a variable for load motion for ROOT and prepares phi nodes and
1929 initialization on entry to LOOP if necessary. The ssa name for the variable
1930 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1931 around the loop is created. Uid of the newly created temporary variable
1932 is marked in TMP_VARS. INITS is the list containing the (single)
1933 initializer. */
1934
1935static void
99b1c316 1936initialize_root_vars_lm (class loop *loop, dref root, bool written,
9771b263 1937 vec<tree> *vars, vec<tree> inits,
bbc8a8dc
ZD
1938 bitmap tmp_vars)
1939{
1940 unsigned i;
726a989a
RB
1941 tree ref = DR_REF (root->ref), init, var, next;
1942 gimple_seq stmts;
538dd0b7 1943 gphi *phi;
bbc8a8dc
ZD
1944 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1945
1946 /* Find the initializer for the variable, and check that it cannot
1947 trap. */
9771b263 1948 init = inits[0];
bbc8a8dc 1949
9771b263 1950 vars->create (written ? 2 : 1);
2664efb6 1951 var = predcom_tmp_var (ref, 0, tmp_vars);
9771b263 1952 vars->quick_push (var);
bbc8a8dc 1953 if (written)
9771b263 1954 vars->quick_push ((*vars)[0]);
b8698a0f 1955
9771b263 1956 FOR_EACH_VEC_ELT (*vars, i, var)
b731b390 1957 (*vars)[i] = make_ssa_name (var);
bbc8a8dc 1958
9771b263 1959 var = (*vars)[0];
b8698a0f 1960
bbc8a8dc
ZD
1961 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1962 if (stmts)
5006671f 1963 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc
ZD
1964
1965 if (written)
1966 {
9771b263 1967 next = (*vars)[1];
bbc8a8dc 1968 phi = create_phi_node (var, loop->header);
9e227d60
DC
1969 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1970 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
bbc8a8dc
ZD
1971 }
1972 else
1973 {
538dd0b7 1974 gassign *init_stmt = gimple_build_assign (var, init);
726a989a 1975 gsi_insert_on_edge_immediate (entry, init_stmt);
bbc8a8dc
ZD
1976 }
1977}
1978
1979
1980/* Execute load motion for references in chain CHAIN. Uids of the newly
1981 created temporary variables are marked in TMP_VARS. */
1982
1983static void
99b1c316 1984execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
bbc8a8dc 1985{
ef062b13 1986 auto_vec<tree> vars;
bbc8a8dc
ZD
1987 dref a;
1988 unsigned n_writes = 0, ridx, i;
1989 tree var;
1990
1991 gcc_assert (chain->type == CT_INVARIANT);
1992 gcc_assert (!chain->combined);
9771b263 1993 FOR_EACH_VEC_ELT (chain->refs, i, a)
b0af49c4 1994 if (DR_IS_WRITE (a->ref))
bbc8a8dc 1995 n_writes++;
b8698a0f 1996
bbc8a8dc 1997 /* If there are no reads in the loop, there is nothing to do. */
9771b263 1998 if (n_writes == chain->refs.length ())
bbc8a8dc
ZD
1999 return;
2000
2001 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
2002 &vars, chain->inits, tmp_vars);
2003
2004 ridx = 0;
9771b263 2005 FOR_EACH_VEC_ELT (chain->refs, i, a)
bbc8a8dc
ZD
2006 {
2007 bool is_read = DR_IS_READ (a->ref);
bbc8a8dc 2008
b0af49c4 2009 if (DR_IS_WRITE (a->ref))
bbc8a8dc
ZD
2010 {
2011 n_writes--;
2012 if (n_writes)
2013 {
9771b263 2014 var = vars[0];
b731b390 2015 var = make_ssa_name (SSA_NAME_VAR (var));
9771b263 2016 vars[0] = var;
bbc8a8dc
ZD
2017 }
2018 else
2019 ridx = 1;
2020 }
b8698a0f 2021
9771b263 2022 replace_ref_with (a->stmt, vars[ridx],
bbc8a8dc
ZD
2023 !is_read, !is_read);
2024 }
bbc8a8dc
ZD
2025}
2026
2027/* Returns the single statement in that NAME is used, excepting
2028 the looparound phi nodes contained in one of the chains. If there is no
726a989a 2029 such statement, or more statements, NULL is returned. */
bbc8a8dc 2030
355fe088 2031static gimple *
bbc8a8dc
ZD
2032single_nonlooparound_use (tree name)
2033{
2034 use_operand_p use;
2035 imm_use_iterator it;
355fe088 2036 gimple *stmt, *ret = NULL;
bbc8a8dc
ZD
2037
2038 FOR_EACH_IMM_USE_FAST (use, it, name)
2039 {
2040 stmt = USE_STMT (use);
2041
726a989a 2042 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
2043 {
2044 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2045 could not be processed anyway, so just fail for them. */
2046 if (bitmap_bit_p (looparound_phis,
2047 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2048 continue;
2049
726a989a 2050 return NULL;
bbc8a8dc 2051 }
b63f974e
JJ
2052 else if (is_gimple_debug (stmt))
2053 continue;
726a989a
RB
2054 else if (ret != NULL)
2055 return NULL;
bbc8a8dc
ZD
2056 else
2057 ret = stmt;
2058 }
2059
2060 return ret;
2061}
2062
2063/* Remove statement STMT, as well as the chain of assignments in that it is
2064 used. */
2065
2066static void
355fe088 2067remove_stmt (gimple *stmt)
bbc8a8dc 2068{
726a989a 2069 tree name;
355fe088 2070 gimple *next;
726a989a 2071 gimple_stmt_iterator psi;
bbc8a8dc 2072
726a989a 2073 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
2074 {
2075 name = PHI_RESULT (stmt);
2076 next = single_nonlooparound_use (name);
273ccb6d 2077 reset_debug_uses (stmt);
726a989a
RB
2078 psi = gsi_for_stmt (stmt);
2079 remove_phi_node (&psi, true);
bbc8a8dc
ZD
2080
2081 if (!next
5f8ecf45 2082 || !gimple_assign_ssa_name_copy_p (next)
726a989a 2083 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
2084 return;
2085
2086 stmt = next;
2087 }
2088
2089 while (1)
2090 {
726a989a 2091 gimple_stmt_iterator bsi;
b8698a0f 2092
726a989a 2093 bsi = gsi_for_stmt (stmt);
bbc8a8dc 2094
726a989a 2095 name = gimple_assign_lhs (stmt);
d9c259ef
BC
2096 if (TREE_CODE (name) == SSA_NAME)
2097 {
2098 next = single_nonlooparound_use (name);
2099 reset_debug_uses (stmt);
2100 }
2101 else
2102 {
2103 /* This is a store to be eliminated. */
2104 gcc_assert (gimple_vdef (stmt) != NULL);
2105 next = NULL;
2106 }
bbc8a8dc 2107
13714310 2108 unlink_stmt_vdef (stmt);
726a989a 2109 gsi_remove (&bsi, true);
5f8ecf45 2110 release_defs (stmt);
bbc8a8dc
ZD
2111
2112 if (!next
5f8ecf45 2113 || !gimple_assign_ssa_name_copy_p (next)
726a989a 2114 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
2115 return;
2116
2117 stmt = next;
2118 }
2119}
2120
2121/* Perform the predictive commoning optimization for a chain CHAIN.
2122 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2123
2124static void
99b1c316 2125execute_pred_commoning_chain (class loop *loop, chain_p chain,
d9c259ef 2126 bitmap tmp_vars)
bbc8a8dc 2127{
9cdcebf9 2128 unsigned i;
13714310 2129 dref a;
bbc8a8dc 2130 tree var;
10dfaf9a 2131 bool in_lhs;
bbc8a8dc
ZD
2132
2133 if (chain->combined)
2134 {
2135 /* For combined chains, just remove the statements that are used to
a933d47f
RB
2136 compute the values of the expression (except for the root one).
2137 We delay this until after all chains are processed. */
bbc8a8dc 2138 }
d9c259ef
BC
2139 else if (chain->type == CT_STORE_STORE)
2140 {
2141 if (chain->length > 0)
2142 {
39ff50ce
BC
2143 if (chain->inv_store_elimination)
2144 {
2145 /* If dead stores in this chain only store loop invariant
2146 values, we can simply record the invariant value and use
2147 it directly after loop. */
2148 initialize_root_vars_store_elim_1 (chain);
2149 }
2150 else
2151 {
2152 /* If dead stores in this chain store loop variant values,
2153 we need to set up the variables by loading from memory
2154 before loop and propagating it with PHI nodes. */
2155 initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2156 }
d9c259ef
BC
2157
2158 /* For inter-iteration store elimination chain, stores at each
2159 distance in loop's last (chain->length - 1) iterations can't
2160 be eliminated, because there is no following killing store.
2161 We need to generate these stores after loop. */
2162 finalize_eliminated_stores (loop, chain);
2163 }
2164
9cdcebf9
BC
2165 bool last_store_p = true;
2166 for (i = chain->refs.length (); i > 0; i--)
2167 {
2168 a = chain->refs[i - 1];
2169 /* Preserve the last store of the chain. Eliminate other stores
2170 which are killed by the last one. */
2171 if (DR_IS_WRITE (a->ref))
2172 {
2173 if (last_store_p)
2174 last_store_p = false;
2175 else
2176 remove_stmt (a->stmt);
2177
2178 continue;
2179 }
2180
2181 /* Any load in Store-Store chain must be dominated by a previous
2182 store, we replace the load reference with rhs of the store. */
2183 dref b = get_chain_last_write_before_load (chain, i - 1);
2184 gcc_assert (b != NULL);
2185 var = gimple_assign_rhs1 (b->stmt);
2186 replace_ref_with (a->stmt, var, false, false);
2187 }
d9c259ef 2188 }
bbc8a8dc
ZD
2189 else
2190 {
10dfaf9a
BC
2191 /* For non-combined chains, set up the variables that hold its value. */
2192 initialize_root_vars (loop, chain, tmp_vars);
2193 a = get_chain_root (chain);
2194 in_lhs = (chain->type == CT_STORE_LOAD
2195 || chain->type == CT_COMBINATION);
2196 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2197
2198 /* Replace the uses of the original references by these variables. */
9771b263 2199 for (i = 1; chain->refs.iterate (i, &a); i++)
bbc8a8dc 2200 {
9771b263 2201 var = chain->vars[chain->length - a->distance];
bbc8a8dc
ZD
2202 replace_ref_with (a->stmt, var, false, false);
2203 }
2204 }
2205}
2206
2207/* Determines the unroll factor necessary to remove as many temporary variable
2208 copies as possible. CHAINS is the list of chains that will be
2209 optimized. */
2210
2211static unsigned
9771b263 2212determine_unroll_factor (vec<chain_p> chains)
bbc8a8dc
ZD
2213{
2214 chain_p chain;
2215 unsigned factor = 1, af, nfactor, i;
028d4092 2216 unsigned max = param_max_unroll_times;
bbc8a8dc 2217
9771b263 2218 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc 2219 {
8039a35d 2220 if (chain->type == CT_INVARIANT)
bbc8a8dc 2221 continue;
d9c259ef
BC
2222 /* For now we can't handle unrolling when eliminating stores. */
2223 else if (chain->type == CT_STORE_STORE)
2224 return 1;
bbc8a8dc 2225
8039a35d
RB
2226 if (chain->combined)
2227 {
2228 /* For combined chains, we can't handle unrolling if we replace
2229 looparound PHIs. */
2230 dref a;
2231 unsigned j;
2232 for (j = 1; chain->refs.iterate (j, &a); j++)
2233 if (gimple_code (a->stmt) == GIMPLE_PHI)
2234 return 1;
2235 continue;
2236 }
2237
bbc8a8dc
ZD
2238 /* The best unroll factor for this chain is equal to the number of
2239 temporary variables that we create for it. */
2240 af = chain->length;
2241 if (chain->has_max_use_after)
2242 af++;
2243
2244 nfactor = factor * af / gcd (factor, af);
2245 if (nfactor <= max)
2246 factor = nfactor;
2247 }
2248
2249 return factor;
2250}
2251
2252/* Perform the predictive commoning optimization for CHAINS.
2253 Uids of the newly created temporary variables are marked in TMP_VARS. */
2254
2255static void
99b1c316 2256execute_pred_commoning (class loop *loop, vec<chain_p> chains,
bbc8a8dc
ZD
2257 bitmap tmp_vars)
2258{
2259 chain_p chain;
2260 unsigned i;
2261
9771b263 2262 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc
ZD
2263 {
2264 if (chain->type == CT_INVARIANT)
2265 execute_load_motion (loop, chain, tmp_vars);
2266 else
2267 execute_pred_commoning_chain (loop, chain, tmp_vars);
2268 }
b8698a0f 2269
a933d47f
RB
2270 FOR_EACH_VEC_ELT (chains, i, chain)
2271 {
2272 if (chain->type == CT_INVARIANT)
2273 ;
2274 else if (chain->combined)
2275 {
2276 /* For combined chains, just remove the statements that are used to
2277 compute the values of the expression (except for the root one). */
2278 dref a;
2279 unsigned j;
2280 for (j = 1; chain->refs.iterate (j, &a); j++)
2281 remove_stmt (a->stmt);
2282 }
2283 }
2284
bbc8a8dc
ZD
2285 update_ssa (TODO_update_ssa_only_virtuals);
2286}
2287
c80b4100 2288/* For each reference in CHAINS, if its defining statement is
726a989a 2289 phi node, record the ssa name that is defined by it. */
bbc8a8dc
ZD
2290
2291static void
9771b263 2292replace_phis_by_defined_names (vec<chain_p> chains)
bbc8a8dc
ZD
2293{
2294 chain_p chain;
2295 dref a;
2296 unsigned i, j;
2297
9771b263
DN
2298 FOR_EACH_VEC_ELT (chains, i, chain)
2299 FOR_EACH_VEC_ELT (chain->refs, j, a)
bbc8a8dc 2300 {
726a989a
RB
2301 if (gimple_code (a->stmt) == GIMPLE_PHI)
2302 {
2303 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2304 a->stmt = NULL;
2305 }
bbc8a8dc
ZD
2306 }
2307}
2308
726a989a
RB
2309/* For each reference in CHAINS, if name_defined_by_phi is not
2310 NULL, use it to set the stmt field. */
bbc8a8dc
ZD
2311
2312static void
9771b263 2313replace_names_by_phis (vec<chain_p> chains)
bbc8a8dc
ZD
2314{
2315 chain_p chain;
2316 dref a;
2317 unsigned i, j;
2318
9771b263
DN
2319 FOR_EACH_VEC_ELT (chains, i, chain)
2320 FOR_EACH_VEC_ELT (chain->refs, j, a)
726a989a 2321 if (a->stmt == NULL)
bbc8a8dc 2322 {
726a989a
RB
2323 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2324 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2325 a->name_defined_by_phi = NULL_TREE;
bbc8a8dc
ZD
2326 }
2327}
2328
2329/* Wrapper over execute_pred_commoning, to pass it as a callback
2330 to tree_transform_and_unroll_loop. */
2331
2332struct epcc_data
2333{
9771b263 2334 vec<chain_p> chains;
bbc8a8dc
ZD
2335 bitmap tmp_vars;
2336};
2337
2338static void
99b1c316 2339execute_pred_commoning_cbck (class loop *loop, void *data)
bbc8a8dc 2340{
3d9a9f94 2341 struct epcc_data *const dta = (struct epcc_data *) data;
bbc8a8dc
ZD
2342
2343 /* Restore phi nodes that were replaced by ssa names before
2344 tree_transform_and_unroll_loop (see detailed description in
2345 tree_predictive_commoning_loop). */
2346 replace_names_by_phis (dta->chains);
2347 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2348}
2349
bbc8a8dc
ZD
2350/* Base NAME and all the names in the chain of phi nodes that use it
2351 on variable VAR. The phi nodes are recognized by being in the copies of
2352 the header of the LOOP. */
2353
2354static void
99b1c316 2355base_names_in_chain_on (class loop *loop, tree name, tree var)
bbc8a8dc 2356{
355fe088 2357 gimple *stmt, *phi;
bbc8a8dc 2358 imm_use_iterator iter;
bbc8a8dc 2359
b2ec94d4 2360 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
2361
2362 while (1)
2363 {
2364 phi = NULL;
2365 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2366 {
726a989a
RB
2367 if (gimple_code (stmt) == GIMPLE_PHI
2368 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
bbc8a8dc
ZD
2369 {
2370 phi = stmt;
2371 BREAK_FROM_IMM_USE_STMT (iter);
2372 }
2373 }
2374 if (!phi)
2375 return;
2376
bbc8a8dc 2377 name = PHI_RESULT (phi);
b2ec94d4 2378 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
2379 }
2380}
2381
2382/* Given an unrolled LOOP after predictive commoning, remove the
2383 register copies arising from phi nodes by changing the base
2384 variables of SSA names. TMP_VARS is the set of the temporary variables
2385 for those we want to perform this. */
2386
2387static void
99b1c316 2388eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
bbc8a8dc
ZD
2389{
2390 edge e;
538dd0b7 2391 gphi *phi;
355fe088 2392 gimple *stmt;
726a989a 2393 tree name, use, var;
538dd0b7 2394 gphi_iterator psi;
bbc8a8dc
ZD
2395
2396 e = loop_latch_edge (loop);
726a989a 2397 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
bbc8a8dc 2398 {
538dd0b7 2399 phi = psi.phi ();
bbc8a8dc
ZD
2400 name = PHI_RESULT (phi);
2401 var = SSA_NAME_VAR (name);
70b5e7dc 2402 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
bbc8a8dc
ZD
2403 continue;
2404 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2405 gcc_assert (TREE_CODE (use) == SSA_NAME);
2406
2407 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2408 stmt = SSA_NAME_DEF_STMT (use);
726a989a 2409 while (gimple_code (stmt) == GIMPLE_PHI
1b0cfaa6
ZD
2410 /* In case we could not unroll the loop enough to eliminate
2411 all copies, we may reach the loop header before the defining
2412 statement (in that case, some register copies will be present
2413 in loop latch in the final code, corresponding to the newly
2414 created looparound phi nodes). */
726a989a 2415 && gimple_bb (stmt) != loop->header)
bbc8a8dc 2416 {
726a989a 2417 gcc_assert (single_pred_p (gimple_bb (stmt)));
bbc8a8dc
ZD
2418 use = PHI_ARG_DEF (stmt, 0);
2419 stmt = SSA_NAME_DEF_STMT (use);
2420 }
2421
2422 base_names_in_chain_on (loop, use, var);
2423 }
2424}
2425
2426/* Returns true if CHAIN is suitable to be combined. */
2427
2428static bool
2429chain_can_be_combined_p (chain_p chain)
2430{
2431 return (!chain->combined
2432 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2433}
2434
2435/* Returns the modify statement that uses NAME. Skips over assignment
2436 statements, NAME is replaced with the actual name used in the returned
2437 statement. */
2438
355fe088 2439static gimple *
bbc8a8dc
ZD
2440find_use_stmt (tree *name)
2441{
355fe088 2442 gimple *stmt;
726a989a 2443 tree rhs, lhs;
bbc8a8dc
ZD
2444
2445 /* Skip over assignments. */
2446 while (1)
2447 {
2448 stmt = single_nonlooparound_use (*name);
2449 if (!stmt)
726a989a 2450 return NULL;
bbc8a8dc 2451
726a989a
RB
2452 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2453 return NULL;
bbc8a8dc 2454
726a989a 2455 lhs = gimple_assign_lhs (stmt);
bbc8a8dc 2456 if (TREE_CODE (lhs) != SSA_NAME)
726a989a 2457 return NULL;
bbc8a8dc 2458
726a989a
RB
2459 if (gimple_assign_copy_p (stmt))
2460 {
2461 rhs = gimple_assign_rhs1 (stmt);
2462 if (rhs != *name)
2463 return NULL;
bbc8a8dc 2464
726a989a
RB
2465 *name = lhs;
2466 }
2467 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2468 == GIMPLE_BINARY_RHS)
2469 return stmt;
2470 else
2471 return NULL;
bbc8a8dc 2472 }
bbc8a8dc
ZD
2473}
2474
2475/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2476
2477static bool
2478may_reassociate_p (tree type, enum tree_code code)
2479{
2480 if (FLOAT_TYPE_P (type)
2481 && !flag_unsafe_math_optimizations)
2482 return false;
2483
2484 return (commutative_tree_code (code)
2485 && associative_tree_code (code));
2486}
2487
2488/* If the operation used in STMT is associative and commutative, go through the
2489 tree of the same operations and returns its root. Distance to the root
2490 is stored in DISTANCE. */
2491
355fe088
TS
2492static gimple *
2493find_associative_operation_root (gimple *stmt, unsigned *distance)
bbc8a8dc 2494{
726a989a 2495 tree lhs;
355fe088 2496 gimple *next;
726a989a
RB
2497 enum tree_code code = gimple_assign_rhs_code (stmt);
2498 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2499 unsigned dist = 0;
2500
726a989a
RB
2501 if (!may_reassociate_p (type, code))
2502 return NULL;
bbc8a8dc
ZD
2503
2504 while (1)
2505 {
726a989a 2506 lhs = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
2507 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2508
2509 next = find_use_stmt (&lhs);
726a989a
RB
2510 if (!next
2511 || gimple_assign_rhs_code (next) != code)
bbc8a8dc
ZD
2512 break;
2513
2514 stmt = next;
2515 dist++;
2516 }
2517
2518 if (distance)
2519 *distance = dist;
2520 return stmt;
2521}
2522
2523/* Returns the common statement in that NAME1 and NAME2 have a use. If there
2524 is no such statement, returns NULL_TREE. In case the operation used on
c80b4100 2525 NAME1 and NAME2 is associative and commutative, returns the root of the
bbc8a8dc
ZD
2526 tree formed by this operation instead of the statement that uses NAME1 or
2527 NAME2. */
2528
355fe088 2529static gimple *
bbc8a8dc
ZD
2530find_common_use_stmt (tree *name1, tree *name2)
2531{
355fe088 2532 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2533
2534 stmt1 = find_use_stmt (name1);
2535 if (!stmt1)
726a989a 2536 return NULL;
bbc8a8dc
ZD
2537
2538 stmt2 = find_use_stmt (name2);
2539 if (!stmt2)
726a989a 2540 return NULL;
bbc8a8dc
ZD
2541
2542 if (stmt1 == stmt2)
2543 return stmt1;
2544
2545 stmt1 = find_associative_operation_root (stmt1, NULL);
2546 if (!stmt1)
726a989a 2547 return NULL;
bbc8a8dc
ZD
2548 stmt2 = find_associative_operation_root (stmt2, NULL);
2549 if (!stmt2)
726a989a 2550 return NULL;
bbc8a8dc 2551
726a989a 2552 return (stmt1 == stmt2 ? stmt1 : NULL);
bbc8a8dc
ZD
2553}
2554
2555/* Checks whether R1 and R2 are combined together using CODE, with the result
2556 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2557 if it is true. If CODE is ERROR_MARK, set these values instead. */
2558
2559static bool
2560combinable_refs_p (dref r1, dref r2,
2561 enum tree_code *code, bool *swap, tree *rslt_type)
2562{
2563 enum tree_code acode;
2564 bool aswap;
2565 tree atype;
726a989a 2566 tree name1, name2;
355fe088 2567 gimple *stmt;
bbc8a8dc
ZD
2568
2569 name1 = name_for_ref (r1);
2570 name2 = name_for_ref (r2);
2571 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2572
2573 stmt = find_common_use_stmt (&name1, &name2);
2574
7906dbe4
RB
2575 if (!stmt
2576 /* A simple post-dominance check - make sure the combination
2577 is executed under the same condition as the references. */
2578 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2579 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
bbc8a8dc
ZD
2580 return false;
2581
726a989a 2582 acode = gimple_assign_rhs_code (stmt);
bbc8a8dc 2583 aswap = (!commutative_tree_code (acode)
726a989a
RB
2584 && gimple_assign_rhs1 (stmt) != name1);
2585 atype = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2586
2587 if (*code == ERROR_MARK)
2588 {
2589 *code = acode;
2590 *swap = aswap;
2591 *rslt_type = atype;
2592 return true;
2593 }
2594
2595 return (*code == acode
2596 && *swap == aswap
2597 && *rslt_type == atype);
2598}
2599
2600/* Remove OP from the operation on rhs of STMT, and replace STMT with
2601 an assignment of the remaining operand. */
2602
2603static void
355fe088 2604remove_name_from_operation (gimple *stmt, tree op)
bbc8a8dc 2605{
726a989a
RB
2606 tree other_op;
2607 gimple_stmt_iterator si;
bbc8a8dc 2608
726a989a 2609 gcc_assert (is_gimple_assign (stmt));
bbc8a8dc 2610
726a989a
RB
2611 if (gimple_assign_rhs1 (stmt) == op)
2612 other_op = gimple_assign_rhs2 (stmt);
bbc8a8dc 2613 else
726a989a
RB
2614 other_op = gimple_assign_rhs1 (stmt);
2615
2616 si = gsi_for_stmt (stmt);
2617 gimple_assign_set_rhs_from_tree (&si, other_op);
2618
2619 /* We should not have reallocated STMT. */
2620 gcc_assert (gsi_stmt (si) == stmt);
2621
bbc8a8dc
ZD
2622 update_stmt (stmt);
2623}
2624
2625/* Reassociates the expression in that NAME1 and NAME2 are used so that they
1ad3d8aa 2626 are combined in a single statement, and returns this statement. */
bbc8a8dc 2627
355fe088 2628static gimple *
1ad3d8aa 2629reassociate_to_the_same_stmt (tree name1, tree name2)
bbc8a8dc 2630{
355fe088 2631 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
538dd0b7 2632 gassign *new_stmt, *tmp_stmt;
726a989a 2633 tree new_name, tmp_name, var, r1, r2;
bbc8a8dc
ZD
2634 unsigned dist1, dist2;
2635 enum tree_code code;
2636 tree type = TREE_TYPE (name1);
726a989a 2637 gimple_stmt_iterator bsi;
bbc8a8dc
ZD
2638
2639 stmt1 = find_use_stmt (&name1);
2640 stmt2 = find_use_stmt (&name2);
2641 root1 = find_associative_operation_root (stmt1, &dist1);
2642 root2 = find_associative_operation_root (stmt2, &dist2);
726a989a 2643 code = gimple_assign_rhs_code (stmt1);
bbc8a8dc
ZD
2644
2645 gcc_assert (root1 && root2 && root1 == root2
726a989a 2646 && code == gimple_assign_rhs_code (stmt2));
bbc8a8dc
ZD
2647
2648 /* Find the root of the nearest expression in that both NAME1 and NAME2
2649 are used. */
2650 r1 = name1;
2651 s1 = stmt1;
2652 r2 = name2;
2653 s2 = stmt2;
2654
2655 while (dist1 > dist2)
2656 {
2657 s1 = find_use_stmt (&r1);
726a989a 2658 r1 = gimple_assign_lhs (s1);
bbc8a8dc
ZD
2659 dist1--;
2660 }
2661 while (dist2 > dist1)
2662 {
2663 s2 = find_use_stmt (&r2);
726a989a 2664 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2665 dist2--;
2666 }
2667
2668 while (s1 != s2)
2669 {
2670 s1 = find_use_stmt (&r1);
726a989a 2671 r1 = gimple_assign_lhs (s1);
bbc8a8dc 2672 s2 = find_use_stmt (&r2);
726a989a 2673 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2674 }
2675
2676 /* Remove NAME1 and NAME2 from the statements in that they are used
2677 currently. */
2678 remove_name_from_operation (stmt1, name1);
2679 remove_name_from_operation (stmt2, name2);
2680
2681 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2682 combine it with the rhs of S1. */
acd63801 2683 var = create_tmp_reg (type, "predreastmp");
b731b390 2684 new_name = make_ssa_name (var);
0d0e4a03 2685 new_stmt = gimple_build_assign (new_name, code, name1, name2);
bbc8a8dc 2686
acd63801 2687 var = create_tmp_reg (type, "predreastmp");
b731b390 2688 tmp_name = make_ssa_name (var);
726a989a
RB
2689
2690 /* Rhs of S1 may now be either a binary expression with operation
2691 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2692 so that name1 or name2 was removed from it). */
0d0e4a03
JJ
2693 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2694 gimple_assign_rhs1 (s1),
2695 gimple_assign_rhs2 (s1));
726a989a
RB
2696
2697 bsi = gsi_for_stmt (s1);
2698 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2699 s1 = gsi_stmt (bsi);
bbc8a8dc
ZD
2700 update_stmt (s1);
2701
1ad3d8aa 2702 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
726a989a 2703 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
bbc8a8dc
ZD
2704
2705 return new_stmt;
2706}
2707
2708/* Returns the statement that combines references R1 and R2. In case R1
2709 and R2 are not used in the same statement, but they are used with an
2710 associative and commutative operation in the same expression, reassociate
1ad3d8aa 2711 the expression so that they are used in the same statement. */
bbc8a8dc 2712
355fe088 2713static gimple *
1ad3d8aa 2714stmt_combining_refs (dref r1, dref r2)
bbc8a8dc 2715{
355fe088 2716 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2717 tree name1 = name_for_ref (r1);
2718 tree name2 = name_for_ref (r2);
2719
2720 stmt1 = find_use_stmt (&name1);
2721 stmt2 = find_use_stmt (&name2);
2722 if (stmt1 == stmt2)
2723 return stmt1;
2724
1ad3d8aa 2725 return reassociate_to_the_same_stmt (name1, name2);
bbc8a8dc
ZD
2726}
2727
2728/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2729 description of the new chain is returned, otherwise we return NULL. */
2730
2731static chain_p
2732combine_chains (chain_p ch1, chain_p ch2)
2733{
2734 dref r1, r2, nw;
2735 enum tree_code op = ERROR_MARK;
2736 bool swap = false;
2737 chain_p new_chain;
1ad3d8aa 2738 unsigned i;
bbc8a8dc
ZD
2739 tree rslt_type = NULL_TREE;
2740
2741 if (ch1 == ch2)
a90352a0 2742 return NULL;
bbc8a8dc
ZD
2743 if (ch1->length != ch2->length)
2744 return NULL;
2745
9771b263 2746 if (ch1->refs.length () != ch2->refs.length ())
bbc8a8dc
ZD
2747 return NULL;
2748
9771b263
DN
2749 for (i = 0; (ch1->refs.iterate (i, &r1)
2750 && ch2->refs.iterate (i, &r2)); i++)
bbc8a8dc
ZD
2751 {
2752 if (r1->distance != r2->distance)
2753 return NULL;
2754
2755 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2756 return NULL;
2757 }
2758
2759 if (swap)
6b4db501 2760 std::swap (ch1, ch2);
bbc8a8dc
ZD
2761
2762 new_chain = XCNEW (struct chain);
2763 new_chain->type = CT_COMBINATION;
82d6e6fc 2764 new_chain->op = op;
bbc8a8dc
ZD
2765 new_chain->ch1 = ch1;
2766 new_chain->ch2 = ch2;
2767 new_chain->rslt_type = rslt_type;
2768 new_chain->length = ch1->length;
2769
1ad3d8aa
BC
2770 for (i = 0; (ch1->refs.iterate (i, &r1)
2771 && ch2->refs.iterate (i, &r2)); i++)
2772 {
99b1c316 2773 nw = XCNEW (class dref_d);
1ad3d8aa 2774 nw->stmt = stmt_combining_refs (r1, r2);
bbc8a8dc 2775 nw->distance = r1->distance;
8539a302 2776
d822f3d5 2777 new_chain->refs.safe_push (nw);
8539a302 2778 }
bbc8a8dc 2779
1ad3d8aa
BC
2780 ch1->combined = true;
2781 ch2->combined = true;
bbc8a8dc
ZD
2782 return new_chain;
2783}
2784
bd9cc42b
BC
2785/* Recursively update position information of all offspring chains to ROOT
2786 chain's position information. */
2787
2788static void
2789update_pos_for_combined_chains (chain_p root)
2790{
2791 chain_p ch1 = root->ch1, ch2 = root->ch2;
2792 dref ref, ref1, ref2;
2793 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2794 && ch1->refs.iterate (j, &ref1)
2795 && ch2->refs.iterate (j, &ref2)); ++j)
2796 ref1->pos = ref2->pos = ref->pos;
2797
2798 if (ch1->type == CT_COMBINATION)
2799 update_pos_for_combined_chains (ch1);
2800 if (ch2->type == CT_COMBINATION)
2801 update_pos_for_combined_chains (ch2);
2802}
2803
2804/* Returns true if statement S1 dominates statement S2. */
2805
2806static bool
2807pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2808{
2809 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2810
2811 if (!bb1 || s1 == s2)
2812 return true;
2813
2814 if (bb1 == bb2)
2815 return gimple_uid (s1) < gimple_uid (s2);
2816
2817 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2818}
2819
2820/* Try to combine the CHAINS in LOOP. */
bbc8a8dc
ZD
2821
2822static void
99b1c316 2823try_combine_chains (class loop *loop, vec<chain_p> *chains)
bbc8a8dc
ZD
2824{
2825 unsigned i, j;
2826 chain_p ch1, ch2, cch;
ef062b13 2827 auto_vec<chain_p> worklist;
bd9cc42b 2828 bool combined_p = false;
bbc8a8dc 2829
9771b263 2830 FOR_EACH_VEC_ELT (*chains, i, ch1)
bbc8a8dc 2831 if (chain_can_be_combined_p (ch1))
9771b263 2832 worklist.safe_push (ch1);
bbc8a8dc 2833
9771b263 2834 while (!worklist.is_empty ())
bbc8a8dc 2835 {
9771b263 2836 ch1 = worklist.pop ();
bbc8a8dc
ZD
2837 if (!chain_can_be_combined_p (ch1))
2838 continue;
2839
9771b263 2840 FOR_EACH_VEC_ELT (*chains, j, ch2)
bbc8a8dc
ZD
2841 {
2842 if (!chain_can_be_combined_p (ch2))
2843 continue;
2844
2845 cch = combine_chains (ch1, ch2);
2846 if (cch)
2847 {
9771b263
DN
2848 worklist.safe_push (cch);
2849 chains->safe_push (cch);
bd9cc42b
BC
2850 combined_p = true;
2851 break;
2852 }
2853 }
2854 }
2855 if (!combined_p)
2856 return;
2857
2858 /* Setup UID for all statements in dominance order. */
3c2df916 2859 basic_block *bbs = get_loop_body_in_dom_order (loop);
bd9cc42b
BC
2860 renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2861 free (bbs);
2862
2863 /* Re-association in combined chains may generate statements different to
2864 order of references of the original chain. We need to keep references
2865 of combined chain in dominance order so that all uses will be inserted
2866 after definitions. Note:
2867 A) This is necessary for all combined chains.
2868 B) This is only necessary for ZERO distance references because other
2869 references inherit value from loop carried PHIs.
2870
2871 We first update position information for all combined chains. */
2872 dref ref;
2873 for (i = 0; chains->iterate (i, &ch1); ++i)
2874 {
2875 if (ch1->type != CT_COMBINATION || ch1->combined)
2876 continue;
2877
2878 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2879 ref->pos = gimple_uid (ref->stmt);
2880
2881 update_pos_for_combined_chains (ch1);
2882 }
2883 /* Then sort references according to newly updated position information. */
2884 for (i = 0; chains->iterate (i, &ch1); ++i)
2885 {
2886 if (ch1->type != CT_COMBINATION && !ch1->combined)
2887 continue;
2888
2889 /* Find the first reference with non-ZERO distance. */
2890 if (ch1->length == 0)
2891 j = ch1->refs.length();
2892 else
2893 {
2894 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2895 if (ref->distance != 0)
2896 break;
2897 }
2898
2899 /* Sort all ZERO distance references by position. */
2900 qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2901
2902 if (ch1->combined)
2903 continue;
2904
2905 /* For ZERO length chain, has_max_use_after must be true since root
2906 combined stmt must dominates others. */
2907 if (ch1->length == 0)
2908 {
2909 ch1->has_max_use_after = true;
2910 continue;
2911 }
2912 /* Check if there is use at max distance after root for combined chains
2913 and set flag accordingly. */
2914 ch1->has_max_use_after = false;
2915 gimple *root_stmt = get_chain_root (ch1)->stmt;
2916 for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2917 {
2918 if (ref->distance == ch1->length
2919 && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2920 {
2921 ch1->has_max_use_after = true;
bbc8a8dc
ZD
2922 break;
2923 }
2924 }
2925 }
2926}
2927
d9c259ef
BC
2928/* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2929 if this is impossible because one of these initializers may trap, true
2930 otherwise. */
2931
2932static bool
99b1c316 2933prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
d9c259ef
BC
2934{
2935 unsigned i, n = chain->length;
2936
2937 /* For now we can't eliminate stores if some of them are conditional
2938 executed. */
2939 if (!chain->all_always_accessed)
2940 return false;
2941
2942 /* Nothing to intialize for intra-iteration store elimination. */
2943 if (n == 0 && chain->type == CT_STORE_STORE)
2944 return true;
2945
39ff50ce
BC
2946 /* For store elimination chain, there is nothing to initialize if stores
2947 to be eliminated only store loop invariant values into memory. */
2948 if (chain->type == CT_STORE_STORE
2949 && is_inv_store_elimination_chain (loop, chain))
2950 {
2951 chain->inv_store_elimination = true;
2952 return true;
2953 }
2954
d9c259ef
BC
2955 chain->inits.create (n);
2956 chain->inits.safe_grow_cleared (n);
2957
2958 /* For store eliminatin chain like below:
2959
2960 for (i = 0; i < len; i++)
2961 {
2962 a[i] = 1;
2963 // a[i + 1] = ...
2964 a[i + 2] = 3;
2965 }
2966
2967 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2968 content of a[i + 1] remain the same if the loop iterates fewer times
2969 than chain->length. We need to set up root variables for such stores
2970 by loading from memory before loop. Note we only need to load bubble
2971 elements because loop body is guaranteed to be executed at least once
2972 after loop's preheader edge. */
2973 auto_vec<bool> bubbles;
2974 bubbles.safe_grow_cleared (n + 1);
2975 for (i = 0; i < chain->refs.length (); i++)
2976 bubbles[chain->refs[i]->distance] = true;
2977
2978 struct data_reference *dr = get_chain_root (chain)->ref;
2979 for (i = 0; i < n; i++)
2980 {
2981 if (bubbles[i])
2982 continue;
2983
2984 gimple_seq stmts = NULL;
2985
2986 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2987 if (stmts)
2988 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2989
2990 chain->inits[i] = init;
2991 }
2992
2993 return true;
2994}
2995
bbc8a8dc
ZD
2996/* Prepare initializers for CHAIN in LOOP. Returns false if this is
2997 impossible because one of these initializers may trap, true otherwise. */
2998
2999static bool
99b1c316 3000prepare_initializers_chain (class loop *loop, chain_p chain)
bbc8a8dc
ZD
3001{
3002 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
3003 struct data_reference *dr = get_chain_root (chain)->ref;
726a989a 3004 tree init;
bbc8a8dc
ZD
3005 dref laref;
3006 edge entry = loop_preheader_edge (loop);
3007
d9c259ef
BC
3008 if (chain->type == CT_STORE_STORE)
3009 return prepare_initializers_chain_store_elim (loop, chain);
3010
bbc8a8dc
ZD
3011 /* Find the initializers for the variables, and check that they cannot
3012 trap. */
9771b263 3013 chain->inits.create (n);
bbc8a8dc 3014 for (i = 0; i < n; i++)
9771b263 3015 chain->inits.quick_push (NULL_TREE);
bbc8a8dc
ZD
3016
3017 /* If we have replaced some looparound phi nodes, use their initializers
3018 instead of creating our own. */
9771b263 3019 FOR_EACH_VEC_ELT (chain->refs, i, laref)
bbc8a8dc 3020 {
726a989a 3021 if (gimple_code (laref->stmt) != GIMPLE_PHI)
bbc8a8dc
ZD
3022 continue;
3023
3024 gcc_assert (laref->distance > 0);
9771b263
DN
3025 chain->inits[n - laref->distance]
3026 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
bbc8a8dc
ZD
3027 }
3028
3029 for (i = 0; i < n; i++)
3030 {
7ec44c3d
RB
3031 gimple_seq stmts = NULL;
3032
9771b263 3033 if (chain->inits[i] != NULL_TREE)
bbc8a8dc
ZD
3034 continue;
3035
9f2b860b 3036 init = ref_at_iteration (dr, (int) i - n, &stmts);
bbc8a8dc 3037 if (!chain->all_always_accessed && tree_could_trap_p (init))
7ec44c3d
RB
3038 {
3039 gimple_seq_discard (stmts);
3040 return false;
3041 }
bbc8a8dc 3042
bbc8a8dc 3043 if (stmts)
aa4de160 3044 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
bbc8a8dc 3045
9771b263 3046 chain->inits[i] = init;
bbc8a8dc
ZD
3047 }
3048
3049 return true;
3050}
3051
3052/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3053 be used because the initializers might trap. */
3054
3055static void
99b1c316 3056prepare_initializers (class loop *loop, vec<chain_p> chains)
bbc8a8dc
ZD
3057{
3058 chain_p chain;
3059 unsigned i;
3060
9771b263 3061 for (i = 0; i < chains.length (); )
bbc8a8dc 3062 {
9771b263 3063 chain = chains[i];
bbc8a8dc
ZD
3064 if (prepare_initializers_chain (loop, chain))
3065 i++;
3066 else
3067 {
3068 release_chain (chain);
9771b263 3069 chains.unordered_remove (i);
bbc8a8dc
ZD
3070 }
3071 }
3072}
3073
d9c259ef
BC
3074/* Generates finalizer memory references for CHAIN in LOOP. Returns true
3075 if finalizer code for CHAIN can be generated, otherwise false. */
3076
3077static bool
99b1c316 3078prepare_finalizers_chain (class loop *loop, chain_p chain)
d9c259ef
BC
3079{
3080 unsigned i, n = chain->length;
3081 struct data_reference *dr = get_chain_root (chain)->ref;
3082 tree fini, niters = number_of_latch_executions (loop);
3083
3084 /* For now we can't eliminate stores if some of them are conditional
3085 executed. */
3086 if (!chain->all_always_accessed)
3087 return false;
3088
3089 chain->finis.create (n);
3090 for (i = 0; i < n; i++)
3091 chain->finis.quick_push (NULL_TREE);
3092
3093 /* We never use looparound phi node for store elimination chains. */
3094
3095 /* Find the finalizers for the variables, and check that they cannot
3096 trap. */
3097 for (i = 0; i < n; i++)
3098 {
3099 gimple_seq stmts = NULL;
3100 gcc_assert (chain->finis[i] == NULL_TREE);
3101
3102 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3103 {
a5c93f53 3104 niters = unshare_expr (niters);
d9c259ef
BC
3105 niters = force_gimple_operand (niters, &stmts, true, NULL);
3106 if (stmts)
3107 {
3108 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3109 stmts = NULL;
3110 }
3111 }
3112 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3113 if (stmts)
3114 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3115
3116 chain->finis[i] = fini;
3117 }
3118
3119 return true;
3120}
3121
3122/* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3123 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3124
3125static bool
99b1c316 3126prepare_finalizers (class loop *loop, vec<chain_p> chains)
d9c259ef
BC
3127{
3128 chain_p chain;
3129 unsigned i;
3130 bool loop_closed_ssa = false;
3131
3132 for (i = 0; i < chains.length ();)
3133 {
3134 chain = chains[i];
3135
3136 /* Finalizer is only necessary for inter-iteration store elimination
3137 chains. */
3138 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3139 {
3140 i++;
3141 continue;
3142 }
3143
3144 if (prepare_finalizers_chain (loop, chain))
3145 {
3146 i++;
7061cfc0
BC
3147 /* Be conservative, assume loop closed ssa form is corrupted
3148 by store-store chain. Though it's not always the case if
3149 eliminated stores only store loop invariant values into
3150 memory. */
3151 loop_closed_ssa = true;
d9c259ef
BC
3152 }
3153 else
3154 {
3155 release_chain (chain);
3156 chains.unordered_remove (i);
3157 }
3158 }
3159 return loop_closed_ssa;
3160}
3161
aa4de160
BC
3162/* Insert all initializing gimple stmts into loop's entry edge. */
3163
3164static void
99b1c316 3165insert_init_seqs (class loop *loop, vec<chain_p> chains)
aa4de160
BC
3166{
3167 unsigned i;
3168 edge entry = loop_preheader_edge (loop);
3169
3170 for (i = 0; i < chains.length (); ++i)
3171 if (chains[i]->init_seq)
3172 {
3173 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3174 chains[i]->init_seq = NULL;
3175 }
3176}
3177
d9c259ef
BC
3178/* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3179 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3180 form was corrupted. */
bbc8a8dc 3181
d9c259ef 3182static unsigned
99b1c316 3183tree_predictive_commoning_loop (class loop *loop)
bbc8a8dc 3184{
9771b263
DN
3185 vec<data_reference_p> datarefs;
3186 vec<ddr_p> dependences;
bbc8a8dc 3187 struct component *components;
6e1aa848 3188 vec<chain_p> chains = vNULL;
bbc8a8dc 3189 unsigned unroll_factor;
99b1c316 3190 class tree_niter_desc desc;
d9c259ef 3191 bool unroll = false, loop_closed_ssa = false;
bbc8a8dc 3192 edge exit;
bbc8a8dc
ZD
3193
3194 if (dump_file && (dump_flags & TDF_DETAILS))
3195 fprintf (dump_file, "Processing loop %d\n", loop->num);
3196
b8a4b8c8
BC
3197 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3198 if (get_max_loop_iterations_int (loop) == 0)
3199 {
3200 if (dump_file && (dump_flags & TDF_DETAILS))
3201 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3202
d9c259ef 3203 return 0;
b8a4b8c8
BC
3204 }
3205
bbc8a8dc
ZD
3206 /* Find the data references and split them into components according to their
3207 dependence relations. */
00f96dc9 3208 auto_vec<loop_p, 3> loop_nest;
9771b263 3209 dependences.create (10);
07687835 3210 datarefs.create (10);
9ca3d00e
AB
3211 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3212 &dependences))
3213 {
3214 if (dump_file && (dump_flags & TDF_DETAILS))
3215 fprintf (dump_file, "Cannot analyze data dependencies\n");
9ca3d00e
AB
3216 free_data_refs (datarefs);
3217 free_dependence_relations (dependences);
d9c259ef 3218 return 0;
9ca3d00e
AB
3219 }
3220
bbc8a8dc
ZD
3221 if (dump_file && (dump_flags & TDF_DETAILS))
3222 dump_data_dependence_relations (dump_file, dependences);
3223
3224 components = split_data_refs_to_components (loop, datarefs, dependences);
9771b263 3225 loop_nest.release ();
bbc8a8dc
ZD
3226 free_dependence_relations (dependences);
3227 if (!components)
3228 {
3229 free_data_refs (datarefs);
4f87d581 3230 free_affine_expand_cache (&name_expansions);
d9c259ef 3231 return 0;
bbc8a8dc
ZD
3232 }
3233
3234 if (dump_file && (dump_flags & TDF_DETAILS))
3235 {
3236 fprintf (dump_file, "Initial state:\n\n");
3237 dump_components (dump_file, components);
3238 }
3239
3240 /* Find the suitable components and split them into chains. */
3241 components = filter_suitable_components (loop, components);
3242
0e3de1d4 3243 auto_bitmap tmp_vars;
bbc8a8dc
ZD
3244 looparound_phis = BITMAP_ALLOC (NULL);
3245 determine_roots (loop, components, &chains);
3246 release_components (components);
3247
9771b263 3248 if (!chains.exists ())
bbc8a8dc
ZD
3249 {
3250 if (dump_file && (dump_flags & TDF_DETAILS))
3251 fprintf (dump_file,
3252 "Predictive commoning failed: no suitable chains\n");
3253 goto end;
3254 }
3255 prepare_initializers (loop, chains);
d9c259ef 3256 loop_closed_ssa = prepare_finalizers (loop, chains);
bbc8a8dc
ZD
3257
3258 /* Try to combine the chains that are always worked with together. */
bd9cc42b 3259 try_combine_chains (loop, &chains);
bbc8a8dc 3260
aa4de160
BC
3261 insert_init_seqs (loop, chains);
3262
bbc8a8dc
ZD
3263 if (dump_file && (dump_flags & TDF_DETAILS))
3264 {
3265 fprintf (dump_file, "Before commoning:\n\n");
3266 dump_chains (dump_file, chains);
3267 }
3268
3269 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3270 that its number of iterations is divisible by the factor. */
3271 unroll_factor = determine_unroll_factor (chains);
3272 scev_reset ();
e1e6dc73
RG
3273 unroll = (unroll_factor > 1
3274 && can_unroll_loop_p (loop, unroll_factor, &desc));
bbc8a8dc
ZD
3275 exit = single_dom_exit (loop);
3276
3277 /* Execute the predictive commoning transformations, and possibly unroll the
3278 loop. */
3279 if (unroll)
3280 {
3281 struct epcc_data dta;
3282
3283 if (dump_file && (dump_flags & TDF_DETAILS))
3284 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3285
3286 dta.chains = chains;
3287 dta.tmp_vars = tmp_vars;
b8698a0f 3288
bbc8a8dc
ZD
3289 update_ssa (TODO_update_ssa_only_virtuals);
3290
3291 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3292 execute_pred_commoning_cbck is called may cause phi nodes to be
3293 reallocated, which is a problem since CHAINS may point to these
3294 statements. To fix this, we store the ssa names defined by the
3295 phi nodes here instead of the phi nodes themselves, and restore
3296 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3297 replace_phis_by_defined_names (chains);
3298
3299 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3300 execute_pred_commoning_cbck, &dta);
3301 eliminate_temp_copies (loop, tmp_vars);
3302 }
3303 else
3304 {
3305 if (dump_file && (dump_flags & TDF_DETAILS))
3306 fprintf (dump_file,
3307 "Executing predictive commoning without unrolling.\n");
3308 execute_pred_commoning (loop, chains, tmp_vars);
3309 }
3310
3311end: ;
3312 release_chains (chains);
3313 free_data_refs (datarefs);
bbc8a8dc
ZD
3314 BITMAP_FREE (looparound_phis);
3315
3316 free_affine_expand_cache (&name_expansions);
3317
d9c259ef 3318 return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
bbc8a8dc
ZD
3319}
3320
3321/* Runs predictive commoning. */
3322
592c303d 3323unsigned
bbc8a8dc
ZD
3324tree_predictive_commoning (void)
3325{
99b1c316 3326 class loop *loop;
d9c259ef 3327 unsigned ret = 0, changed = 0;
bbc8a8dc
ZD
3328
3329 initialize_original_copy_tables ();
f0bd40b1 3330 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
8bcf15f6
JH
3331 if (optimize_loop_for_speed_p (loop))
3332 {
d9c259ef 3333 changed |= tree_predictive_commoning_loop (loop);
8bcf15f6 3334 }
d9c259ef 3335 free_original_copy_tables ();
bbc8a8dc 3336
d9c259ef 3337 if (changed > 0)
bbc8a8dc
ZD
3338 {
3339 scev_reset ();
d9c259ef
BC
3340
3341 if (changed > 1)
3342 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3343
592c303d 3344 ret = TODO_cleanup_cfg;
bbc8a8dc 3345 }
592c303d
ZD
3346
3347 return ret;
bbc8a8dc 3348}
c1bf2a39
AM
3349
3350/* Predictive commoning Pass. */
3351
3352static unsigned
726338f4 3353run_tree_predictive_commoning (struct function *fun)
c1bf2a39 3354{
726338f4 3355 if (number_of_loops (fun) <= 1)
c1bf2a39
AM
3356 return 0;
3357
3358 return tree_predictive_commoning ();
3359}
3360
c1bf2a39
AM
3361namespace {
3362
3363const pass_data pass_data_predcom =
3364{
3365 GIMPLE_PASS, /* type */
3366 "pcom", /* name */
3367 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
3368 TV_PREDCOM, /* tv_id */
3369 PROP_cfg, /* properties_required */
3370 0, /* properties_provided */
3371 0, /* properties_destroyed */
3372 0, /* todo_flags_start */
3373 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3374};
3375
3376class pass_predcom : public gimple_opt_pass
3377{
3378public:
3379 pass_predcom (gcc::context *ctxt)
3380 : gimple_opt_pass (pass_data_predcom, ctxt)
3381 {}
3382
3383 /* opt_pass methods: */
1a3d085c 3384 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
726338f4 3385 virtual unsigned int execute (function *fun)
be55bfe6 3386 {
726338f4 3387 return run_tree_predictive_commoning (fun);
be55bfe6 3388 }
c1bf2a39
AM
3389
3390}; // class pass_predcom
3391
3392} // anon namespace
3393
3394gimple_opt_pass *
3395make_pass_predcom (gcc::context *ctxt)
3396{
3397 return new pass_predcom (ctxt);
3398}
3399
3400