]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-predcom.c
Update copyright years.
[thirdparty/gcc.git] / gcc / tree-predcom.c
CommitLineData
bbc8a8dc 1/* Predictive commoning.
99dee823 2 Copyright (C) 2005-2021 Free Software Foundation, Inc.
b8698a0f 3
bbc8a8dc 4This file is part of GCC.
b8698a0f 5
bbc8a8dc
ZD
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
9dcd6f09 8Free Software Foundation; either version 3, or (at your option) any
bbc8a8dc 9later version.
b8698a0f 10
bbc8a8dc
ZD
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
b8698a0f 15
bbc8a8dc 16You should have received a copy of the GNU General Public License
9dcd6f09
NC
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
bbc8a8dc
ZD
19
20/* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
b8698a0f 32
bbc8a8dc
ZD
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
b8698a0f 66
bbc8a8dc
ZD
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
b8698a0f 79 single root reference of the chain (adjusting their distance
bbc8a8dc
ZD
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
073a8998 102 of a reference reusing its value. Variables R0 up to RN are created,
bbc8a8dc
ZD
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
b8698a0f 135
bbc8a8dc
ZD
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
d9c259ef
BC
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
bbc8a8dc 168
d9c259ef 169 It can be replaced with:
bbc8a8dc 170
d9c259ef
BC
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
bbc8a8dc 182
d9c259ef
BC
183 If the loop runs more than 1 iterations, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
9cdcebf9
BC
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
d9c259ef
BC
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
bbc8a8dc
ZD
202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207#include "config.h"
208#include "system.h"
209#include "coretypes.h"
c7131fb2 210#include "backend.h"
957060b5 211#include "rtl.h"
bbc8a8dc 212#include "tree.h"
c7131fb2 213#include "gimple.h"
957060b5
AM
214#include "predict.h"
215#include "tree-pass.h"
c7131fb2 216#include "ssa.h"
957060b5 217#include "gimple-pretty-print.h"
c7131fb2 218#include "alias.h"
40e23961 219#include "fold-const.h"
bbc8a8dc 220#include "cfgloop.h"
2fb9a547 221#include "tree-eh.h"
45b0be94 222#include "gimplify.h"
5be5c238 223#include "gimple-iterator.h"
18f429e2 224#include "gimplify-me.h"
e28030cf
AM
225#include "tree-ssa-loop-ivopts.h"
226#include "tree-ssa-loop-manip.h"
227#include "tree-ssa-loop-niter.h"
442b4905
AM
228#include "tree-ssa-loop.h"
229#include "tree-into-ssa.h"
230#include "tree-dfa.h"
7a300452 231#include "tree-ssa.h"
bbc8a8dc
ZD
232#include "tree-data-ref.h"
233#include "tree-scalar-evolution.h"
bbc8a8dc 234#include "tree-affine.h"
59537744 235#include "builtins.h"
bbc8a8dc
ZD
236
237/* The maximum number of iterations between the considered memory
238 references. */
239
240#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
b8698a0f 241
726a989a
RB
242/* Data references (or phi nodes that carry data reference values across
243 loop iterations). */
bbc8a8dc 244
6c1dae73 245typedef class dref_d
bbc8a8dc 246{
6c1dae73 247public:
bbc8a8dc
ZD
248 /* The reference itself. */
249 struct data_reference *ref;
250
251 /* The statement in that the reference appears. */
355fe088 252 gimple *stmt;
726a989a
RB
253
254 /* In case that STMT is a phi node, this field is set to the SSA name
255 defined by it in replace_phis_by_defined_names (in order to avoid
256 pointing to phi node that got reallocated in the meantime). */
257 tree name_defined_by_phi;
bbc8a8dc
ZD
258
259 /* Distance of the reference from the root of the chain (in number of
260 iterations of the loop). */
261 unsigned distance;
262
263 /* Number of iterations offset from the first reference in the component. */
807e902e 264 widest_int offset;
bbc8a8dc
ZD
265
266 /* Number of the reference in a component, in dominance ordering. */
267 unsigned pos;
268
269 /* True if the memory reference is always accessed when the loop is
270 entered. */
271 unsigned always_accessed : 1;
272} *dref;
273
bbc8a8dc
ZD
274
275/* Type of the chain of the references. */
276
277enum chain_type
278{
279 /* The addresses of the references in the chain are constant. */
280 CT_INVARIANT,
281
282 /* There are only loads in the chain. */
283 CT_LOAD,
284
285 /* Root of the chain is store, the rest are loads. */
286 CT_STORE_LOAD,
287
d9c259ef
BC
288 /* There are only stores in the chain. */
289 CT_STORE_STORE,
290
bbc8a8dc
ZD
291 /* A combination of two chains. */
292 CT_COMBINATION
293};
294
295/* Chains of data references. */
296
297typedef struct chain
298{
299 /* Type of the chain. */
300 enum chain_type type;
301
302 /* For combination chains, the operator and the two chains that are
303 combined, and the type of the result. */
82d6e6fc 304 enum tree_code op;
bbc8a8dc
ZD
305 tree rslt_type;
306 struct chain *ch1, *ch2;
307
308 /* The references in the chain. */
9771b263 309 vec<dref> refs;
bbc8a8dc
ZD
310
311 /* The maximum distance of the reference in the chain from the root. */
312 unsigned length;
313
314 /* The variables used to copy the value throughout iterations. */
9771b263 315 vec<tree> vars;
bbc8a8dc
ZD
316
317 /* Initializers for the variables. */
9771b263 318 vec<tree> inits;
bbc8a8dc 319
d9c259ef
BC
320 /* Finalizers for the eliminated stores. */
321 vec<tree> finis;
322
aa4de160
BC
323 /* gimple stmts intializing the initial variables of the chain. */
324 gimple_seq init_seq;
325
d9c259ef
BC
326 /* gimple stmts finalizing the eliminated stores of the chain. */
327 gimple_seq fini_seq;
328
bbc8a8dc
ZD
329 /* True if there is a use of a variable with the maximal distance
330 that comes after the root in the loop. */
331 unsigned has_max_use_after : 1;
332
333 /* True if all the memory references in the chain are always accessed. */
334 unsigned all_always_accessed : 1;
335
336 /* True if this chain was combined together with some other chain. */
337 unsigned combined : 1;
39ff50ce
BC
338
339 /* True if this is store elimination chain and eliminated stores store
340 loop invariant value into memory. */
341 unsigned inv_store_elimination : 1;
bbc8a8dc
ZD
342} *chain_p;
343
bbc8a8dc
ZD
344
345/* Describes the knowledge about the step of the memory references in
346 the component. */
347
348enum ref_step_type
349{
350 /* The step is zero. */
351 RS_INVARIANT,
352
353 /* The step is nonzero. */
354 RS_NONZERO,
355
356 /* The step may or may not be nonzero. */
357 RS_ANY
358};
359
360/* Components of the data dependence graph. */
361
362struct component
363{
364 /* The references in the component. */
9771b263 365 vec<dref> refs;
bbc8a8dc
ZD
366
367 /* What we know about the step of the references in the component. */
368 enum ref_step_type comp_step;
369
d9c259ef
BC
370 /* True if all references in component are stores and we try to do
371 intra/inter loop iteration dead store elimination. */
372 bool eliminate_store_p;
373
bbc8a8dc
ZD
374 /* Next component in the list. */
375 struct component *next;
376};
377
378/* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
379
380static bitmap looparound_phis;
381
382/* Cache used by tree_to_aff_combination_expand. */
383
39c8aaa4 384static hash_map<tree, name_expansion *> *name_expansions;
bbc8a8dc
ZD
385
386/* Dumps data reference REF to FILE. */
387
388extern void dump_dref (FILE *, dref);
389void
390dump_dref (FILE *file, dref ref)
391{
392 if (ref->ref)
393 {
394 fprintf (file, " ");
395 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396 fprintf (file, " (id %u%s)\n", ref->pos,
397 DR_IS_READ (ref->ref) ? "" : ", write");
398
399 fprintf (file, " offset ");
807e902e 400 print_decs (ref->offset, file);
bbc8a8dc
ZD
401 fprintf (file, "\n");
402
403 fprintf (file, " distance %u\n", ref->distance);
404 }
405 else
406 {
726a989a 407 if (gimple_code (ref->stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
408 fprintf (file, " looparound ref\n");
409 else
410 fprintf (file, " combination ref\n");
411 fprintf (file, " in statement ");
726a989a 412 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
bbc8a8dc
ZD
413 fprintf (file, "\n");
414 fprintf (file, " distance %u\n", ref->distance);
415 }
416
417}
418
419/* Dumps CHAIN to FILE. */
420
421extern void dump_chain (FILE *, chain_p);
422void
423dump_chain (FILE *file, chain_p chain)
424{
425 dref a;
426 const char *chain_type;
427 unsigned i;
428 tree var;
429
430 switch (chain->type)
431 {
432 case CT_INVARIANT:
433 chain_type = "Load motion";
434 break;
435
436 case CT_LOAD:
437 chain_type = "Loads-only";
438 break;
439
440 case CT_STORE_LOAD:
441 chain_type = "Store-loads";
442 break;
443
d9c259ef
BC
444 case CT_STORE_STORE:
445 chain_type = "Store-stores";
446 break;
447
bbc8a8dc
ZD
448 case CT_COMBINATION:
449 chain_type = "Combination";
450 break;
451
452 default:
453 gcc_unreachable ();
454 }
455
456 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 chain->combined ? " (combined)" : "");
458 if (chain->type != CT_INVARIANT)
459 fprintf (file, " max distance %u%s\n", chain->length,
460 chain->has_max_use_after ? "" : ", may reuse first");
461
462 if (chain->type == CT_COMBINATION)
463 {
464 fprintf (file, " equal to %p %s %p in type ",
82d6e6fc 465 (void *) chain->ch1, op_symbol_code (chain->op),
bbc8a8dc
ZD
466 (void *) chain->ch2);
467 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468 fprintf (file, "\n");
469 }
470
9771b263 471 if (chain->vars.exists ())
bbc8a8dc
ZD
472 {
473 fprintf (file, " vars");
9771b263 474 FOR_EACH_VEC_ELT (chain->vars, i, var)
bbc8a8dc
ZD
475 {
476 fprintf (file, " ");
477 print_generic_expr (file, var, TDF_SLIM);
478 }
479 fprintf (file, "\n");
480 }
481
9771b263 482 if (chain->inits.exists ())
bbc8a8dc
ZD
483 {
484 fprintf (file, " inits");
9771b263 485 FOR_EACH_VEC_ELT (chain->inits, i, var)
bbc8a8dc
ZD
486 {
487 fprintf (file, " ");
488 print_generic_expr (file, var, TDF_SLIM);
489 }
490 fprintf (file, "\n");
491 }
492
493 fprintf (file, " references:\n");
9771b263 494 FOR_EACH_VEC_ELT (chain->refs, i, a)
bbc8a8dc
ZD
495 dump_dref (file, a);
496
497 fprintf (file, "\n");
498}
499
500/* Dumps CHAINS to FILE. */
501
9771b263 502extern void dump_chains (FILE *, vec<chain_p> );
bbc8a8dc 503void
9771b263 504dump_chains (FILE *file, vec<chain_p> chains)
bbc8a8dc
ZD
505{
506 chain_p chain;
507 unsigned i;
508
9771b263 509 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc
ZD
510 dump_chain (file, chain);
511}
512
513/* Dumps COMP to FILE. */
514
515extern void dump_component (FILE *, struct component *);
516void
517dump_component (FILE *file, struct component *comp)
518{
519 dref a;
520 unsigned i;
521
522 fprintf (file, "Component%s:\n",
523 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
9771b263 524 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc
ZD
525 dump_dref (file, a);
526 fprintf (file, "\n");
527}
528
529/* Dumps COMPS to FILE. */
530
531extern void dump_components (FILE *, struct component *);
532void
533dump_components (FILE *file, struct component *comps)
534{
535 struct component *comp;
536
537 for (comp = comps; comp; comp = comp->next)
538 dump_component (file, comp);
539}
540
541/* Frees a chain CHAIN. */
542
543static void
544release_chain (chain_p chain)
545{
546 dref ref;
547 unsigned i;
548
549 if (chain == NULL)
550 return;
551
9771b263 552 FOR_EACH_VEC_ELT (chain->refs, i, ref)
bbc8a8dc
ZD
553 free (ref);
554
9771b263
DN
555 chain->refs.release ();
556 chain->vars.release ();
557 chain->inits.release ();
aa4de160
BC
558 if (chain->init_seq)
559 gimple_seq_discard (chain->init_seq);
bbc8a8dc 560
d9c259ef
BC
561 chain->finis.release ();
562 if (chain->fini_seq)
563 gimple_seq_discard (chain->fini_seq);
564
bbc8a8dc
ZD
565 free (chain);
566}
567
568/* Frees CHAINS. */
569
570static void
9771b263 571release_chains (vec<chain_p> chains)
bbc8a8dc
ZD
572{
573 unsigned i;
574 chain_p chain;
575
9771b263 576 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc 577 release_chain (chain);
9771b263 578 chains.release ();
bbc8a8dc
ZD
579}
580
581/* Frees a component COMP. */
582
583static void
584release_component (struct component *comp)
585{
9771b263 586 comp->refs.release ();
bbc8a8dc
ZD
587 free (comp);
588}
589
590/* Frees list of components COMPS. */
591
592static void
593release_components (struct component *comps)
594{
595 struct component *act, *next;
596
597 for (act = comps; act; act = next)
598 {
599 next = act->next;
600 release_component (act);
601 }
602}
603
604/* Finds a root of tree given by FATHERS containing A, and performs path
605 shortening. */
606
607static unsigned
608component_of (unsigned fathers[], unsigned a)
609{
610 unsigned root, n;
611
612 for (root = a; root != fathers[root]; root = fathers[root])
613 continue;
614
615 for (; a != root; a = n)
616 {
617 n = fathers[a];
618 fathers[a] = root;
619 }
620
621 return root;
622}
623
624/* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
625 components, A and B are components to merge. */
626
627static void
628merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629{
630 unsigned ca = component_of (fathers, a);
631 unsigned cb = component_of (fathers, b);
632
633 if (ca == cb)
634 return;
635
636 if (sizes[ca] < sizes[cb])
637 {
638 sizes[cb] += sizes[ca];
639 fathers[ca] = cb;
640 }
641 else
642 {
643 sizes[ca] += sizes[cb];
644 fathers[cb] = ca;
645 }
646}
647
648/* Returns true if A is a reference that is suitable for predictive commoning
649 in the innermost loop that contains it. REF_STEP is set according to the
650 step of the reference A. */
651
652static bool
653suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654{
655 tree ref = DR_REF (a), step = DR_STEP (a);
656
657 if (!step
64fb0d3a 658 || TREE_THIS_VOLATILE (ref)
7e80c6bf
EB
659 || !is_gimple_reg_type (TREE_TYPE (ref))
660 || tree_could_throw_p (ref))
bbc8a8dc
ZD
661 return false;
662
663 if (integer_zerop (step))
664 *ref_step = RS_INVARIANT;
665 else if (integer_nonzerop (step))
666 *ref_step = RS_NONZERO;
667 else
668 *ref_step = RS_ANY;
669
670 return true;
671}
672
673/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
674
675static void
676aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677{
0d82a1c8 678 tree type = TREE_TYPE (DR_OFFSET (dr));
bbc8a8dc
ZD
679 aff_tree delta;
680
0d82a1c8 681 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
bbc8a8dc 682 &name_expansions);
c0a46545 683 aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
bbc8a8dc
ZD
684 aff_combination_add (offset, &delta);
685}
686
687/* Determines number of iterations of the innermost enclosing loop before B
688 refers to exactly the same location as A and stores it to OFF. If A and
689 B do not have the same step, they never meet, or anything else fails,
690 returns false, otherwise returns true. Both A and B are assumed to
691 satisfy suitable_reference_p. */
692
693static bool
694determine_offset (struct data_reference *a, struct data_reference *b,
cc8bea09 695 poly_widest_int *off)
bbc8a8dc
ZD
696{
697 aff_tree diff, baseb, step;
49379cb1
ZD
698 tree typea, typeb;
699
700 /* Check that both the references access the location in the same type. */
701 typea = TREE_TYPE (DR_REF (a));
702 typeb = TREE_TYPE (DR_REF (b));
36618b93 703 if (!useless_type_conversion_p (typeb, typea))
49379cb1 704 return false;
bbc8a8dc
ZD
705
706 /* Check whether the base address and the step of both references is the
707 same. */
708 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710 return false;
711
712 if (integer_zerop (DR_STEP (a)))
713 {
714 /* If the references have loop invariant address, check that they access
715 exactly the same location. */
807e902e 716 *off = 0;
bbc8a8dc
ZD
717 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719 }
720
721 /* Compare the offsets of the addresses, and check whether the difference
722 is a multiple of step. */
723 aff_combination_dr_offset (a, &diff);
724 aff_combination_dr_offset (b, &baseb);
807e902e 725 aff_combination_scale (&baseb, -1);
bbc8a8dc
ZD
726 aff_combination_add (&diff, &baseb);
727
0d82a1c8 728 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
bbc8a8dc
ZD
729 &step, &name_expansions);
730 return aff_combination_constant_multiple_p (&diff, &step, off);
731}
732
733/* Returns the last basic block in LOOP for that we are sure that
734 it is executed whenever the loop is entered. */
735
736static basic_block
99b1c316 737last_always_executed_block (class loop *loop)
bbc8a8dc
ZD
738{
739 unsigned i;
4b9d61f7 740 auto_vec<edge> exits = get_loop_exit_edges (loop);
bbc8a8dc
ZD
741 edge ex;
742 basic_block last = loop->latch;
743
9771b263 744 FOR_EACH_VEC_ELT (exits, i, ex)
bbc8a8dc 745 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
bbc8a8dc
ZD
746
747 return last;
748}
749
750/* Splits dependence graph on DATAREFS described by DEPENDS to components. */
751
752static struct component *
99b1c316 753split_data_refs_to_components (class loop *loop,
9771b263
DN
754 vec<data_reference_p> datarefs,
755 vec<ddr_p> depends)
bbc8a8dc 756{
9771b263 757 unsigned i, n = datarefs.length ();
bbc8a8dc
ZD
758 unsigned ca, ia, ib, bad;
759 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
760 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
761 struct component **comps;
762 struct data_reference *dr, *dra, *drb;
763 struct data_dependence_relation *ddr;
764 struct component *comp_list = NULL, *comp;
765 dref dataref;
d9c259ef
BC
766 /* Don't do store elimination if loop has multiple exit edges. */
767 bool eliminate_store_p = single_exit (loop) != NULL;
bbc8a8dc 768 basic_block last_always_executed = last_always_executed_block (loop);
6cedf5d8 769 auto_bitmap no_store_store_comps;
b8698a0f 770
9771b263 771 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc
ZD
772 {
773 if (!DR_REF (dr))
774 {
775 /* A fake reference for call or asm_expr that may clobber memory;
776 just fail. */
777 goto end;
778 }
5ce9450f
JJ
779 /* predcom pass isn't prepared to handle calls with data references. */
780 if (is_gimple_call (DR_STMT (dr)))
781 goto end;
5417e022 782 dr->aux = (void *) (size_t) i;
bbc8a8dc
ZD
783 comp_father[i] = i;
784 comp_size[i] = 1;
785 }
786
787 /* A component reserved for the "bad" data references. */
788 comp_father[n] = n;
789 comp_size[n] = 1;
790
9771b263 791 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc
ZD
792 {
793 enum ref_step_type dummy;
794
795 if (!suitable_reference_p (dr, &dummy))
796 {
5417e022 797 ia = (unsigned) (size_t) dr->aux;
bbc8a8dc
ZD
798 merge_comps (comp_father, comp_size, n, ia);
799 }
800 }
801
9771b263 802 FOR_EACH_VEC_ELT (depends, i, ddr)
bbc8a8dc 803 {
cc8bea09 804 poly_widest_int dummy_off;
bbc8a8dc
ZD
805
806 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
807 continue;
808
809 dra = DDR_A (ddr);
810 drb = DDR_B (ddr);
d9c259ef
BC
811
812 /* Don't do store elimination if there is any unknown dependence for
813 any store data reference. */
814 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
815 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
816 || DDR_NUM_DIST_VECTS (ddr) == 0))
817 eliminate_store_p = false;
818
5417e022
ZD
819 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
820 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
bbc8a8dc
ZD
821 if (ia == ib)
822 continue;
823
824 bad = component_of (comp_father, n);
825
826 /* If both A and B are reads, we may ignore unsuitable dependences. */
41626746
JJ
827 if (DR_IS_READ (dra) && DR_IS_READ (drb))
828 {
829 if (ia == bad || ib == bad
830 || !determine_offset (dra, drb, &dummy_off))
831 continue;
832 }
833 /* If A is read and B write or vice versa and there is unsuitable
834 dependence, instead of merging both components into a component
835 that will certainly not pass suitable_component_p, just put the
836 read into bad component, perhaps at least the write together with
837 all the other data refs in it's component will be optimizable. */
838 else if (DR_IS_READ (dra) && ib != bad)
839 {
840 if (ia == bad)
6cedf5d8
RS
841 {
842 bitmap_set_bit (no_store_store_comps, ib);
843 continue;
844 }
41626746
JJ
845 else if (!determine_offset (dra, drb, &dummy_off))
846 {
6cedf5d8 847 bitmap_set_bit (no_store_store_comps, ib);
41626746
JJ
848 merge_comps (comp_father, comp_size, bad, ia);
849 continue;
850 }
851 }
852 else if (DR_IS_READ (drb) && ia != bad)
853 {
854 if (ib == bad)
6cedf5d8
RS
855 {
856 bitmap_set_bit (no_store_store_comps, ia);
857 continue;
858 }
41626746
JJ
859 else if (!determine_offset (dra, drb, &dummy_off))
860 {
6cedf5d8 861 bitmap_set_bit (no_store_store_comps, ia);
41626746
JJ
862 merge_comps (comp_father, comp_size, bad, ib);
863 continue;
864 }
865 }
d9c259ef
BC
866 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
867 && ia != bad && ib != bad
868 && !determine_offset (dra, drb, &dummy_off))
869 {
870 merge_comps (comp_father, comp_size, bad, ia);
871 merge_comps (comp_father, comp_size, bad, ib);
872 continue;
873 }
b8698a0f 874
bbc8a8dc
ZD
875 merge_comps (comp_father, comp_size, ia, ib);
876 }
877
d9c259ef
BC
878 if (eliminate_store_p)
879 {
880 tree niters = number_of_latch_executions (loop);
881
882 /* Don't do store elimination if niters info is unknown because stores
883 in the last iteration can't be eliminated and we need to recover it
884 after loop. */
885 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
886 }
887
bbc8a8dc
ZD
888 comps = XCNEWVEC (struct component *, n);
889 bad = component_of (comp_father, n);
9771b263 890 FOR_EACH_VEC_ELT (datarefs, i, dr)
bbc8a8dc 891 {
5417e022 892 ia = (unsigned) (size_t) dr->aux;
bbc8a8dc
ZD
893 ca = component_of (comp_father, ia);
894 if (ca == bad)
895 continue;
896
897 comp = comps[ca];
898 if (!comp)
899 {
900 comp = XCNEW (struct component);
9771b263 901 comp->refs.create (comp_size[ca]);
d9c259ef 902 comp->eliminate_store_p = eliminate_store_p;
bbc8a8dc
ZD
903 comps[ca] = comp;
904 }
905
99b1c316 906 dataref = XCNEW (class dref_d);
bbc8a8dc
ZD
907 dataref->ref = dr;
908 dataref->stmt = DR_STMT (dr);
807e902e 909 dataref->offset = 0;
bbc8a8dc
ZD
910 dataref->distance = 0;
911
912 dataref->always_accessed
913 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
726a989a 914 gimple_bb (dataref->stmt));
9771b263
DN
915 dataref->pos = comp->refs.length ();
916 comp->refs.quick_push (dataref);
bbc8a8dc
ZD
917 }
918
6cedf5d8
RS
919 if (eliminate_store_p)
920 {
921 bitmap_iterator bi;
922 EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
923 {
924 ca = component_of (comp_father, ia);
925 if (ca != bad)
926 comps[ca]->eliminate_store_p = false;
927 }
928 }
929
bbc8a8dc
ZD
930 for (i = 0; i < n; i++)
931 {
932 comp = comps[i];
933 if (comp)
934 {
935 comp->next = comp_list;
936 comp_list = comp;
937 }
938 }
939 free (comps);
940
941end:
942 free (comp_father);
943 free (comp_size);
944 return comp_list;
945}
946
947/* Returns true if the component COMP satisfies the conditions
c80b4100 948 described in 2) at the beginning of this file. LOOP is the current
bbc8a8dc 949 loop. */
b8698a0f 950
bbc8a8dc 951static bool
99b1c316 952suitable_component_p (class loop *loop, struct component *comp)
bbc8a8dc
ZD
953{
954 unsigned i;
955 dref a, first;
956 basic_block ba, bp = loop->header;
957 bool ok, has_write = false;
958
9771b263 959 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc 960 {
726a989a 961 ba = gimple_bb (a->stmt);
bbc8a8dc
ZD
962
963 if (!just_once_each_iteration_p (loop, ba))
964 return false;
965
966 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
967 bp = ba;
968
b0af49c4 969 if (DR_IS_WRITE (a->ref))
bbc8a8dc
ZD
970 has_write = true;
971 }
972
9771b263 973 first = comp->refs[0];
bbc8a8dc
ZD
974 ok = suitable_reference_p (first->ref, &comp->comp_step);
975 gcc_assert (ok);
807e902e 976 first->offset = 0;
bbc8a8dc 977
9771b263 978 for (i = 1; comp->refs.iterate (i, &a); i++)
bbc8a8dc 979 {
cc8bea09
RS
980 /* Polynomial offsets are no use, since we need to know the
981 gap between iteration numbers at compile time. */
982 poly_widest_int offset;
983 if (!determine_offset (first->ref, a->ref, &offset)
984 || !offset.is_constant (&a->offset))
bbc8a8dc
ZD
985 return false;
986
b2b29377
MM
987 enum ref_step_type a_step;
988 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
989 && a_step == comp->comp_step);
bbc8a8dc
ZD
990 }
991
992 /* If there is a write inside the component, we must know whether the
993 step is nonzero or not -- we would not otherwise be able to recognize
994 whether the value accessed by reads comes from the OFFSET-th iteration
995 or the previous one. */
996 if (has_write && comp->comp_step == RS_ANY)
997 return false;
998
999 return true;
1000}
b8698a0f 1001
bbc8a8dc
ZD
1002/* Check the conditions on references inside each of components COMPS,
1003 and remove the unsuitable components from the list. The new list
1004 of components is returned. The conditions are described in 2) at
c80b4100 1005 the beginning of this file. LOOP is the current loop. */
bbc8a8dc
ZD
1006
1007static struct component *
99b1c316 1008filter_suitable_components (class loop *loop, struct component *comps)
bbc8a8dc
ZD
1009{
1010 struct component **comp, *act;
1011
1012 for (comp = &comps; *comp; )
1013 {
1014 act = *comp;
1015 if (suitable_component_p (loop, act))
1016 comp = &act->next;
1017 else
1018 {
a0044be5
JJ
1019 dref ref;
1020 unsigned i;
1021
bbc8a8dc 1022 *comp = act->next;
9771b263 1023 FOR_EACH_VEC_ELT (act->refs, i, ref)
a0044be5 1024 free (ref);
bbc8a8dc
ZD
1025 release_component (act);
1026 }
1027 }
1028
1029 return comps;
1030}
1031
1032/* Compares two drefs A and B by their offset and position. Callback for
1033 qsort. */
1034
1035static int
1036order_drefs (const void *a, const void *b)
1037{
3d9a9f94
KG
1038 const dref *const da = (const dref *) a;
1039 const dref *const db = (const dref *) b;
807e902e 1040 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
bbc8a8dc
ZD
1041
1042 if (offcmp != 0)
1043 return offcmp;
1044
1045 return (*da)->pos - (*db)->pos;
1046}
1047
bd9cc42b
BC
1048/* Compares two drefs A and B by their position. Callback for qsort. */
1049
1050static int
1051order_drefs_by_pos (const void *a, const void *b)
1052{
1053 const dref *const da = (const dref *) a;
1054 const dref *const db = (const dref *) b;
1055
1056 return (*da)->pos - (*db)->pos;
1057}
1058
bbc8a8dc
ZD
1059/* Returns root of the CHAIN. */
1060
1061static inline dref
1062get_chain_root (chain_p chain)
1063{
9771b263 1064 return chain->refs[0];
bbc8a8dc
ZD
1065}
1066
9cdcebf9 1067/* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
d9c259ef
BC
1068 exist. */
1069
1070static inline dref
9cdcebf9 1071get_chain_last_write_at (chain_p chain, unsigned distance)
d9c259ef 1072{
9cdcebf9
BC
1073 for (unsigned i = chain->refs.length (); i > 0; i--)
1074 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1075 && distance == chain->refs[i - 1]->distance)
1076 return chain->refs[i - 1];
d9c259ef 1077
9cdcebf9
BC
1078 return NULL;
1079}
1080
1081/* Given CHAIN, returns the last write ref with the same distance before load
1082 at index LOAD_IDX, or NULL if it doesn't exist. */
1083
1084static inline dref
1085get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1086{
1087 gcc_assert (load_idx < chain->refs.length ());
1088
1089 unsigned distance = chain->refs[load_idx]->distance;
d9c259ef 1090
9cdcebf9
BC
1091 for (unsigned i = load_idx; i > 0; i--)
1092 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1093 && distance == chain->refs[i - 1]->distance)
1094 return chain->refs[i - 1];
1095
1096 return NULL;
d9c259ef
BC
1097}
1098
bbc8a8dc
ZD
1099/* Adds REF to the chain CHAIN. */
1100
1101static void
1102add_ref_to_chain (chain_p chain, dref ref)
1103{
1104 dref root = get_chain_root (chain);
bbc8a8dc 1105
807e902e
KZ
1106 gcc_assert (wi::les_p (root->offset, ref->offset));
1107 widest_int dist = ref->offset - root->offset;
807e902e 1108 gcc_assert (wi::fits_uhwi_p (dist));
bbc8a8dc 1109
9771b263 1110 chain->refs.safe_push (ref);
bbc8a8dc 1111
27bcd47c 1112 ref->distance = dist.to_uhwi ();
bbc8a8dc
ZD
1113
1114 if (ref->distance >= chain->length)
1115 {
1116 chain->length = ref->distance;
1117 chain->has_max_use_after = false;
1118 }
1119
9cdcebf9
BC
1120 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1121 if (DR_IS_WRITE (ref->ref))
1122 chain->type = CT_STORE_STORE;
1123
39637a44
BC
1124 /* Don't set the flag for store-store chain since there is no use. */
1125 if (chain->type != CT_STORE_STORE
1126 && ref->distance == chain->length
bbc8a8dc
ZD
1127 && ref->pos > root->pos)
1128 chain->has_max_use_after = true;
1129
1130 chain->all_always_accessed &= ref->always_accessed;
1131}
1132
1133/* Returns the chain for invariant component COMP. */
1134
1135static chain_p
1136make_invariant_chain (struct component *comp)
1137{
1138 chain_p chain = XCNEW (struct chain);
1139 unsigned i;
1140 dref ref;
1141
1142 chain->type = CT_INVARIANT;
1143
1144 chain->all_always_accessed = true;
1145
9771b263 1146 FOR_EACH_VEC_ELT (comp->refs, i, ref)
bbc8a8dc 1147 {
9771b263 1148 chain->refs.safe_push (ref);
bbc8a8dc
ZD
1149 chain->all_always_accessed &= ref->always_accessed;
1150 }
1151
d9c259ef
BC
1152 chain->inits = vNULL;
1153 chain->finis = vNULL;
1154
bbc8a8dc
ZD
1155 return chain;
1156}
1157
d9c259ef 1158/* Make a new chain of type TYPE rooted at REF. */
bbc8a8dc
ZD
1159
1160static chain_p
d9c259ef 1161make_rooted_chain (dref ref, enum chain_type type)
bbc8a8dc
ZD
1162{
1163 chain_p chain = XCNEW (struct chain);
1164
d9c259ef 1165 chain->type = type;
9771b263 1166 chain->refs.safe_push (ref);
bbc8a8dc 1167 chain->all_always_accessed = ref->always_accessed;
bbc8a8dc
ZD
1168 ref->distance = 0;
1169
d9c259ef
BC
1170 chain->inits = vNULL;
1171 chain->finis = vNULL;
1172
bbc8a8dc
ZD
1173 return chain;
1174}
1175
1176/* Returns true if CHAIN is not trivial. */
1177
1178static bool
1179nontrivial_chain_p (chain_p chain)
1180{
9771b263 1181 return chain != NULL && chain->refs.length () > 1;
bbc8a8dc
ZD
1182}
1183
1184/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1185 is no such name. */
1186
1187static tree
1188name_for_ref (dref ref)
1189{
1190 tree name;
1191
726a989a 1192 if (is_gimple_assign (ref->stmt))
bbc8a8dc
ZD
1193 {
1194 if (!ref->ref || DR_IS_READ (ref->ref))
726a989a 1195 name = gimple_assign_lhs (ref->stmt);
bbc8a8dc 1196 else
726a989a 1197 name = gimple_assign_rhs1 (ref->stmt);
bbc8a8dc
ZD
1198 }
1199 else
1200 name = PHI_RESULT (ref->stmt);
1201
1202 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1203}
1204
1205/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1206 iterations of the innermost enclosing loop). */
1207
1208static bool
1209valid_initializer_p (struct data_reference *ref,
1210 unsigned distance, struct data_reference *root)
1211{
1212 aff_tree diff, base, step;
cc8bea09 1213 poly_widest_int off;
bbc8a8dc 1214
bbc8a8dc
ZD
1215 /* Both REF and ROOT must be accessing the same object. */
1216 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1217 return false;
1218
1219 /* The initializer is defined outside of loop, hence its address must be
1220 invariant inside the loop. */
1221 gcc_assert (integer_zerop (DR_STEP (ref)));
1222
1223 /* If the address of the reference is invariant, initializer must access
1224 exactly the same location. */
1225 if (integer_zerop (DR_STEP (root)))
1226 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1227 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1228
1229 /* Verify that this index of REF is equal to the root's index at
1230 -DISTANCE-th iteration. */
1231 aff_combination_dr_offset (root, &diff);
1232 aff_combination_dr_offset (ref, &base);
807e902e 1233 aff_combination_scale (&base, -1);
bbc8a8dc
ZD
1234 aff_combination_add (&diff, &base);
1235
0d82a1c8
RG
1236 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1237 &step, &name_expansions);
bbc8a8dc
ZD
1238 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1239 return false;
1240
cc8bea09 1241 if (maybe_ne (off, distance))
bbc8a8dc
ZD
1242 return false;
1243
1244 return true;
1245}
1246
1247/* Finds looparound phi node of LOOP that copies the value of REF, and if its
1248 initial value is correct (equal to initial value of REF shifted by one
1249 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1250 is the root of the current chain. */
1251
538dd0b7 1252static gphi *
99b1c316 1253find_looparound_phi (class loop *loop, dref ref, dref root)
bbc8a8dc 1254{
726a989a 1255 tree name, init, init_ref;
538dd0b7 1256 gphi *phi = NULL;
355fe088 1257 gimple *init_stmt;
bbc8a8dc
ZD
1258 edge latch = loop_latch_edge (loop);
1259 struct data_reference init_dr;
538dd0b7 1260 gphi_iterator psi;
bbc8a8dc 1261
726a989a 1262 if (is_gimple_assign (ref->stmt))
bbc8a8dc
ZD
1263 {
1264 if (DR_IS_READ (ref->ref))
726a989a 1265 name = gimple_assign_lhs (ref->stmt);
bbc8a8dc 1266 else
726a989a 1267 name = gimple_assign_rhs1 (ref->stmt);
bbc8a8dc
ZD
1268 }
1269 else
1270 name = PHI_RESULT (ref->stmt);
1271 if (!name)
726a989a 1272 return NULL;
bbc8a8dc 1273
726a989a
RB
1274 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1275 {
538dd0b7 1276 phi = psi.phi ();
726a989a
RB
1277 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1278 break;
1279 }
bbc8a8dc 1280
726a989a
RB
1281 if (gsi_end_p (psi))
1282 return NULL;
bbc8a8dc
ZD
1283
1284 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1285 if (TREE_CODE (init) != SSA_NAME)
726a989a 1286 return NULL;
bbc8a8dc 1287 init_stmt = SSA_NAME_DEF_STMT (init);
726a989a
RB
1288 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1289 return NULL;
1290 gcc_assert (gimple_assign_lhs (init_stmt) == init);
bbc8a8dc 1291
726a989a 1292 init_ref = gimple_assign_rhs1 (init_stmt);
bbc8a8dc
ZD
1293 if (!REFERENCE_CLASS_P (init_ref)
1294 && !DECL_P (init_ref))
726a989a 1295 return NULL;
bbc8a8dc
ZD
1296
1297 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1298 loop enclosing PHI). */
1299 memset (&init_dr, 0, sizeof (struct data_reference));
1300 DR_REF (&init_dr) = init_ref;
1301 DR_STMT (&init_dr) = phi;
f4ebbd24
DM
1302 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
1303 init_stmt))
3661e899 1304 return NULL;
bbc8a8dc
ZD
1305
1306 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
726a989a 1307 return NULL;
bbc8a8dc
ZD
1308
1309 return phi;
1310}
1311
1312/* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1313
1314static void
538dd0b7 1315insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
bbc8a8dc 1316{
99b1c316 1317 dref nw = XCNEW (class dref_d), aref;
bbc8a8dc
ZD
1318 unsigned i;
1319
1320 nw->stmt = phi;
1321 nw->distance = ref->distance + 1;
1322 nw->always_accessed = 1;
1323
9771b263 1324 FOR_EACH_VEC_ELT (chain->refs, i, aref)
bbc8a8dc
ZD
1325 if (aref->distance >= nw->distance)
1326 break;
9771b263 1327 chain->refs.safe_insert (i, nw);
bbc8a8dc
ZD
1328
1329 if (nw->distance > chain->length)
1330 {
1331 chain->length = nw->distance;
1332 chain->has_max_use_after = false;
1333 }
1334}
1335
1336/* For references in CHAIN that are copied around the LOOP (created previously
1337 by PRE, or by user), add the results of such copies to the chain. This
1338 enables us to remove the copies by unrolling, and may need less registers
1339 (also, it may allow us to combine chains together). */
1340
1341static void
99b1c316 1342add_looparound_copies (class loop *loop, chain_p chain)
bbc8a8dc
ZD
1343{
1344 unsigned i;
1345 dref ref, root = get_chain_root (chain);
538dd0b7 1346 gphi *phi;
bbc8a8dc 1347
d9c259ef
BC
1348 if (chain->type == CT_STORE_STORE)
1349 return;
1350
9771b263 1351 FOR_EACH_VEC_ELT (chain->refs, i, ref)
bbc8a8dc
ZD
1352 {
1353 phi = find_looparound_phi (loop, ref, root);
1354 if (!phi)
1355 continue;
1356
1357 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1358 insert_looparound_copy (chain, ref, phi);
1359 }
1360}
1361
1362/* Find roots of the values and determine distances in the component COMP.
1363 The references are redistributed into CHAINS. LOOP is the current
1364 loop. */
1365
1366static void
99b1c316 1367determine_roots_comp (class loop *loop,
bbc8a8dc 1368 struct component *comp,
9771b263 1369 vec<chain_p> *chains)
bbc8a8dc
ZD
1370{
1371 unsigned i;
1372 dref a;
1373 chain_p chain = NULL;
807e902e 1374 widest_int last_ofs = 0;
d9c259ef 1375 enum chain_type type;
bbc8a8dc
ZD
1376
1377 /* Invariants are handled specially. */
1378 if (comp->comp_step == RS_INVARIANT)
1379 {
1380 chain = make_invariant_chain (comp);
9771b263 1381 chains->safe_push (chain);
bbc8a8dc
ZD
1382 return;
1383 }
1384
67bb451d
BC
1385 /* Trivial component. */
1386 if (comp->refs.length () <= 1)
df3c2945
BC
1387 {
1388 if (comp->refs.length () == 1)
1389 {
1390 free (comp->refs[0]);
1391 comp->refs.truncate (0);
1392 }
1393 return;
1394 }
bbc8a8dc 1395
67bb451d 1396 comp->refs.qsort (order_drefs);
9cdcebf9
BC
1397
1398 /* For Store-Store chain, we only support load if it is dominated by a
1399 store statement in the same iteration of loop. */
1400 if (comp->eliminate_store_p)
1401 for (a = NULL, i = 0; i < comp->refs.length (); i++)
1402 {
1403 if (DR_IS_WRITE (comp->refs[i]->ref))
1404 a = comp->refs[i];
1405 else if (a == NULL || a->offset != comp->refs[i]->offset)
1406 {
1407 /* If there is load that is not dominated by a store in the
1408 same iteration of loop, clear the flag so no Store-Store
1409 chain is generated for this component. */
1410 comp->eliminate_store_p = false;
1411 break;
1412 }
1413 }
1414
1415 /* Determine roots and create chains for components. */
9771b263 1416 FOR_EACH_VEC_ELT (comp->refs, i, a)
bbc8a8dc 1417 {
d9c259ef 1418 if (!chain
9cdcebf9 1419 || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
d9c259ef 1420 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
807e902e 1421 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
bbc8a8dc
ZD
1422 {
1423 if (nontrivial_chain_p (chain))
b61b1f17
MM
1424 {
1425 add_looparound_copies (loop, chain);
9771b263 1426 chains->safe_push (chain);
b61b1f17 1427 }
bbc8a8dc
ZD
1428 else
1429 release_chain (chain);
d9c259ef 1430
9cdcebf9
BC
1431 /* Determine type of the chain. If the root reference is a load,
1432 this can only be a CT_LOAD chain; other chains are intialized
1433 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1434 new reference is added. */
1435 type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
d9c259ef 1436 chain = make_rooted_chain (a, type);
b61b1f17 1437 last_ofs = a->offset;
bbc8a8dc
ZD
1438 continue;
1439 }
1440
1441 add_ref_to_chain (chain, a);
1442 }
1443
1444 if (nontrivial_chain_p (chain))
1445 {
1446 add_looparound_copies (loop, chain);
9771b263 1447 chains->safe_push (chain);
bbc8a8dc
ZD
1448 }
1449 else
1450 release_chain (chain);
1451}
1452
1453/* Find roots of the values and determine distances in components COMPS, and
1454 separates the references to CHAINS. LOOP is the current loop. */
1455
1456static void
99b1c316 1457determine_roots (class loop *loop,
9771b263 1458 struct component *comps, vec<chain_p> *chains)
bbc8a8dc
ZD
1459{
1460 struct component *comp;
1461
1462 for (comp = comps; comp; comp = comp->next)
1463 determine_roots_comp (loop, comp, chains);
1464}
1465
1466/* Replace the reference in statement STMT with temporary variable
82d6e6fc 1467 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
bbc8a8dc
ZD
1468 the reference in the statement. IN_LHS is true if the reference
1469 is in the lhs of STMT, false if it is in rhs. */
1470
1471static void
355fe088 1472replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
bbc8a8dc 1473{
726a989a 1474 tree val;
538dd0b7 1475 gassign *new_stmt;
726a989a 1476 gimple_stmt_iterator bsi, psi;
bbc8a8dc 1477
726a989a 1478 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
1479 {
1480 gcc_assert (!in_lhs && !set);
1481
1482 val = PHI_RESULT (stmt);
726a989a
RB
1483 bsi = gsi_after_labels (gimple_bb (stmt));
1484 psi = gsi_for_stmt (stmt);
1485 remove_phi_node (&psi, false);
bbc8a8dc 1486
726a989a 1487 /* Turn the phi node into GIMPLE_ASSIGN. */
82d6e6fc 1488 new_stmt = gimple_build_assign (val, new_tree);
726a989a 1489 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
bbc8a8dc
ZD
1490 return;
1491 }
b8698a0f 1492
bbc8a8dc
ZD
1493 /* Since the reference is of gimple_reg type, it should only
1494 appear as lhs or rhs of modify statement. */
726a989a
RB
1495 gcc_assert (is_gimple_assign (stmt));
1496
1497 bsi = gsi_for_stmt (stmt);
bbc8a8dc 1498
82d6e6fc 1499 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
bbc8a8dc
ZD
1500 if (!set)
1501 {
1502 gcc_assert (!in_lhs);
82d6e6fc 1503 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
726a989a 1504 stmt = gsi_stmt (bsi);
bbc8a8dc
ZD
1505 update_stmt (stmt);
1506 return;
1507 }
1508
bbc8a8dc
ZD
1509 if (in_lhs)
1510 {
726a989a 1511 /* We have statement
b8698a0f 1512
726a989a 1513 OLD = VAL
bbc8a8dc 1514
726a989a
RB
1515 If OLD is a memory reference, then VAL is gimple_val, and we transform
1516 this to
bbc8a8dc
ZD
1517
1518 OLD = VAL
1519 NEW = VAL
1520
b8698a0f 1521 Otherwise, we are replacing a combination chain,
726a989a
RB
1522 VAL is the expression that performs the combination, and OLD is an
1523 SSA name. In this case, we transform the assignment to
1524
1525 OLD = VAL
1526 NEW = OLD
1527
1528 */
1529
1530 val = gimple_assign_lhs (stmt);
1531 if (TREE_CODE (val) != SSA_NAME)
1532 {
726a989a 1533 val = gimple_assign_rhs1 (stmt);
7e8b1c4b
JJ
1534 gcc_assert (gimple_assign_single_p (stmt));
1535 if (TREE_CLOBBER_P (val))
32244553 1536 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
7e8b1c4b
JJ
1537 else
1538 gcc_assert (gimple_assign_copy_p (stmt));
726a989a 1539 }
bbc8a8dc
ZD
1540 }
1541 else
1542 {
bbc8a8dc
ZD
1543 /* VAL = OLD
1544
1545 is transformed to
1546
1547 VAL = OLD
1548 NEW = VAL */
726a989a
RB
1549
1550 val = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
1551 }
1552
82d6e6fc 1553 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
726a989a 1554 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
bbc8a8dc
ZD
1555}
1556
3c62a7fb
BC
1557/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1558 of the loop it was analyzed in. Append init stmts to STMTS. */
9f2b860b 1559
b1ad9be2 1560static tree
3c62a7fb
BC
1561ref_at_iteration (data_reference_p dr, int iter,
1562 gimple_seq *stmts, tree niters = NULL_TREE)
9f2b860b
RB
1563{
1564 tree off = DR_OFFSET (dr);
1565 tree coff = DR_INIT (dr);
b1ad9be2
BE
1566 tree ref = DR_REF (dr);
1567 enum tree_code ref_code = ERROR_MARK;
1568 tree ref_type = NULL_TREE;
1569 tree ref_op1 = NULL_TREE;
1570 tree ref_op2 = NULL_TREE;
3c62a7fb
BC
1571 tree new_offset;
1572
1573 if (iter != 0)
1574 {
1575 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1576 if (TREE_CODE (new_offset) == INTEGER_CST)
1577 coff = size_binop (PLUS_EXPR, coff, new_offset);
1578 else
1579 off = size_binop (PLUS_EXPR, off, new_offset);
1580 }
1581
1582 if (niters != NULL_TREE)
1583 {
1584 niters = fold_convert (ssizetype, niters);
1585 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1586 if (TREE_CODE (niters) == INTEGER_CST)
1587 coff = size_binop (PLUS_EXPR, coff, new_offset);
1588 else
1589 off = size_binop (PLUS_EXPR, off, new_offset);
1590 }
1591
cb3d1e3e
RB
1592 /* While data-ref analysis punts on bit offsets it still handles
1593 bitfield accesses at byte boundaries. Cope with that. Note that
b1ad9be2
BE
1594 if the bitfield object also starts at a byte-boundary we can simply
1595 replicate the COMPONENT_REF, but we have to subtract the component's
1596 byte-offset from the MEM_REF address first.
1597 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
cb3d1e3e 1598 start at offset zero. */
b1ad9be2
BE
1599 if (TREE_CODE (ref) == COMPONENT_REF
1600 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
cb3d1e3e 1601 {
b1ad9be2
BE
1602 unsigned HOST_WIDE_INT boff;
1603 tree field = TREE_OPERAND (ref, 1);
1604 tree offset = component_ref_field_offset (ref);
1605 ref_type = TREE_TYPE (ref);
1606 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1607 /* This can occur in Ada. See the comment in get_bit_range. */
1608 if (boff % BITS_PER_UNIT != 0
1609 || !tree_fits_uhwi_p (offset))
1610 {
1611 ref_code = BIT_FIELD_REF;
1612 ref_op1 = DECL_SIZE (field);
1613 ref_op2 = bitsize_zero_node;
1614 }
1615 else
1616 {
1617 boff >>= LOG2_BITS_PER_UNIT;
1618 boff += tree_to_uhwi (offset);
1619 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1620 ref_code = COMPONENT_REF;
1621 ref_op1 = field;
1622 ref_op2 = TREE_OPERAND (ref, 2);
1623 ref = TREE_OPERAND (ref, 0);
1624 }
cb3d1e3e 1625 }
b1ad9be2
BE
1626 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1627 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1628 is_gimple_mem_ref_addr, NULL_TREE);
1629 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1630 tree type = build_aligned_type (TREE_TYPE (ref),
1631 get_object_alignment (ref));
1632 ref = build2 (MEM_REF, type, addr, alias_ptr);
1633 if (ref_type)
1634 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1635 return ref;
bbc8a8dc
ZD
1636}
1637
1638/* Get the initialization expression for the INDEX-th temporary variable
1639 of CHAIN. */
1640
1641static tree
1642get_init_expr (chain_p chain, unsigned index)
1643{
1644 if (chain->type == CT_COMBINATION)
1645 {
1646 tree e1 = get_init_expr (chain->ch1, index);
1647 tree e2 = get_init_expr (chain->ch2, index);
1648
82d6e6fc 1649 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
bbc8a8dc
ZD
1650 }
1651 else
9771b263 1652 return chain->inits[index];
bbc8a8dc
ZD
1653}
1654
2664efb6
ZD
1655/* Returns a new temporary variable used for the I-th variable carrying
1656 value of REF. The variable's uid is marked in TMP_VARS. */
1657
1658static tree
1659predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1660{
1661 tree type = TREE_TYPE (ref);
2664efb6
ZD
1662 /* We never access the components of the temporary variable in predictive
1663 commoning. */
acd63801 1664 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
2664efb6
ZD
1665 bitmap_set_bit (tmp_vars, DECL_UID (var));
1666 return var;
1667}
1668
bbc8a8dc
ZD
1669/* Creates the variables for CHAIN, as well as phi nodes for them and
1670 initialization on entry to LOOP. Uids of the newly created
1671 temporary variables are marked in TMP_VARS. */
1672
1673static void
99b1c316 1674initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
bbc8a8dc
ZD
1675{
1676 unsigned i;
1677 unsigned n = chain->length;
1678 dref root = get_chain_root (chain);
1679 bool reuse_first = !chain->has_max_use_after;
726a989a 1680 tree ref, init, var, next;
538dd0b7 1681 gphi *phi;
726a989a 1682 gimple_seq stmts;
bbc8a8dc
ZD
1683 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1684
1685 /* If N == 0, then all the references are within the single iteration. And
1686 since this is an nonempty chain, reuse_first cannot be true. */
1687 gcc_assert (n > 0 || !reuse_first);
1688
9771b263 1689 chain->vars.create (n + 1);
bbc8a8dc
ZD
1690
1691 if (chain->type == CT_COMBINATION)
726a989a 1692 ref = gimple_assign_lhs (root->stmt);
bbc8a8dc
ZD
1693 else
1694 ref = DR_REF (root->ref);
1695
1696 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1697 {
2664efb6 1698 var = predcom_tmp_var (ref, i, tmp_vars);
9771b263 1699 chain->vars.quick_push (var);
bbc8a8dc
ZD
1700 }
1701 if (reuse_first)
9771b263 1702 chain->vars.quick_push (chain->vars[0]);
b8698a0f 1703
9771b263 1704 FOR_EACH_VEC_ELT (chain->vars, i, var)
b731b390 1705 chain->vars[i] = make_ssa_name (var);
bbc8a8dc
ZD
1706
1707 for (i = 0; i < n; i++)
1708 {
9771b263
DN
1709 var = chain->vars[i];
1710 next = chain->vars[i + 1];
bbc8a8dc
ZD
1711 init = get_init_expr (chain, i);
1712
1713 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1714 if (stmts)
5006671f 1715 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc
ZD
1716
1717 phi = create_phi_node (var, loop->header);
9e227d60
DC
1718 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1719 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
bbc8a8dc
ZD
1720 }
1721}
1722
39ff50ce
BC
1723/* For inter-iteration store elimination CHAIN in LOOP, returns true if
1724 all stores to be eliminated store loop invariant values into memory.
1725 In this case, we can use these invariant values directly after LOOP. */
1726
1727static bool
99b1c316 1728is_inv_store_elimination_chain (class loop *loop, chain_p chain)
39ff50ce
BC
1729{
1730 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1731 return false;
1732
1733 gcc_assert (!chain->has_max_use_after);
1734
02fac244 1735 /* If loop iterates for unknown times or fewer times than chain->length,
39ff50ce
BC
1736 we still need to setup root variable and propagate it with PHI node. */
1737 tree niters = number_of_latch_executions (loop);
8e6cdc90
RS
1738 if (TREE_CODE (niters) != INTEGER_CST
1739 || wi::leu_p (wi::to_wide (niters), chain->length))
39ff50ce
BC
1740 return false;
1741
1742 /* Check stores in chain for elimination if they only store loop invariant
1743 values. */
1744 for (unsigned i = 0; i < chain->length; i++)
1745 {
9cdcebf9 1746 dref a = get_chain_last_write_at (chain, i);
39ff50ce
BC
1747 if (a == NULL)
1748 continue;
1749
1750 gimple *def_stmt, *stmt = a->stmt;
1751 if (!gimple_assign_single_p (stmt))
1752 return false;
1753
1754 tree val = gimple_assign_rhs1 (stmt);
1755 if (TREE_CLOBBER_P (val))
1756 return false;
1757
1758 if (CONSTANT_CLASS_P (val))
1759 continue;
1760
1761 if (TREE_CODE (val) != SSA_NAME)
1762 return false;
1763
1764 def_stmt = SSA_NAME_DEF_STMT (val);
1765 if (gimple_nop_p (def_stmt))
1766 continue;
1767
1768 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1769 return false;
1770 }
1771 return true;
1772}
1773
1774/* Creates root variables for store elimination CHAIN in which stores for
1775 elimination only store loop invariant values. In this case, we neither
1776 need to load root variables before loop nor propagate it with PHI nodes. */
1777
1778static void
1779initialize_root_vars_store_elim_1 (chain_p chain)
1780{
1781 tree var;
1782 unsigned i, n = chain->length;
1783
1784 chain->vars.create (n);
cb3874dc 1785 chain->vars.safe_grow_cleared (n, true);
39ff50ce
BC
1786
1787 /* Initialize root value for eliminated stores at each distance. */
1788 for (i = 0; i < n; i++)
1789 {
9cdcebf9 1790 dref a = get_chain_last_write_at (chain, i);
39ff50ce
BC
1791 if (a == NULL)
1792 continue;
1793
1794 var = gimple_assign_rhs1 (a->stmt);
1795 chain->vars[a->distance] = var;
1796 }
1797
1798 /* We don't propagate values with PHI nodes, so manually propagate value
1799 to bubble positions. */
1800 var = chain->vars[0];
1801 for (i = 1; i < n; i++)
1802 {
1803 if (chain->vars[i] != NULL_TREE)
1804 {
1805 var = chain->vars[i];
1806 continue;
1807 }
1808 chain->vars[i] = var;
1809 }
1810
1811 /* Revert the vector. */
1812 for (i = 0; i < n / 2; i++)
1813 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1814}
1815
d9c259ef
BC
1816/* Creates root variables for store elimination CHAIN in which stores for
1817 elimination store loop variant values. In this case, we may need to
1818 load root variables before LOOP and propagate it with PHI nodes. Uids
1819 of the newly created root variables are marked in TMP_VARS. */
1820
1821static void
99b1c316 1822initialize_root_vars_store_elim_2 (class loop *loop,
d9c259ef
BC
1823 chain_p chain, bitmap tmp_vars)
1824{
1825 unsigned i, n = chain->length;
1826 tree ref, init, var, next, val, phi_result;
1827 gimple *stmt;
1828 gimple_seq stmts;
1829
1830 chain->vars.create (n);
1831
1832 ref = DR_REF (get_chain_root (chain)->ref);
1833 for (i = 0; i < n; i++)
1834 {
1835 var = predcom_tmp_var (ref, i, tmp_vars);
1836 chain->vars.quick_push (var);
1837 }
1838
1839 FOR_EACH_VEC_ELT (chain->vars, i, var)
1840 chain->vars[i] = make_ssa_name (var);
1841
1842 /* Root values are either rhs operand of stores to be eliminated, or
1843 loaded from memory before loop. */
1844 auto_vec<tree> vtemps;
cb3874dc 1845 vtemps.safe_grow_cleared (n, true);
d9c259ef
BC
1846 for (i = 0; i < n; i++)
1847 {
1848 init = get_init_expr (chain, i);
1849 if (init == NULL_TREE)
1850 {
1851 /* Root value is rhs operand of the store to be eliminated if
1852 it isn't loaded from memory before loop. */
9cdcebf9 1853 dref a = get_chain_last_write_at (chain, i);
d9c259ef
BC
1854 val = gimple_assign_rhs1 (a->stmt);
1855 if (TREE_CLOBBER_P (val))
9cdcebf9
BC
1856 {
1857 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1858 gimple_assign_set_rhs1 (a->stmt, val);
1859 }
d9c259ef
BC
1860
1861 vtemps[n - i - 1] = val;
1862 }
1863 else
1864 {
1865 edge latch = loop_latch_edge (loop);
1866 edge entry = loop_preheader_edge (loop);
1867
1868 /* Root value is loaded from memory before loop, we also need
1869 to add PHI nodes to propagate the value across iterations. */
1870 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1871 if (stmts)
1872 gsi_insert_seq_on_edge_immediate (entry, stmts);
1873
1874 next = chain->vars[n - i];
1875 phi_result = copy_ssa_name (next);
1876 gphi *phi = create_phi_node (phi_result, loop->header);
1877 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1878 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1879 vtemps[n - i - 1] = phi_result;
1880 }
1881 }
1882
1883 /* Find the insertion position. */
1884 dref last = get_chain_root (chain);
1885 for (i = 0; i < chain->refs.length (); i++)
1886 {
1887 if (chain->refs[i]->pos > last->pos)
1888 last = chain->refs[i];
1889 }
1890
1891 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1892
1893 /* Insert statements copying root value to root variable. */
1894 for (i = 0; i < n; i++)
1895 {
1896 var = chain->vars[i];
1897 val = vtemps[i];
1898 stmt = gimple_build_assign (var, val);
1899 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1900 }
1901}
1902
1903/* Generates stores for CHAIN's eliminated stores in LOOP's last
1904 (CHAIN->length - 1) iterations. */
1905
1906static void
99b1c316 1907finalize_eliminated_stores (class loop *loop, chain_p chain)
d9c259ef
BC
1908{
1909 unsigned i, n = chain->length;
1910
1911 for (i = 0; i < n; i++)
1912 {
1913 tree var = chain->vars[i];
1914 tree fini = chain->finis[n - i - 1];
1915 gimple *stmt = gimple_build_assign (fini, var);
1916
1917 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1918 }
1919
1920 if (chain->fini_seq)
1921 {
1922 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1923 chain->fini_seq = NULL;
1924 }
1925}
1926
bbc8a8dc
ZD
1927/* Initializes a variable for load motion for ROOT and prepares phi nodes and
1928 initialization on entry to LOOP if necessary. The ssa name for the variable
1929 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1930 around the loop is created. Uid of the newly created temporary variable
1931 is marked in TMP_VARS. INITS is the list containing the (single)
1932 initializer. */
1933
1934static void
99b1c316 1935initialize_root_vars_lm (class loop *loop, dref root, bool written,
9771b263 1936 vec<tree> *vars, vec<tree> inits,
bbc8a8dc
ZD
1937 bitmap tmp_vars)
1938{
1939 unsigned i;
726a989a
RB
1940 tree ref = DR_REF (root->ref), init, var, next;
1941 gimple_seq stmts;
538dd0b7 1942 gphi *phi;
bbc8a8dc
ZD
1943 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1944
1945 /* Find the initializer for the variable, and check that it cannot
1946 trap. */
9771b263 1947 init = inits[0];
bbc8a8dc 1948
9771b263 1949 vars->create (written ? 2 : 1);
2664efb6 1950 var = predcom_tmp_var (ref, 0, tmp_vars);
9771b263 1951 vars->quick_push (var);
bbc8a8dc 1952 if (written)
9771b263 1953 vars->quick_push ((*vars)[0]);
b8698a0f 1954
9771b263 1955 FOR_EACH_VEC_ELT (*vars, i, var)
b731b390 1956 (*vars)[i] = make_ssa_name (var);
bbc8a8dc 1957
9771b263 1958 var = (*vars)[0];
b8698a0f 1959
bbc8a8dc
ZD
1960 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1961 if (stmts)
5006671f 1962 gsi_insert_seq_on_edge_immediate (entry, stmts);
bbc8a8dc
ZD
1963
1964 if (written)
1965 {
9771b263 1966 next = (*vars)[1];
bbc8a8dc 1967 phi = create_phi_node (var, loop->header);
9e227d60
DC
1968 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1969 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
bbc8a8dc
ZD
1970 }
1971 else
1972 {
538dd0b7 1973 gassign *init_stmt = gimple_build_assign (var, init);
726a989a 1974 gsi_insert_on_edge_immediate (entry, init_stmt);
bbc8a8dc
ZD
1975 }
1976}
1977
1978
1979/* Execute load motion for references in chain CHAIN. Uids of the newly
1980 created temporary variables are marked in TMP_VARS. */
1981
1982static void
99b1c316 1983execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
bbc8a8dc 1984{
ef062b13 1985 auto_vec<tree> vars;
bbc8a8dc
ZD
1986 dref a;
1987 unsigned n_writes = 0, ridx, i;
1988 tree var;
1989
1990 gcc_assert (chain->type == CT_INVARIANT);
1991 gcc_assert (!chain->combined);
9771b263 1992 FOR_EACH_VEC_ELT (chain->refs, i, a)
b0af49c4 1993 if (DR_IS_WRITE (a->ref))
bbc8a8dc 1994 n_writes++;
b8698a0f 1995
bbc8a8dc 1996 /* If there are no reads in the loop, there is nothing to do. */
9771b263 1997 if (n_writes == chain->refs.length ())
bbc8a8dc
ZD
1998 return;
1999
2000 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
2001 &vars, chain->inits, tmp_vars);
2002
2003 ridx = 0;
9771b263 2004 FOR_EACH_VEC_ELT (chain->refs, i, a)
bbc8a8dc
ZD
2005 {
2006 bool is_read = DR_IS_READ (a->ref);
bbc8a8dc 2007
b0af49c4 2008 if (DR_IS_WRITE (a->ref))
bbc8a8dc
ZD
2009 {
2010 n_writes--;
2011 if (n_writes)
2012 {
9771b263 2013 var = vars[0];
b731b390 2014 var = make_ssa_name (SSA_NAME_VAR (var));
9771b263 2015 vars[0] = var;
bbc8a8dc
ZD
2016 }
2017 else
2018 ridx = 1;
2019 }
b8698a0f 2020
9771b263 2021 replace_ref_with (a->stmt, vars[ridx],
bbc8a8dc
ZD
2022 !is_read, !is_read);
2023 }
bbc8a8dc
ZD
2024}
2025
2026/* Returns the single statement in that NAME is used, excepting
2027 the looparound phi nodes contained in one of the chains. If there is no
726a989a 2028 such statement, or more statements, NULL is returned. */
bbc8a8dc 2029
355fe088 2030static gimple *
bbc8a8dc
ZD
2031single_nonlooparound_use (tree name)
2032{
2033 use_operand_p use;
2034 imm_use_iterator it;
355fe088 2035 gimple *stmt, *ret = NULL;
bbc8a8dc
ZD
2036
2037 FOR_EACH_IMM_USE_FAST (use, it, name)
2038 {
2039 stmt = USE_STMT (use);
2040
726a989a 2041 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
2042 {
2043 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2044 could not be processed anyway, so just fail for them. */
2045 if (bitmap_bit_p (looparound_phis,
2046 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2047 continue;
2048
726a989a 2049 return NULL;
bbc8a8dc 2050 }
b63f974e
JJ
2051 else if (is_gimple_debug (stmt))
2052 continue;
726a989a
RB
2053 else if (ret != NULL)
2054 return NULL;
bbc8a8dc
ZD
2055 else
2056 ret = stmt;
2057 }
2058
2059 return ret;
2060}
2061
2062/* Remove statement STMT, as well as the chain of assignments in that it is
2063 used. */
2064
2065static void
355fe088 2066remove_stmt (gimple *stmt)
bbc8a8dc 2067{
726a989a 2068 tree name;
355fe088 2069 gimple *next;
726a989a 2070 gimple_stmt_iterator psi;
bbc8a8dc 2071
726a989a 2072 if (gimple_code (stmt) == GIMPLE_PHI)
bbc8a8dc
ZD
2073 {
2074 name = PHI_RESULT (stmt);
2075 next = single_nonlooparound_use (name);
273ccb6d 2076 reset_debug_uses (stmt);
726a989a
RB
2077 psi = gsi_for_stmt (stmt);
2078 remove_phi_node (&psi, true);
bbc8a8dc
ZD
2079
2080 if (!next
5f8ecf45 2081 || !gimple_assign_ssa_name_copy_p (next)
726a989a 2082 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
2083 return;
2084
2085 stmt = next;
2086 }
2087
2088 while (1)
2089 {
726a989a 2090 gimple_stmt_iterator bsi;
b8698a0f 2091
726a989a 2092 bsi = gsi_for_stmt (stmt);
bbc8a8dc 2093
726a989a 2094 name = gimple_assign_lhs (stmt);
d9c259ef
BC
2095 if (TREE_CODE (name) == SSA_NAME)
2096 {
2097 next = single_nonlooparound_use (name);
2098 reset_debug_uses (stmt);
2099 }
2100 else
2101 {
2102 /* This is a store to be eliminated. */
2103 gcc_assert (gimple_vdef (stmt) != NULL);
2104 next = NULL;
2105 }
bbc8a8dc 2106
13714310 2107 unlink_stmt_vdef (stmt);
726a989a 2108 gsi_remove (&bsi, true);
5f8ecf45 2109 release_defs (stmt);
bbc8a8dc
ZD
2110
2111 if (!next
5f8ecf45 2112 || !gimple_assign_ssa_name_copy_p (next)
726a989a 2113 || gimple_assign_rhs1 (next) != name)
bbc8a8dc
ZD
2114 return;
2115
2116 stmt = next;
2117 }
2118}
2119
2120/* Perform the predictive commoning optimization for a chain CHAIN.
2121 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2122
2123static void
99b1c316 2124execute_pred_commoning_chain (class loop *loop, chain_p chain,
d9c259ef 2125 bitmap tmp_vars)
bbc8a8dc 2126{
9cdcebf9 2127 unsigned i;
13714310 2128 dref a;
bbc8a8dc 2129 tree var;
10dfaf9a 2130 bool in_lhs;
bbc8a8dc
ZD
2131
2132 if (chain->combined)
2133 {
2134 /* For combined chains, just remove the statements that are used to
a933d47f
RB
2135 compute the values of the expression (except for the root one).
2136 We delay this until after all chains are processed. */
bbc8a8dc 2137 }
d9c259ef
BC
2138 else if (chain->type == CT_STORE_STORE)
2139 {
2140 if (chain->length > 0)
2141 {
39ff50ce
BC
2142 if (chain->inv_store_elimination)
2143 {
2144 /* If dead stores in this chain only store loop invariant
2145 values, we can simply record the invariant value and use
2146 it directly after loop. */
2147 initialize_root_vars_store_elim_1 (chain);
2148 }
2149 else
2150 {
2151 /* If dead stores in this chain store loop variant values,
2152 we need to set up the variables by loading from memory
2153 before loop and propagating it with PHI nodes. */
2154 initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2155 }
d9c259ef
BC
2156
2157 /* For inter-iteration store elimination chain, stores at each
2158 distance in loop's last (chain->length - 1) iterations can't
2159 be eliminated, because there is no following killing store.
2160 We need to generate these stores after loop. */
2161 finalize_eliminated_stores (loop, chain);
2162 }
2163
9cdcebf9
BC
2164 bool last_store_p = true;
2165 for (i = chain->refs.length (); i > 0; i--)
2166 {
2167 a = chain->refs[i - 1];
2168 /* Preserve the last store of the chain. Eliminate other stores
2169 which are killed by the last one. */
2170 if (DR_IS_WRITE (a->ref))
2171 {
2172 if (last_store_p)
2173 last_store_p = false;
2174 else
2175 remove_stmt (a->stmt);
2176
2177 continue;
2178 }
2179
2180 /* Any load in Store-Store chain must be dominated by a previous
2181 store, we replace the load reference with rhs of the store. */
2182 dref b = get_chain_last_write_before_load (chain, i - 1);
2183 gcc_assert (b != NULL);
2184 var = gimple_assign_rhs1 (b->stmt);
2185 replace_ref_with (a->stmt, var, false, false);
2186 }
d9c259ef 2187 }
bbc8a8dc
ZD
2188 else
2189 {
10dfaf9a
BC
2190 /* For non-combined chains, set up the variables that hold its value. */
2191 initialize_root_vars (loop, chain, tmp_vars);
2192 a = get_chain_root (chain);
2193 in_lhs = (chain->type == CT_STORE_LOAD
2194 || chain->type == CT_COMBINATION);
2195 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2196
2197 /* Replace the uses of the original references by these variables. */
9771b263 2198 for (i = 1; chain->refs.iterate (i, &a); i++)
bbc8a8dc 2199 {
9771b263 2200 var = chain->vars[chain->length - a->distance];
bbc8a8dc
ZD
2201 replace_ref_with (a->stmt, var, false, false);
2202 }
2203 }
2204}
2205
2206/* Determines the unroll factor necessary to remove as many temporary variable
2207 copies as possible. CHAINS is the list of chains that will be
2208 optimized. */
2209
2210static unsigned
9771b263 2211determine_unroll_factor (vec<chain_p> chains)
bbc8a8dc
ZD
2212{
2213 chain_p chain;
2214 unsigned factor = 1, af, nfactor, i;
028d4092 2215 unsigned max = param_max_unroll_times;
bbc8a8dc 2216
9771b263 2217 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc 2218 {
8039a35d 2219 if (chain->type == CT_INVARIANT)
bbc8a8dc 2220 continue;
d9c259ef
BC
2221 /* For now we can't handle unrolling when eliminating stores. */
2222 else if (chain->type == CT_STORE_STORE)
2223 return 1;
bbc8a8dc 2224
8039a35d
RB
2225 if (chain->combined)
2226 {
2227 /* For combined chains, we can't handle unrolling if we replace
2228 looparound PHIs. */
2229 dref a;
2230 unsigned j;
2231 for (j = 1; chain->refs.iterate (j, &a); j++)
2232 if (gimple_code (a->stmt) == GIMPLE_PHI)
2233 return 1;
2234 continue;
2235 }
2236
bbc8a8dc
ZD
2237 /* The best unroll factor for this chain is equal to the number of
2238 temporary variables that we create for it. */
2239 af = chain->length;
2240 if (chain->has_max_use_after)
2241 af++;
2242
2243 nfactor = factor * af / gcd (factor, af);
2244 if (nfactor <= max)
2245 factor = nfactor;
2246 }
2247
2248 return factor;
2249}
2250
2251/* Perform the predictive commoning optimization for CHAINS.
2252 Uids of the newly created temporary variables are marked in TMP_VARS. */
2253
2254static void
99b1c316 2255execute_pred_commoning (class loop *loop, vec<chain_p> chains,
bbc8a8dc
ZD
2256 bitmap tmp_vars)
2257{
2258 chain_p chain;
2259 unsigned i;
2260
9771b263 2261 FOR_EACH_VEC_ELT (chains, i, chain)
bbc8a8dc
ZD
2262 {
2263 if (chain->type == CT_INVARIANT)
2264 execute_load_motion (loop, chain, tmp_vars);
2265 else
2266 execute_pred_commoning_chain (loop, chain, tmp_vars);
2267 }
b8698a0f 2268
a933d47f
RB
2269 FOR_EACH_VEC_ELT (chains, i, chain)
2270 {
2271 if (chain->type == CT_INVARIANT)
2272 ;
2273 else if (chain->combined)
2274 {
2275 /* For combined chains, just remove the statements that are used to
2276 compute the values of the expression (except for the root one). */
2277 dref a;
2278 unsigned j;
2279 for (j = 1; chain->refs.iterate (j, &a); j++)
2280 remove_stmt (a->stmt);
2281 }
2282 }
2283
bbc8a8dc
ZD
2284 update_ssa (TODO_update_ssa_only_virtuals);
2285}
2286
c80b4100 2287/* For each reference in CHAINS, if its defining statement is
726a989a 2288 phi node, record the ssa name that is defined by it. */
bbc8a8dc
ZD
2289
2290static void
9771b263 2291replace_phis_by_defined_names (vec<chain_p> chains)
bbc8a8dc
ZD
2292{
2293 chain_p chain;
2294 dref a;
2295 unsigned i, j;
2296
9771b263
DN
2297 FOR_EACH_VEC_ELT (chains, i, chain)
2298 FOR_EACH_VEC_ELT (chain->refs, j, a)
bbc8a8dc 2299 {
726a989a
RB
2300 if (gimple_code (a->stmt) == GIMPLE_PHI)
2301 {
2302 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2303 a->stmt = NULL;
2304 }
bbc8a8dc
ZD
2305 }
2306}
2307
726a989a
RB
2308/* For each reference in CHAINS, if name_defined_by_phi is not
2309 NULL, use it to set the stmt field. */
bbc8a8dc
ZD
2310
2311static void
9771b263 2312replace_names_by_phis (vec<chain_p> chains)
bbc8a8dc
ZD
2313{
2314 chain_p chain;
2315 dref a;
2316 unsigned i, j;
2317
9771b263
DN
2318 FOR_EACH_VEC_ELT (chains, i, chain)
2319 FOR_EACH_VEC_ELT (chain->refs, j, a)
726a989a 2320 if (a->stmt == NULL)
bbc8a8dc 2321 {
726a989a
RB
2322 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2323 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2324 a->name_defined_by_phi = NULL_TREE;
bbc8a8dc
ZD
2325 }
2326}
2327
2328/* Wrapper over execute_pred_commoning, to pass it as a callback
2329 to tree_transform_and_unroll_loop. */
2330
2331struct epcc_data
2332{
9771b263 2333 vec<chain_p> chains;
bbc8a8dc
ZD
2334 bitmap tmp_vars;
2335};
2336
2337static void
99b1c316 2338execute_pred_commoning_cbck (class loop *loop, void *data)
bbc8a8dc 2339{
3d9a9f94 2340 struct epcc_data *const dta = (struct epcc_data *) data;
bbc8a8dc
ZD
2341
2342 /* Restore phi nodes that were replaced by ssa names before
2343 tree_transform_and_unroll_loop (see detailed description in
2344 tree_predictive_commoning_loop). */
2345 replace_names_by_phis (dta->chains);
2346 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2347}
2348
bbc8a8dc
ZD
2349/* Base NAME and all the names in the chain of phi nodes that use it
2350 on variable VAR. The phi nodes are recognized by being in the copies of
2351 the header of the LOOP. */
2352
2353static void
99b1c316 2354base_names_in_chain_on (class loop *loop, tree name, tree var)
bbc8a8dc 2355{
355fe088 2356 gimple *stmt, *phi;
bbc8a8dc 2357 imm_use_iterator iter;
bbc8a8dc 2358
b2ec94d4 2359 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
2360
2361 while (1)
2362 {
2363 phi = NULL;
2364 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2365 {
726a989a
RB
2366 if (gimple_code (stmt) == GIMPLE_PHI
2367 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
bbc8a8dc
ZD
2368 {
2369 phi = stmt;
2370 BREAK_FROM_IMM_USE_STMT (iter);
2371 }
2372 }
2373 if (!phi)
2374 return;
2375
bbc8a8dc 2376 name = PHI_RESULT (phi);
b2ec94d4 2377 replace_ssa_name_symbol (name, var);
bbc8a8dc
ZD
2378 }
2379}
2380
2381/* Given an unrolled LOOP after predictive commoning, remove the
2382 register copies arising from phi nodes by changing the base
2383 variables of SSA names. TMP_VARS is the set of the temporary variables
2384 for those we want to perform this. */
2385
2386static void
99b1c316 2387eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
bbc8a8dc
ZD
2388{
2389 edge e;
538dd0b7 2390 gphi *phi;
355fe088 2391 gimple *stmt;
726a989a 2392 tree name, use, var;
538dd0b7 2393 gphi_iterator psi;
bbc8a8dc
ZD
2394
2395 e = loop_latch_edge (loop);
726a989a 2396 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
bbc8a8dc 2397 {
538dd0b7 2398 phi = psi.phi ();
bbc8a8dc
ZD
2399 name = PHI_RESULT (phi);
2400 var = SSA_NAME_VAR (name);
70b5e7dc 2401 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
bbc8a8dc
ZD
2402 continue;
2403 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2404 gcc_assert (TREE_CODE (use) == SSA_NAME);
2405
2406 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2407 stmt = SSA_NAME_DEF_STMT (use);
726a989a 2408 while (gimple_code (stmt) == GIMPLE_PHI
1b0cfaa6
ZD
2409 /* In case we could not unroll the loop enough to eliminate
2410 all copies, we may reach the loop header before the defining
2411 statement (in that case, some register copies will be present
2412 in loop latch in the final code, corresponding to the newly
2413 created looparound phi nodes). */
726a989a 2414 && gimple_bb (stmt) != loop->header)
bbc8a8dc 2415 {
726a989a 2416 gcc_assert (single_pred_p (gimple_bb (stmt)));
bbc8a8dc
ZD
2417 use = PHI_ARG_DEF (stmt, 0);
2418 stmt = SSA_NAME_DEF_STMT (use);
2419 }
2420
2421 base_names_in_chain_on (loop, use, var);
2422 }
2423}
2424
2425/* Returns true if CHAIN is suitable to be combined. */
2426
2427static bool
2428chain_can_be_combined_p (chain_p chain)
2429{
2430 return (!chain->combined
2431 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2432}
2433
2434/* Returns the modify statement that uses NAME. Skips over assignment
2435 statements, NAME is replaced with the actual name used in the returned
2436 statement. */
2437
355fe088 2438static gimple *
bbc8a8dc
ZD
2439find_use_stmt (tree *name)
2440{
355fe088 2441 gimple *stmt;
726a989a 2442 tree rhs, lhs;
bbc8a8dc
ZD
2443
2444 /* Skip over assignments. */
2445 while (1)
2446 {
2447 stmt = single_nonlooparound_use (*name);
2448 if (!stmt)
726a989a 2449 return NULL;
bbc8a8dc 2450
726a989a
RB
2451 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2452 return NULL;
bbc8a8dc 2453
726a989a 2454 lhs = gimple_assign_lhs (stmt);
bbc8a8dc 2455 if (TREE_CODE (lhs) != SSA_NAME)
726a989a 2456 return NULL;
bbc8a8dc 2457
726a989a
RB
2458 if (gimple_assign_copy_p (stmt))
2459 {
2460 rhs = gimple_assign_rhs1 (stmt);
2461 if (rhs != *name)
2462 return NULL;
bbc8a8dc 2463
726a989a
RB
2464 *name = lhs;
2465 }
2466 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2467 == GIMPLE_BINARY_RHS)
2468 return stmt;
2469 else
2470 return NULL;
bbc8a8dc 2471 }
bbc8a8dc
ZD
2472}
2473
2474/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2475
2476static bool
2477may_reassociate_p (tree type, enum tree_code code)
2478{
2479 if (FLOAT_TYPE_P (type)
2480 && !flag_unsafe_math_optimizations)
2481 return false;
2482
2483 return (commutative_tree_code (code)
2484 && associative_tree_code (code));
2485}
2486
2487/* If the operation used in STMT is associative and commutative, go through the
2488 tree of the same operations and returns its root. Distance to the root
2489 is stored in DISTANCE. */
2490
355fe088
TS
2491static gimple *
2492find_associative_operation_root (gimple *stmt, unsigned *distance)
bbc8a8dc 2493{
726a989a 2494 tree lhs;
355fe088 2495 gimple *next;
726a989a
RB
2496 enum tree_code code = gimple_assign_rhs_code (stmt);
2497 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2498 unsigned dist = 0;
2499
726a989a
RB
2500 if (!may_reassociate_p (type, code))
2501 return NULL;
bbc8a8dc
ZD
2502
2503 while (1)
2504 {
726a989a 2505 lhs = gimple_assign_lhs (stmt);
bbc8a8dc
ZD
2506 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2507
2508 next = find_use_stmt (&lhs);
726a989a
RB
2509 if (!next
2510 || gimple_assign_rhs_code (next) != code)
bbc8a8dc
ZD
2511 break;
2512
2513 stmt = next;
2514 dist++;
2515 }
2516
2517 if (distance)
2518 *distance = dist;
2519 return stmt;
2520}
2521
2522/* Returns the common statement in that NAME1 and NAME2 have a use. If there
2523 is no such statement, returns NULL_TREE. In case the operation used on
c80b4100 2524 NAME1 and NAME2 is associative and commutative, returns the root of the
bbc8a8dc
ZD
2525 tree formed by this operation instead of the statement that uses NAME1 or
2526 NAME2. */
2527
355fe088 2528static gimple *
bbc8a8dc
ZD
2529find_common_use_stmt (tree *name1, tree *name2)
2530{
355fe088 2531 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2532
2533 stmt1 = find_use_stmt (name1);
2534 if (!stmt1)
726a989a 2535 return NULL;
bbc8a8dc
ZD
2536
2537 stmt2 = find_use_stmt (name2);
2538 if (!stmt2)
726a989a 2539 return NULL;
bbc8a8dc
ZD
2540
2541 if (stmt1 == stmt2)
2542 return stmt1;
2543
2544 stmt1 = find_associative_operation_root (stmt1, NULL);
2545 if (!stmt1)
726a989a 2546 return NULL;
bbc8a8dc
ZD
2547 stmt2 = find_associative_operation_root (stmt2, NULL);
2548 if (!stmt2)
726a989a 2549 return NULL;
bbc8a8dc 2550
726a989a 2551 return (stmt1 == stmt2 ? stmt1 : NULL);
bbc8a8dc
ZD
2552}
2553
2554/* Checks whether R1 and R2 are combined together using CODE, with the result
2555 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2556 if it is true. If CODE is ERROR_MARK, set these values instead. */
2557
2558static bool
2559combinable_refs_p (dref r1, dref r2,
2560 enum tree_code *code, bool *swap, tree *rslt_type)
2561{
2562 enum tree_code acode;
2563 bool aswap;
2564 tree atype;
726a989a 2565 tree name1, name2;
355fe088 2566 gimple *stmt;
bbc8a8dc
ZD
2567
2568 name1 = name_for_ref (r1);
2569 name2 = name_for_ref (r2);
2570 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2571
2572 stmt = find_common_use_stmt (&name1, &name2);
2573
7906dbe4
RB
2574 if (!stmt
2575 /* A simple post-dominance check - make sure the combination
2576 is executed under the same condition as the references. */
2577 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2578 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
bbc8a8dc
ZD
2579 return false;
2580
726a989a 2581 acode = gimple_assign_rhs_code (stmt);
bbc8a8dc 2582 aswap = (!commutative_tree_code (acode)
726a989a
RB
2583 && gimple_assign_rhs1 (stmt) != name1);
2584 atype = TREE_TYPE (gimple_assign_lhs (stmt));
bbc8a8dc
ZD
2585
2586 if (*code == ERROR_MARK)
2587 {
2588 *code = acode;
2589 *swap = aswap;
2590 *rslt_type = atype;
2591 return true;
2592 }
2593
2594 return (*code == acode
2595 && *swap == aswap
2596 && *rslt_type == atype);
2597}
2598
2599/* Remove OP from the operation on rhs of STMT, and replace STMT with
2600 an assignment of the remaining operand. */
2601
2602static void
355fe088 2603remove_name_from_operation (gimple *stmt, tree op)
bbc8a8dc 2604{
726a989a
RB
2605 tree other_op;
2606 gimple_stmt_iterator si;
bbc8a8dc 2607
726a989a 2608 gcc_assert (is_gimple_assign (stmt));
bbc8a8dc 2609
726a989a
RB
2610 if (gimple_assign_rhs1 (stmt) == op)
2611 other_op = gimple_assign_rhs2 (stmt);
bbc8a8dc 2612 else
726a989a
RB
2613 other_op = gimple_assign_rhs1 (stmt);
2614
2615 si = gsi_for_stmt (stmt);
2616 gimple_assign_set_rhs_from_tree (&si, other_op);
2617
2618 /* We should not have reallocated STMT. */
2619 gcc_assert (gsi_stmt (si) == stmt);
2620
bbc8a8dc
ZD
2621 update_stmt (stmt);
2622}
2623
2624/* Reassociates the expression in that NAME1 and NAME2 are used so that they
1ad3d8aa 2625 are combined in a single statement, and returns this statement. */
bbc8a8dc 2626
355fe088 2627static gimple *
1ad3d8aa 2628reassociate_to_the_same_stmt (tree name1, tree name2)
bbc8a8dc 2629{
355fe088 2630 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
538dd0b7 2631 gassign *new_stmt, *tmp_stmt;
726a989a 2632 tree new_name, tmp_name, var, r1, r2;
bbc8a8dc
ZD
2633 unsigned dist1, dist2;
2634 enum tree_code code;
2635 tree type = TREE_TYPE (name1);
726a989a 2636 gimple_stmt_iterator bsi;
bbc8a8dc
ZD
2637
2638 stmt1 = find_use_stmt (&name1);
2639 stmt2 = find_use_stmt (&name2);
2640 root1 = find_associative_operation_root (stmt1, &dist1);
2641 root2 = find_associative_operation_root (stmt2, &dist2);
726a989a 2642 code = gimple_assign_rhs_code (stmt1);
bbc8a8dc
ZD
2643
2644 gcc_assert (root1 && root2 && root1 == root2
726a989a 2645 && code == gimple_assign_rhs_code (stmt2));
bbc8a8dc
ZD
2646
2647 /* Find the root of the nearest expression in that both NAME1 and NAME2
2648 are used. */
2649 r1 = name1;
2650 s1 = stmt1;
2651 r2 = name2;
2652 s2 = stmt2;
2653
2654 while (dist1 > dist2)
2655 {
2656 s1 = find_use_stmt (&r1);
726a989a 2657 r1 = gimple_assign_lhs (s1);
bbc8a8dc
ZD
2658 dist1--;
2659 }
2660 while (dist2 > dist1)
2661 {
2662 s2 = find_use_stmt (&r2);
726a989a 2663 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2664 dist2--;
2665 }
2666
2667 while (s1 != s2)
2668 {
2669 s1 = find_use_stmt (&r1);
726a989a 2670 r1 = gimple_assign_lhs (s1);
bbc8a8dc 2671 s2 = find_use_stmt (&r2);
726a989a 2672 r2 = gimple_assign_lhs (s2);
bbc8a8dc
ZD
2673 }
2674
2675 /* Remove NAME1 and NAME2 from the statements in that they are used
2676 currently. */
2677 remove_name_from_operation (stmt1, name1);
2678 remove_name_from_operation (stmt2, name2);
2679
2680 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2681 combine it with the rhs of S1. */
acd63801 2682 var = create_tmp_reg (type, "predreastmp");
b731b390 2683 new_name = make_ssa_name (var);
0d0e4a03 2684 new_stmt = gimple_build_assign (new_name, code, name1, name2);
bbc8a8dc 2685
acd63801 2686 var = create_tmp_reg (type, "predreastmp");
b731b390 2687 tmp_name = make_ssa_name (var);
726a989a
RB
2688
2689 /* Rhs of S1 may now be either a binary expression with operation
2690 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2691 so that name1 or name2 was removed from it). */
0d0e4a03
JJ
2692 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2693 gimple_assign_rhs1 (s1),
2694 gimple_assign_rhs2 (s1));
726a989a
RB
2695
2696 bsi = gsi_for_stmt (s1);
2697 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2698 s1 = gsi_stmt (bsi);
bbc8a8dc
ZD
2699 update_stmt (s1);
2700
1ad3d8aa 2701 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
726a989a 2702 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
bbc8a8dc
ZD
2703
2704 return new_stmt;
2705}
2706
2707/* Returns the statement that combines references R1 and R2. In case R1
2708 and R2 are not used in the same statement, but they are used with an
2709 associative and commutative operation in the same expression, reassociate
1ad3d8aa 2710 the expression so that they are used in the same statement. */
bbc8a8dc 2711
355fe088 2712static gimple *
1ad3d8aa 2713stmt_combining_refs (dref r1, dref r2)
bbc8a8dc 2714{
355fe088 2715 gimple *stmt1, *stmt2;
bbc8a8dc
ZD
2716 tree name1 = name_for_ref (r1);
2717 tree name2 = name_for_ref (r2);
2718
2719 stmt1 = find_use_stmt (&name1);
2720 stmt2 = find_use_stmt (&name2);
2721 if (stmt1 == stmt2)
2722 return stmt1;
2723
1ad3d8aa 2724 return reassociate_to_the_same_stmt (name1, name2);
bbc8a8dc
ZD
2725}
2726
2727/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2728 description of the new chain is returned, otherwise we return NULL. */
2729
2730static chain_p
2731combine_chains (chain_p ch1, chain_p ch2)
2732{
2733 dref r1, r2, nw;
2734 enum tree_code op = ERROR_MARK;
2735 bool swap = false;
2736 chain_p new_chain;
1ad3d8aa 2737 unsigned i;
bbc8a8dc
ZD
2738 tree rslt_type = NULL_TREE;
2739
2740 if (ch1 == ch2)
a90352a0 2741 return NULL;
bbc8a8dc
ZD
2742 if (ch1->length != ch2->length)
2743 return NULL;
2744
9771b263 2745 if (ch1->refs.length () != ch2->refs.length ())
bbc8a8dc
ZD
2746 return NULL;
2747
9771b263
DN
2748 for (i = 0; (ch1->refs.iterate (i, &r1)
2749 && ch2->refs.iterate (i, &r2)); i++)
bbc8a8dc
ZD
2750 {
2751 if (r1->distance != r2->distance)
2752 return NULL;
2753
2754 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2755 return NULL;
2756 }
2757
2758 if (swap)
6b4db501 2759 std::swap (ch1, ch2);
bbc8a8dc
ZD
2760
2761 new_chain = XCNEW (struct chain);
2762 new_chain->type = CT_COMBINATION;
82d6e6fc 2763 new_chain->op = op;
bbc8a8dc
ZD
2764 new_chain->ch1 = ch1;
2765 new_chain->ch2 = ch2;
2766 new_chain->rslt_type = rslt_type;
2767 new_chain->length = ch1->length;
2768
1ad3d8aa
BC
2769 for (i = 0; (ch1->refs.iterate (i, &r1)
2770 && ch2->refs.iterate (i, &r2)); i++)
2771 {
99b1c316 2772 nw = XCNEW (class dref_d);
1ad3d8aa 2773 nw->stmt = stmt_combining_refs (r1, r2);
bbc8a8dc 2774 nw->distance = r1->distance;
8539a302 2775
d822f3d5 2776 new_chain->refs.safe_push (nw);
8539a302 2777 }
bbc8a8dc 2778
1ad3d8aa
BC
2779 ch1->combined = true;
2780 ch2->combined = true;
bbc8a8dc
ZD
2781 return new_chain;
2782}
2783
bd9cc42b
BC
2784/* Recursively update position information of all offspring chains to ROOT
2785 chain's position information. */
2786
2787static void
2788update_pos_for_combined_chains (chain_p root)
2789{
2790 chain_p ch1 = root->ch1, ch2 = root->ch2;
2791 dref ref, ref1, ref2;
2792 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2793 && ch1->refs.iterate (j, &ref1)
2794 && ch2->refs.iterate (j, &ref2)); ++j)
2795 ref1->pos = ref2->pos = ref->pos;
2796
2797 if (ch1->type == CT_COMBINATION)
2798 update_pos_for_combined_chains (ch1);
2799 if (ch2->type == CT_COMBINATION)
2800 update_pos_for_combined_chains (ch2);
2801}
2802
2803/* Returns true if statement S1 dominates statement S2. */
2804
2805static bool
2806pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2807{
2808 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2809
2810 if (!bb1 || s1 == s2)
2811 return true;
2812
2813 if (bb1 == bb2)
2814 return gimple_uid (s1) < gimple_uid (s2);
2815
2816 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2817}
2818
2819/* Try to combine the CHAINS in LOOP. */
bbc8a8dc
ZD
2820
2821static void
99b1c316 2822try_combine_chains (class loop *loop, vec<chain_p> *chains)
bbc8a8dc
ZD
2823{
2824 unsigned i, j;
2825 chain_p ch1, ch2, cch;
ef062b13 2826 auto_vec<chain_p> worklist;
bd9cc42b 2827 bool combined_p = false;
bbc8a8dc 2828
9771b263 2829 FOR_EACH_VEC_ELT (*chains, i, ch1)
bbc8a8dc 2830 if (chain_can_be_combined_p (ch1))
9771b263 2831 worklist.safe_push (ch1);
bbc8a8dc 2832
9771b263 2833 while (!worklist.is_empty ())
bbc8a8dc 2834 {
9771b263 2835 ch1 = worklist.pop ();
bbc8a8dc
ZD
2836 if (!chain_can_be_combined_p (ch1))
2837 continue;
2838
9771b263 2839 FOR_EACH_VEC_ELT (*chains, j, ch2)
bbc8a8dc
ZD
2840 {
2841 if (!chain_can_be_combined_p (ch2))
2842 continue;
2843
2844 cch = combine_chains (ch1, ch2);
2845 if (cch)
2846 {
9771b263
DN
2847 worklist.safe_push (cch);
2848 chains->safe_push (cch);
bd9cc42b
BC
2849 combined_p = true;
2850 break;
2851 }
2852 }
2853 }
2854 if (!combined_p)
2855 return;
2856
2857 /* Setup UID for all statements in dominance order. */
3c2df916 2858 basic_block *bbs = get_loop_body_in_dom_order (loop);
bd9cc42b
BC
2859 renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2860 free (bbs);
2861
2862 /* Re-association in combined chains may generate statements different to
2863 order of references of the original chain. We need to keep references
2864 of combined chain in dominance order so that all uses will be inserted
2865 after definitions. Note:
2866 A) This is necessary for all combined chains.
2867 B) This is only necessary for ZERO distance references because other
2868 references inherit value from loop carried PHIs.
2869
2870 We first update position information for all combined chains. */
2871 dref ref;
2872 for (i = 0; chains->iterate (i, &ch1); ++i)
2873 {
2874 if (ch1->type != CT_COMBINATION || ch1->combined)
2875 continue;
2876
2877 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2878 ref->pos = gimple_uid (ref->stmt);
2879
2880 update_pos_for_combined_chains (ch1);
2881 }
2882 /* Then sort references according to newly updated position information. */
2883 for (i = 0; chains->iterate (i, &ch1); ++i)
2884 {
2885 if (ch1->type != CT_COMBINATION && !ch1->combined)
2886 continue;
2887
2888 /* Find the first reference with non-ZERO distance. */
2889 if (ch1->length == 0)
2890 j = ch1->refs.length();
2891 else
2892 {
2893 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2894 if (ref->distance != 0)
2895 break;
2896 }
2897
2898 /* Sort all ZERO distance references by position. */
2899 qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2900
2901 if (ch1->combined)
2902 continue;
2903
2904 /* For ZERO length chain, has_max_use_after must be true since root
2905 combined stmt must dominates others. */
2906 if (ch1->length == 0)
2907 {
2908 ch1->has_max_use_after = true;
2909 continue;
2910 }
2911 /* Check if there is use at max distance after root for combined chains
2912 and set flag accordingly. */
2913 ch1->has_max_use_after = false;
2914 gimple *root_stmt = get_chain_root (ch1)->stmt;
2915 for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2916 {
2917 if (ref->distance == ch1->length
2918 && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2919 {
2920 ch1->has_max_use_after = true;
bbc8a8dc
ZD
2921 break;
2922 }
2923 }
2924 }
2925}
2926
d9c259ef
BC
2927/* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2928 if this is impossible because one of these initializers may trap, true
2929 otherwise. */
2930
2931static bool
99b1c316 2932prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
d9c259ef
BC
2933{
2934 unsigned i, n = chain->length;
2935
2936 /* For now we can't eliminate stores if some of them are conditional
2937 executed. */
2938 if (!chain->all_always_accessed)
2939 return false;
2940
2941 /* Nothing to intialize for intra-iteration store elimination. */
2942 if (n == 0 && chain->type == CT_STORE_STORE)
2943 return true;
2944
39ff50ce
BC
2945 /* For store elimination chain, there is nothing to initialize if stores
2946 to be eliminated only store loop invariant values into memory. */
2947 if (chain->type == CT_STORE_STORE
2948 && is_inv_store_elimination_chain (loop, chain))
2949 {
2950 chain->inv_store_elimination = true;
2951 return true;
2952 }
2953
d9c259ef 2954 chain->inits.create (n);
cb3874dc 2955 chain->inits.safe_grow_cleared (n, true);
d9c259ef
BC
2956
2957 /* For store eliminatin chain like below:
2958
2959 for (i = 0; i < len; i++)
2960 {
2961 a[i] = 1;
2962 // a[i + 1] = ...
2963 a[i + 2] = 3;
2964 }
2965
2966 store to a[i + 1] is missed in loop body, it acts like bubbles. The
2967 content of a[i + 1] remain the same if the loop iterates fewer times
2968 than chain->length. We need to set up root variables for such stores
2969 by loading from memory before loop. Note we only need to load bubble
2970 elements because loop body is guaranteed to be executed at least once
2971 after loop's preheader edge. */
2972 auto_vec<bool> bubbles;
cb3874dc 2973 bubbles.safe_grow_cleared (n + 1, true);
d9c259ef
BC
2974 for (i = 0; i < chain->refs.length (); i++)
2975 bubbles[chain->refs[i]->distance] = true;
2976
2977 struct data_reference *dr = get_chain_root (chain)->ref;
2978 for (i = 0; i < n; i++)
2979 {
2980 if (bubbles[i])
2981 continue;
2982
2983 gimple_seq stmts = NULL;
2984
2985 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2986 if (stmts)
2987 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2988
2989 chain->inits[i] = init;
2990 }
2991
2992 return true;
2993}
2994
bbc8a8dc
ZD
2995/* Prepare initializers for CHAIN in LOOP. Returns false if this is
2996 impossible because one of these initializers may trap, true otherwise. */
2997
2998static bool
99b1c316 2999prepare_initializers_chain (class loop *loop, chain_p chain)
bbc8a8dc
ZD
3000{
3001 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
3002 struct data_reference *dr = get_chain_root (chain)->ref;
726a989a 3003 tree init;
bbc8a8dc
ZD
3004 dref laref;
3005 edge entry = loop_preheader_edge (loop);
3006
d9c259ef
BC
3007 if (chain->type == CT_STORE_STORE)
3008 return prepare_initializers_chain_store_elim (loop, chain);
3009
bbc8a8dc
ZD
3010 /* Find the initializers for the variables, and check that they cannot
3011 trap. */
9771b263 3012 chain->inits.create (n);
bbc8a8dc 3013 for (i = 0; i < n; i++)
9771b263 3014 chain->inits.quick_push (NULL_TREE);
bbc8a8dc
ZD
3015
3016 /* If we have replaced some looparound phi nodes, use their initializers
3017 instead of creating our own. */
9771b263 3018 FOR_EACH_VEC_ELT (chain->refs, i, laref)
bbc8a8dc 3019 {
726a989a 3020 if (gimple_code (laref->stmt) != GIMPLE_PHI)
bbc8a8dc
ZD
3021 continue;
3022
3023 gcc_assert (laref->distance > 0);
9771b263
DN
3024 chain->inits[n - laref->distance]
3025 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
bbc8a8dc
ZD
3026 }
3027
3028 for (i = 0; i < n; i++)
3029 {
7ec44c3d
RB
3030 gimple_seq stmts = NULL;
3031
9771b263 3032 if (chain->inits[i] != NULL_TREE)
bbc8a8dc
ZD
3033 continue;
3034
9f2b860b 3035 init = ref_at_iteration (dr, (int) i - n, &stmts);
bbc8a8dc 3036 if (!chain->all_always_accessed && tree_could_trap_p (init))
7ec44c3d
RB
3037 {
3038 gimple_seq_discard (stmts);
3039 return false;
3040 }
bbc8a8dc 3041
bbc8a8dc 3042 if (stmts)
aa4de160 3043 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
bbc8a8dc 3044
9771b263 3045 chain->inits[i] = init;
bbc8a8dc
ZD
3046 }
3047
3048 return true;
3049}
3050
3051/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3052 be used because the initializers might trap. */
3053
3054static void
99b1c316 3055prepare_initializers (class loop *loop, vec<chain_p> chains)
bbc8a8dc
ZD
3056{
3057 chain_p chain;
3058 unsigned i;
3059
9771b263 3060 for (i = 0; i < chains.length (); )
bbc8a8dc 3061 {
9771b263 3062 chain = chains[i];
bbc8a8dc
ZD
3063 if (prepare_initializers_chain (loop, chain))
3064 i++;
3065 else
3066 {
3067 release_chain (chain);
9771b263 3068 chains.unordered_remove (i);
bbc8a8dc
ZD
3069 }
3070 }
3071}
3072
d9c259ef
BC
3073/* Generates finalizer memory references for CHAIN in LOOP. Returns true
3074 if finalizer code for CHAIN can be generated, otherwise false. */
3075
3076static bool
99b1c316 3077prepare_finalizers_chain (class loop *loop, chain_p chain)
d9c259ef
BC
3078{
3079 unsigned i, n = chain->length;
3080 struct data_reference *dr = get_chain_root (chain)->ref;
3081 tree fini, niters = number_of_latch_executions (loop);
3082
3083 /* For now we can't eliminate stores if some of them are conditional
3084 executed. */
3085 if (!chain->all_always_accessed)
3086 return false;
3087
3088 chain->finis.create (n);
3089 for (i = 0; i < n; i++)
3090 chain->finis.quick_push (NULL_TREE);
3091
3092 /* We never use looparound phi node for store elimination chains. */
3093
3094 /* Find the finalizers for the variables, and check that they cannot
3095 trap. */
3096 for (i = 0; i < n; i++)
3097 {
3098 gimple_seq stmts = NULL;
3099 gcc_assert (chain->finis[i] == NULL_TREE);
3100
3101 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3102 {
a5c93f53 3103 niters = unshare_expr (niters);
d9c259ef
BC
3104 niters = force_gimple_operand (niters, &stmts, true, NULL);
3105 if (stmts)
3106 {
3107 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3108 stmts = NULL;
3109 }
3110 }
3111 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3112 if (stmts)
3113 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3114
3115 chain->finis[i] = fini;
3116 }
3117
3118 return true;
3119}
3120
3121/* Generates finalizer memory reference for CHAINS in LOOP. Returns true
3122 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3123
3124static bool
99b1c316 3125prepare_finalizers (class loop *loop, vec<chain_p> chains)
d9c259ef
BC
3126{
3127 chain_p chain;
3128 unsigned i;
3129 bool loop_closed_ssa = false;
3130
3131 for (i = 0; i < chains.length ();)
3132 {
3133 chain = chains[i];
3134
3135 /* Finalizer is only necessary for inter-iteration store elimination
3136 chains. */
3137 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3138 {
3139 i++;
3140 continue;
3141 }
3142
3143 if (prepare_finalizers_chain (loop, chain))
3144 {
3145 i++;
7061cfc0
BC
3146 /* Be conservative, assume loop closed ssa form is corrupted
3147 by store-store chain. Though it's not always the case if
3148 eliminated stores only store loop invariant values into
3149 memory. */
3150 loop_closed_ssa = true;
d9c259ef
BC
3151 }
3152 else
3153 {
3154 release_chain (chain);
3155 chains.unordered_remove (i);
3156 }
3157 }
3158 return loop_closed_ssa;
3159}
3160
aa4de160
BC
3161/* Insert all initializing gimple stmts into loop's entry edge. */
3162
3163static void
99b1c316 3164insert_init_seqs (class loop *loop, vec<chain_p> chains)
aa4de160
BC
3165{
3166 unsigned i;
3167 edge entry = loop_preheader_edge (loop);
3168
3169 for (i = 0; i < chains.length (); ++i)
3170 if (chains[i]->init_seq)
3171 {
3172 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3173 chains[i]->init_seq = NULL;
3174 }
3175}
3176
d9c259ef
BC
3177/* Performs predictive commoning for LOOP. Sets bit 1<<0 of return value
3178 if LOOP was unrolled; Sets bit 1<<1 of return value if loop closed ssa
3179 form was corrupted. */
bbc8a8dc 3180
d9c259ef 3181static unsigned
99b1c316 3182tree_predictive_commoning_loop (class loop *loop)
bbc8a8dc 3183{
9771b263
DN
3184 vec<data_reference_p> datarefs;
3185 vec<ddr_p> dependences;
bbc8a8dc 3186 struct component *components;
6e1aa848 3187 vec<chain_p> chains = vNULL;
bbc8a8dc 3188 unsigned unroll_factor;
99b1c316 3189 class tree_niter_desc desc;
d9c259ef 3190 bool unroll = false, loop_closed_ssa = false;
bbc8a8dc 3191 edge exit;
bbc8a8dc
ZD
3192
3193 if (dump_file && (dump_flags & TDF_DETAILS))
3194 fprintf (dump_file, "Processing loop %d\n", loop->num);
3195
b8a4b8c8
BC
3196 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3197 if (get_max_loop_iterations_int (loop) == 0)
3198 {
3199 if (dump_file && (dump_flags & TDF_DETAILS))
3200 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3201
d9c259ef 3202 return 0;
b8a4b8c8
BC
3203 }
3204
bbc8a8dc
ZD
3205 /* Find the data references and split them into components according to their
3206 dependence relations. */
00f96dc9 3207 auto_vec<loop_p, 3> loop_nest;
9771b263 3208 dependences.create (10);
07687835 3209 datarefs.create (10);
9ca3d00e
AB
3210 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3211 &dependences))
3212 {
3213 if (dump_file && (dump_flags & TDF_DETAILS))
3214 fprintf (dump_file, "Cannot analyze data dependencies\n");
9ca3d00e
AB
3215 free_data_refs (datarefs);
3216 free_dependence_relations (dependences);
d9c259ef 3217 return 0;
9ca3d00e
AB
3218 }
3219
bbc8a8dc
ZD
3220 if (dump_file && (dump_flags & TDF_DETAILS))
3221 dump_data_dependence_relations (dump_file, dependences);
3222
3223 components = split_data_refs_to_components (loop, datarefs, dependences);
9771b263 3224 loop_nest.release ();
bbc8a8dc
ZD
3225 free_dependence_relations (dependences);
3226 if (!components)
3227 {
3228 free_data_refs (datarefs);
4f87d581 3229 free_affine_expand_cache (&name_expansions);
d9c259ef 3230 return 0;
bbc8a8dc
ZD
3231 }
3232
3233 if (dump_file && (dump_flags & TDF_DETAILS))
3234 {
3235 fprintf (dump_file, "Initial state:\n\n");
3236 dump_components (dump_file, components);
3237 }
3238
3239 /* Find the suitable components and split them into chains. */
3240 components = filter_suitable_components (loop, components);
3241
0e3de1d4 3242 auto_bitmap tmp_vars;
bbc8a8dc
ZD
3243 looparound_phis = BITMAP_ALLOC (NULL);
3244 determine_roots (loop, components, &chains);
3245 release_components (components);
3246
9771b263 3247 if (!chains.exists ())
bbc8a8dc
ZD
3248 {
3249 if (dump_file && (dump_flags & TDF_DETAILS))
3250 fprintf (dump_file,
3251 "Predictive commoning failed: no suitable chains\n");
3252 goto end;
3253 }
3254 prepare_initializers (loop, chains);
d9c259ef 3255 loop_closed_ssa = prepare_finalizers (loop, chains);
bbc8a8dc
ZD
3256
3257 /* Try to combine the chains that are always worked with together. */
bd9cc42b 3258 try_combine_chains (loop, &chains);
bbc8a8dc 3259
aa4de160
BC
3260 insert_init_seqs (loop, chains);
3261
bbc8a8dc
ZD
3262 if (dump_file && (dump_flags & TDF_DETAILS))
3263 {
3264 fprintf (dump_file, "Before commoning:\n\n");
3265 dump_chains (dump_file, chains);
3266 }
3267
3268 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3269 that its number of iterations is divisible by the factor. */
3270 unroll_factor = determine_unroll_factor (chains);
3271 scev_reset ();
e1e6dc73
RG
3272 unroll = (unroll_factor > 1
3273 && can_unroll_loop_p (loop, unroll_factor, &desc));
bbc8a8dc
ZD
3274 exit = single_dom_exit (loop);
3275
3276 /* Execute the predictive commoning transformations, and possibly unroll the
3277 loop. */
3278 if (unroll)
3279 {
3280 struct epcc_data dta;
3281
3282 if (dump_file && (dump_flags & TDF_DETAILS))
3283 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3284
3285 dta.chains = chains;
3286 dta.tmp_vars = tmp_vars;
b8698a0f 3287
bbc8a8dc
ZD
3288 update_ssa (TODO_update_ssa_only_virtuals);
3289
3290 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3291 execute_pred_commoning_cbck is called may cause phi nodes to be
3292 reallocated, which is a problem since CHAINS may point to these
3293 statements. To fix this, we store the ssa names defined by the
3294 phi nodes here instead of the phi nodes themselves, and restore
3295 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3296 replace_phis_by_defined_names (chains);
3297
3298 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3299 execute_pred_commoning_cbck, &dta);
3300 eliminate_temp_copies (loop, tmp_vars);
3301 }
3302 else
3303 {
3304 if (dump_file && (dump_flags & TDF_DETAILS))
3305 fprintf (dump_file,
3306 "Executing predictive commoning without unrolling.\n");
3307 execute_pred_commoning (loop, chains, tmp_vars);
3308 }
3309
3310end: ;
3311 release_chains (chains);
3312 free_data_refs (datarefs);
bbc8a8dc
ZD
3313 BITMAP_FREE (looparound_phis);
3314
3315 free_affine_expand_cache (&name_expansions);
3316
d9c259ef 3317 return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
bbc8a8dc
ZD
3318}
3319
3320/* Runs predictive commoning. */
3321
592c303d 3322unsigned
bbc8a8dc
ZD
3323tree_predictive_commoning (void)
3324{
99b1c316 3325 class loop *loop;
d9c259ef 3326 unsigned ret = 0, changed = 0;
bbc8a8dc
ZD
3327
3328 initialize_original_copy_tables ();
f0bd40b1 3329 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
8bcf15f6
JH
3330 if (optimize_loop_for_speed_p (loop))
3331 {
d9c259ef 3332 changed |= tree_predictive_commoning_loop (loop);
8bcf15f6 3333 }
d9c259ef 3334 free_original_copy_tables ();
bbc8a8dc 3335
d9c259ef 3336 if (changed > 0)
bbc8a8dc
ZD
3337 {
3338 scev_reset ();
d9c259ef
BC
3339
3340 if (changed > 1)
3341 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3342
592c303d 3343 ret = TODO_cleanup_cfg;
bbc8a8dc 3344 }
592c303d
ZD
3345
3346 return ret;
bbc8a8dc 3347}
c1bf2a39
AM
3348
3349/* Predictive commoning Pass. */
3350
3351static unsigned
726338f4 3352run_tree_predictive_commoning (struct function *fun)
c1bf2a39 3353{
726338f4 3354 if (number_of_loops (fun) <= 1)
c1bf2a39
AM
3355 return 0;
3356
3357 return tree_predictive_commoning ();
3358}
3359
c1bf2a39
AM
3360namespace {
3361
3362const pass_data pass_data_predcom =
3363{
3364 GIMPLE_PASS, /* type */
3365 "pcom", /* name */
3366 OPTGROUP_LOOP, /* optinfo_flags */
c1bf2a39
AM
3367 TV_PREDCOM, /* tv_id */
3368 PROP_cfg, /* properties_required */
3369 0, /* properties_provided */
3370 0, /* properties_destroyed */
3371 0, /* todo_flags_start */
3372 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3373};
3374
3375class pass_predcom : public gimple_opt_pass
3376{
3377public:
3378 pass_predcom (gcc::context *ctxt)
3379 : gimple_opt_pass (pass_data_predcom, ctxt)
3380 {}
3381
3382 /* opt_pass methods: */
1a3d085c 3383 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
726338f4 3384 virtual unsigned int execute (function *fun)
be55bfe6 3385 {
726338f4 3386 return run_tree_predictive_commoning (fun);
be55bfe6 3387 }
c1bf2a39
AM
3388
3389}; // class pass_predcom
3390
3391} // anon namespace
3392
3393gimple_opt_pass *
3394make_pass_predcom (gcc::context *ctxt)
3395{
3396 return new pass_predcom (ctxt);
3397}
3398
3399