ad4a85ad 1/* Predictive commoning.
fbd26352 2 Copyright (C) 2005-2019 Free Software Foundation, Inc.
48e1416a 3
ad4a85ad 4This file is part of GCC.
48e1416a 5
ad4a85ad 6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8c4c00c1 8Free Software Foundation; either version 3, or (at your option) any
ad4a85ad 9later version.
48e1416a 10
ad4a85ad 11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
48e1416a 15
ad4a85ad 16You should have received a copy of the GNU General Public License
8c4c00c1 17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
ad4a85ad 19
20/* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction -- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
48e1416a 32
ad4a85ad 33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require a lower unrolling
65 factor in order to get rid of the copies on the loop latch.
48e1416a 66
ad4a85ad 67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
48e1416a 79 single root reference of the chain (adjusting their distance
ad4a85ad 80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be the maximum distance
9d75589a 102 of a reference reusing its value. Variables R0 up to RN are created,
ad4a85ad 103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the least common multiple of
130 (N + 1) for each root reference (N for references for which we avoided
131 creating RN). If F and the loop are small enough, the loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
48e1416a 135
ad4a85ad 136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
a652753d 159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by another store can be
161 eliminated. Consider the example below:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
ad4a85ad 168
a652753d 169 It can be replaced with:
ad4a85ad 170
a652753d 171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
ad4a85ad 182
a652753d 183 If the loop runs more than 1 iteration, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is that this can be viewed either as general store motion
193 or as general dead store elimination, both within and across loop iterations.
194
83c06cf0 195 With trivial effort, we also support loads inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of the loop.
197 You can see this as a restricted Store-Mixed-Load-Store chain.
198
a652753d 199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force the loop not to be unrolled when a store-store chain is present, even
201 if other chains might ask for unrolling.
ad4a85ad 202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
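/* As an illustration of that TODO (a sketch only, nothing this pass
   implements), the squares in

     for (i = 0; i < n; i++)
       use (i * i);

   could be carried in a scalar updated with the transfer function
   (i + 1) * (i + 1) == i * i + 2 * i + 1:

     ii = 0;
     for (i = 0; i < n; i++)
       {
         use (ii);
         ii = ii + 2 * i + 1;
       }

   (use () is just a placeholder for an arbitrary consumer of the value.)  */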
206
207#include "config.h"
208#include "system.h"
209#include "coretypes.h"
9ef16211 210#include "backend.h"
7c29e30e 211#include "rtl.h"
ad4a85ad 212#include "tree.h"
9ef16211 213#include "gimple.h"
7c29e30e 214#include "predict.h"
215#include "tree-pass.h"
9ef16211 216#include "ssa.h"
7c29e30e 217#include "gimple-pretty-print.h"
9ef16211 218#include "alias.h"
b20a8bb4 219#include "fold-const.h"
ad4a85ad 220#include "cfgloop.h"
bc61cadb 221#include "tree-eh.h"
a8783bee 222#include "gimplify.h"
dcf1a1ec 223#include "gimple-iterator.h"
e795d6e1 224#include "gimplify-me.h"
05d9c18a 225#include "tree-ssa-loop-ivopts.h"
226#include "tree-ssa-loop-manip.h"
227#include "tree-ssa-loop-niter.h"
073c1fd5 228#include "tree-ssa-loop.h"
229#include "tree-into-ssa.h"
230#include "tree-dfa.h"
69ee5dbb 231#include "tree-ssa.h"
ad4a85ad 232#include "tree-data-ref.h"
233#include "tree-scalar-evolution.h"
ad4a85ad 234#include "params.h"
ad4a85ad 235#include "tree-affine.h"
40395983 236#include "builtins.h"
ad4a85ad 237
238/* The maximum number of iterations between the considered memory
239 references. */
240
241#define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
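/* (Rationale, roughly: each additional iteration of reuse distance requires
   another temporary variable to carry the value and increases the unroll
   factor needed to get rid of the register copies, so e.g. the reuse between
   a[i] and a[i + 6] is only considered when enough registers are
   available.)  */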
48e1416a 242
75a70cf9 243/* Data references (or phi nodes that carry data reference values across
244 loop iterations). */
ad4a85ad 245
26dbec0a 246typedef struct dref_d
ad4a85ad 247{
248 /* The reference itself. */
249 struct data_reference *ref;
250
251 /* The statement in which the reference appears. */
42acab1c 252 gimple *stmt;
75a70cf9 253
254 /* In case that STMT is a phi node, this field is set to the SSA name
255 defined by it in replace_phis_by_defined_names (in order to avoid
256 pointing to a phi node that got reallocated in the meantime). */
257 tree name_defined_by_phi;
ad4a85ad 258
259 /* Distance of the reference from the root of the chain (in number of
260 iterations of the loop). */
261 unsigned distance;
262
263 /* Number of iterations offset from the first reference in the component. */
5de9d3ed 264 widest_int offset;
ad4a85ad 265
266 /* Number of the reference in a component, in dominance ordering. */
267 unsigned pos;
268
269 /* True if the memory reference is always accessed when the loop is
270 entered. */
271 unsigned always_accessed : 1;
272} *dref;
273
ad4a85ad 274
275/* Type of the chain of the references. */
276
277enum chain_type
278{
279 /* The addresses of the references in the chain are constant. */
280 CT_INVARIANT,
281
282 /* There are only loads in the chain. */
283 CT_LOAD,
284
285 /* Root of the chain is store, the rest are loads. */
286 CT_STORE_LOAD,
287
a652753d 288 /* There are only stores in the chain. */
289 CT_STORE_STORE,
290
ad4a85ad 291 /* A combination of two chains. */
292 CT_COMBINATION
293};
294
295/* Chains of data references. */
296
297typedef struct chain
298{
299 /* Type of the chain. */
300 enum chain_type type;
301
302 /* For combination chains, the operator and the two chains that are
303 combined, and the type of the result. */
f4e36c33 304 enum tree_code op;
ad4a85ad 305 tree rslt_type;
306 struct chain *ch1, *ch2;
307
308 /* The references in the chain. */
f1f41a6c 309 vec<dref> refs;
ad4a85ad 310
311 /* The maximum distance of the reference in the chain from the root. */
312 unsigned length;
313
314 /* The variables used to copy the value throughout iterations. */
f1f41a6c 315 vec<tree> vars;
ad4a85ad 316
317 /* Initializers for the variables. */
f1f41a6c 318 vec<tree> inits;
ad4a85ad 319
a652753d 320 /* Finalizers for the eliminated stores. */
321 vec<tree> finis;
322
a481acc5 323 /* gimple stmts initializing the initial variables of the chain. */
324 gimple_seq init_seq;
325
a652753d 326 /* gimple stmts finalizing the eliminated stores of the chain. */
327 gimple_seq fini_seq;
328
ad4a85ad 329 /* True if there is a use of a variable with the maximal distance
330 that comes after the root in the loop. */
331 unsigned has_max_use_after : 1;
332
333 /* True if all the memory references in the chain are always accessed. */
334 unsigned all_always_accessed : 1;
335
336 /* True if this chain was combined together with some other chain. */
337 unsigned combined : 1;
e33b3a13 338
339 /* True if this is a store elimination chain and the eliminated stores
340 store a loop invariant value into memory. */
341 unsigned inv_store_elimination : 1;
ad4a85ad 342} *chain_p;
343
ad4a85ad 344
345/* Describes the knowledge about the step of the memory references in
346 the component. */
347
348enum ref_step_type
349{
350 /* The step is zero. */
351 RS_INVARIANT,
352
353 /* The step is nonzero. */
354 RS_NONZERO,
355
356 /* The step may or may not be nonzero. */
357 RS_ANY
358};
359
360/* Components of the data dependence graph. */
361
362struct component
363{
364 /* The references in the component. */
f1f41a6c 365 vec<dref> refs;
ad4a85ad 366
367 /* What we know about the step of the references in the component. */
368 enum ref_step_type comp_step;
369
a652753d 370 /* True if all references in the component are stores and we try to do
371 intra/inter loop iteration dead store elimination. */
372 bool eliminate_store_p;
373
ad4a85ad 374 /* Next component in the list. */
375 struct component *next;
376};
377
378/* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
379
380static bitmap looparound_phis;
381
382/* Cache used by tree_to_aff_combination_expand. */
383
5f8841a5 384static hash_map<tree, name_expansion *> *name_expansions;
ad4a85ad 385
386/* Dumps data reference REF to FILE. */
387
388extern void dump_dref (FILE *, dref);
389void
390dump_dref (FILE *file, dref ref)
391{
392 if (ref->ref)
393 {
394 fprintf (file, " ");
395 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
396 fprintf (file, " (id %u%s)\n", ref->pos,
397 DR_IS_READ (ref->ref) ? "" : ", write");
398
399 fprintf (file, " offset ");
e913b5cd 400 print_decs (ref->offset, file);
ad4a85ad 401 fprintf (file, "\n");
402
403 fprintf (file, " distance %u\n", ref->distance);
404 }
405 else
406 {
75a70cf9 407 if (gimple_code (ref->stmt) == GIMPLE_PHI)
ad4a85ad 408 fprintf (file, " looparound ref\n");
409 else
410 fprintf (file, " combination ref\n");
411 fprintf (file, " in statement ");
75a70cf9 412 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
ad4a85ad 413 fprintf (file, "\n");
414 fprintf (file, " distance %u\n", ref->distance);
415 }
416
417}
418
419/* Dumps CHAIN to FILE. */
420
421extern void dump_chain (FILE *, chain_p);
422void
423dump_chain (FILE *file, chain_p chain)
424{
425 dref a;
426 const char *chain_type;
427 unsigned i;
428 tree var;
429
430 switch (chain->type)
431 {
432 case CT_INVARIANT:
433 chain_type = "Load motion";
434 break;
435
436 case CT_LOAD:
437 chain_type = "Loads-only";
438 break;
439
440 case CT_STORE_LOAD:
441 chain_type = "Store-loads";
442 break;
443
a652753d 444 case CT_STORE_STORE:
445 chain_type = "Store-stores";
446 break;
447
ad4a85ad 448 case CT_COMBINATION:
449 chain_type = "Combination";
450 break;
451
452 default:
453 gcc_unreachable ();
454 }
455
456 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
457 chain->combined ? " (combined)" : "");
458 if (chain->type != CT_INVARIANT)
459 fprintf (file, " max distance %u%s\n", chain->length,
460 chain->has_max_use_after ? "" : ", may reuse first");
461
462 if (chain->type == CT_COMBINATION)
463 {
464 fprintf (file, " equal to %p %s %p in type ",
f4e36c33 465 (void *) chain->ch1, op_symbol_code (chain->op),
ad4a85ad 466 (void *) chain->ch2);
467 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
468 fprintf (file, "\n");
469 }
470
f1f41a6c 471 if (chain->vars.exists ())
ad4a85ad 472 {
473 fprintf (file, " vars");
f1f41a6c 474 FOR_EACH_VEC_ELT (chain->vars, i, var)
ad4a85ad 475 {
476 fprintf (file, " ");
477 print_generic_expr (file, var, TDF_SLIM);
478 }
479 fprintf (file, "\n");
480 }
481
f1f41a6c 482 if (chain->inits.exists ())
ad4a85ad 483 {
484 fprintf (file, " inits");
f1f41a6c 485 FOR_EACH_VEC_ELT (chain->inits, i, var)
ad4a85ad 486 {
487 fprintf (file, " ");
488 print_generic_expr (file, var, TDF_SLIM);
489 }
490 fprintf (file, "\n");
491 }
492
493 fprintf (file, " references:\n");
f1f41a6c 494 FOR_EACH_VEC_ELT (chain->refs, i, a)
ad4a85ad 495 dump_dref (file, a);
496
497 fprintf (file, "\n");
498}
499
500/* Dumps CHAINS to FILE. */
501
f1f41a6c 502extern void dump_chains (FILE *, vec<chain_p> );
ad4a85ad 503void
f1f41a6c 504dump_chains (FILE *file, vec<chain_p> chains)
ad4a85ad 505{
506 chain_p chain;
507 unsigned i;
508
f1f41a6c 509 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 510 dump_chain (file, chain);
511}
512
513/* Dumps COMP to FILE. */
514
515extern void dump_component (FILE *, struct component *);
516void
517dump_component (FILE *file, struct component *comp)
518{
519 dref a;
520 unsigned i;
521
522 fprintf (file, "Component%s:\n",
523 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
f1f41a6c 524 FOR_EACH_VEC_ELT (comp->refs, i, a)
ad4a85ad 525 dump_dref (file, a);
526 fprintf (file, "\n");
527}
528
529/* Dumps COMPS to FILE. */
530
531extern void dump_components (FILE *, struct component *);
532void
533dump_components (FILE *file, struct component *comps)
534{
535 struct component *comp;
536
537 for (comp = comps; comp; comp = comp->next)
538 dump_component (file, comp);
539}
540
541/* Frees a chain CHAIN. */
542
543static void
544release_chain (chain_p chain)
545{
546 dref ref;
547 unsigned i;
548
549 if (chain == NULL)
550 return;
551
f1f41a6c 552 FOR_EACH_VEC_ELT (chain->refs, i, ref)
ad4a85ad 553 free (ref);
554
f1f41a6c 555 chain->refs.release ();
556 chain->vars.release ();
557 chain->inits.release ();
a481acc5 558 if (chain->init_seq)
559 gimple_seq_discard (chain->init_seq);
ad4a85ad 560
a652753d 561 chain->finis.release ();
562 if (chain->fini_seq)
563 gimple_seq_discard (chain->fini_seq);
564
ad4a85ad 565 free (chain);
566}
567
568/* Frees CHAINS. */
569
570static void
f1f41a6c 571release_chains (vec<chain_p> chains)
ad4a85ad 572{
573 unsigned i;
574 chain_p chain;
575
f1f41a6c 576 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 577 release_chain (chain);
f1f41a6c 578 chains.release ();
ad4a85ad 579}
580
581/* Frees a component COMP. */
582
583static void
584release_component (struct component *comp)
585{
f1f41a6c 586 comp->refs.release ();
ad4a85ad 587 free (comp);
588}
589
590/* Frees list of components COMPS. */
591
592static void
593release_components (struct component *comps)
594{
595 struct component *act, *next;
596
597 for (act = comps; act; act = next)
598 {
599 next = act->next;
600 release_component (act);
601 }
602}
603
604/* Finds the root of the tree given by FATHERS that contains A, and performs
605 path shortening (path compression). */
606
607static unsigned
608component_of (unsigned fathers[], unsigned a)
609{
610 unsigned root, n;
611
612 for (root = a; root != fathers[root]; root = fathers[root])
613 continue;
614
615 for (; a != root; a = n)
616 {
617 n = fathers[a];
618 fathers[a] = root;
619 }
620
621 return root;
622}
623
624/* Join operation for the union-find structure. FATHERS gives the tree,
625 SIZES are sizes of the components, A and B are the components to merge. */
626
627static void
628merge_comps (unsigned fathers[], unsigned sizes[], unsigned a, unsigned b)
629{
630 unsigned ca = component_of (fathers, a);
631 unsigned cb = component_of (fathers, b);
632
633 if (ca == cb)
634 return;
635
636 if (sizes[ca] < sizes[cb])
637 {
638 sizes[cb] += sizes[ca];
639 fathers[ca] = cb;
640 }
641 else
642 {
643 sizes[ca] += sizes[cb];
644 fathers[cb] = ca;
645 }
646}
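/* Together, component_of and merge_comps implement a simple union-find
   (disjoint set) structure over data reference indices; e.g. after
   merge_comps (f, s, 0, 1) and merge_comps (f, s, 1, 2), indices 0, 1 and 2
   all get the same root from component_of.  In split_data_refs_to_components
   below, the extra index N (the number of data references) is reserved for
   the "bad" component of references that cannot be handled.  */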
647
648/* Returns true if A is a reference that is suitable for predictive commoning
649 in the innermost loop that contains it. REF_STEP is set according to the
650 step of the reference A. */
651
652static bool
653suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
654{
655 tree ref = DR_REF (a), step = DR_STEP (a);
656
657 if (!step
450c0971 658 || TREE_THIS_VOLATILE (ref)
154edec0 659 || !is_gimple_reg_type (TREE_TYPE (ref))
660 || tree_could_throw_p (ref))
ad4a85ad 661 return false;
662
663 if (integer_zerop (step))
664 *ref_step = RS_INVARIANT;
665 else if (integer_nonzerop (step))
666 *ref_step = RS_NONZERO;
667 else
668 *ref_step = RS_ANY;
669
670 return true;
671}
672
673/* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
674
675static void
676aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset)
677{
a0553bff 678 tree type = TREE_TYPE (DR_OFFSET (dr));
ad4a85ad 679 aff_tree delta;
680
a0553bff 681 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset,
ad4a85ad 682 &name_expansions);
c4d25d8a 683 aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
ad4a85ad 684 aff_combination_add (offset, &delta);
685}
686
687/* Determines number of iterations of the innermost enclosing loop before B
688 refers to exactly the same location as A and stores it to OFF. If A and
689 B do not have the same step, they never meet, or anything else fails,
690 returns false, otherwise returns true. Both A and B are assumed to
691 satisfy suitable_reference_p. */
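/* For instance (an illustration only; the actual offsets, inits and steps
   are the byte quantities computed by data reference analysis), for
   A == a[i + 2] and B == a[i] with 4-byte elements the offsets differ by 8
   and the step is 4, so *OFF is set to 2: B accesses A's current location
   two iterations later.  */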
692
693static bool
694determine_offset (struct data_reference *a, struct data_reference *b,
1aeea61f 695 poly_widest_int *off)
ad4a85ad 696{
697 aff_tree diff, baseb, step;
ca4882a2 698 tree typea, typeb;
699
700 /* Check that both the references access the location in the same type. */
701 typea = TREE_TYPE (DR_REF (a));
702 typeb = TREE_TYPE (DR_REF (b));
548044d8 703 if (!useless_type_conversion_p (typeb, typea))
ca4882a2 704 return false;
ad4a85ad 705
706 /* Check whether the base address and the step of both references is the
707 same. */
708 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
709 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
710 return false;
711
712 if (integer_zerop (DR_STEP (a)))
713 {
714 /* If the references have loop invariant address, check that they access
715 exactly the same location. */
e913b5cd 716 *off = 0;
ad4a85ad 717 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
718 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
719 }
720
721 /* Compare the offsets of the addresses, and check whether the difference
722 is a multiple of step. */
723 aff_combination_dr_offset (a, &diff);
724 aff_combination_dr_offset (b, &baseb);
e913b5cd 725 aff_combination_scale (&baseb, -1);
ad4a85ad 726 aff_combination_add (&diff, &baseb);
727
a0553bff 728 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
ad4a85ad 729 &step, &name_expansions);
730 return aff_combination_constant_multiple_p (&diff, &step, off);
731}
732
733/* Returns the last basic block in LOOP of which we are sure that
734 it is executed whenever the loop is entered. */
735
736static basic_block
737last_always_executed_block (struct loop *loop)
738{
739 unsigned i;
f1f41a6c 740 vec<edge> exits = get_loop_exit_edges (loop);
ad4a85ad 741 edge ex;
742 basic_block last = loop->latch;
743
f1f41a6c 744 FOR_EACH_VEC_ELT (exits, i, ex)
ad4a85ad 745 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
f1f41a6c 746 exits.release ();
ad4a85ad 747
748 return last;
749}
750
751/* Splits dependence graph on DATAREFS described by DEPENDS to components. */
752
753static struct component *
754split_data_refs_to_components (struct loop *loop,
f1f41a6c 755 vec<data_reference_p> datarefs,
756 vec<ddr_p> depends)
ad4a85ad 757{
f1f41a6c 758 unsigned i, n = datarefs.length ();
ad4a85ad 759 unsigned ca, ia, ib, bad;
760 unsigned *comp_father = XNEWVEC (unsigned, n + 1);
761 unsigned *comp_size = XNEWVEC (unsigned, n + 1);
762 struct component **comps;
763 struct data_reference *dr, *dra, *drb;
764 struct data_dependence_relation *ddr;
765 struct component *comp_list = NULL, *comp;
766 dref dataref;
a652753d 767 /* Don't do store elimination if the loop has multiple exit edges. */
768 bool eliminate_store_p = single_exit (loop) != NULL;
ad4a85ad 769 basic_block last_always_executed = last_always_executed_block (loop);
48e1416a 770
f1f41a6c 771 FOR_EACH_VEC_ELT (datarefs, i, dr)
ad4a85ad 772 {
773 if (!DR_REF (dr))
774 {
775 /* A fake reference for call or asm_expr that may clobber memory;
776 just fail. */
777 goto end;
778 }
c71d3c24 779 /* predcom pass isn't prepared to handle calls with data references. */
780 if (is_gimple_call (DR_STMT (dr)))
781 goto end;
5c205353 782 dr->aux = (void *) (size_t) i;
ad4a85ad 783 comp_father[i] = i;
784 comp_size[i] = 1;
785 }
786
787 /* A component reserved for the "bad" data references. */
788 comp_father[n] = n;
789 comp_size[n] = 1;
790
f1f41a6c 791 FOR_EACH_VEC_ELT (datarefs, i, dr)
ad4a85ad 792 {
793 enum ref_step_type dummy;
794
795 if (!suitable_reference_p (dr, &dummy))
796 {
5c205353 797 ia = (unsigned) (size_t) dr->aux;
ad4a85ad 798 merge_comps (comp_father, comp_size, n, ia);
799 }
800 }
801
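  /* Merge the components of references connected by a dependence.  If the
     distance of a dependence cannot be determined, the offending references
     are either ignored (read-read) or moved to the "bad" component, so that
     the rest of their component may still be optimizable.  */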
f1f41a6c 802 FOR_EACH_VEC_ELT (depends, i, ddr)
ad4a85ad 803 {
1aeea61f 804 poly_widest_int dummy_off;
ad4a85ad 805
806 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
807 continue;
808
809 dra = DDR_A (ddr);
810 drb = DDR_B (ddr);
a652753d 811
812 /* Don't do store elimination if there is any unknown dependence for
813 any store data reference. */
814 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
815 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
816 || DDR_NUM_DIST_VECTS (ddr) == 0))
817 eliminate_store_p = false;
818
5c205353 819 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
820 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
ad4a85ad 821 if (ia == ib)
822 continue;
823
824 bad = component_of (comp_father, n);
825
826 /* If both A and B are reads, we may ignore unsuitable dependences. */
19f2ced2 827 if (DR_IS_READ (dra) && DR_IS_READ (drb))
828 {
829 if (ia == bad || ib == bad
830 || !determine_offset (dra, drb, &dummy_off))
831 continue;
832 }
833 /* If A is a read and B a write or vice versa and there is an unsuitable
834 dependence, instead of merging both components into a component
835 that will certainly not pass suitable_component_p, just put the
836 read into the bad component; perhaps at least the write together with
837 all the other data refs in its component will be optimizable. */
838 else if (DR_IS_READ (dra) && ib != bad)
839 {
840 if (ia == bad)
841 continue;
842 else if (!determine_offset (dra, drb, &dummy_off))
843 {
844 merge_comps (comp_father, comp_size, bad, ia);
845 continue;
846 }
847 }
848 else if (DR_IS_READ (drb) && ia != bad)
849 {
850 if (ib == bad)
851 continue;
852 else if (!determine_offset (dra, drb, &dummy_off))
853 {
854 merge_comps (comp_father, comp_size, bad, ib);
855 continue;
856 }
857 }
a652753d 858 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
859 && ia != bad && ib != bad
860 && !determine_offset (dra, drb, &dummy_off))
861 {
862 merge_comps (comp_father, comp_size, bad, ia);
863 merge_comps (comp_father, comp_size, bad, ib);
864 continue;
865 }
48e1416a 866
ad4a85ad 867 merge_comps (comp_father, comp_size, ia, ib);
868 }
869
a652753d 870 if (eliminate_store_p)
871 {
872 tree niters = number_of_latch_executions (loop);
873
874 /* Don't do store elimination if niters info is unknown, because stores
875 in the last iterations can't be eliminated and we need to recover them
876 after the loop. */
877 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
878 }
879
ad4a85ad 880 comps = XCNEWVEC (struct component *, n);
881 bad = component_of (comp_father, n);
f1f41a6c 882 FOR_EACH_VEC_ELT (datarefs, i, dr)
ad4a85ad 883 {
5c205353 884 ia = (unsigned) (size_t) dr->aux;
ad4a85ad 885 ca = component_of (comp_father, ia);
886 if (ca == bad)
887 continue;
888
889 comp = comps[ca];
890 if (!comp)
891 {
892 comp = XCNEW (struct component);
f1f41a6c 893 comp->refs.create (comp_size[ca]);
a652753d 894 comp->eliminate_store_p = eliminate_store_p;
ad4a85ad 895 comps[ca] = comp;
896 }
897
26dbec0a 898 dataref = XCNEW (struct dref_d);
ad4a85ad 899 dataref->ref = dr;
900 dataref->stmt = DR_STMT (dr);
e913b5cd 901 dataref->offset = 0;
ad4a85ad 902 dataref->distance = 0;
903
904 dataref->always_accessed
905 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
75a70cf9 906 gimple_bb (dataref->stmt));
f1f41a6c 907 dataref->pos = comp->refs.length ();
908 comp->refs.quick_push (dataref);
ad4a85ad 909 }
910
911 for (i = 0; i < n; i++)
912 {
913 comp = comps[i];
914 if (comp)
915 {
916 comp->next = comp_list;
917 comp_list = comp;
918 }
919 }
920 free (comps);
921
922end:
923 free (comp_father);
924 free (comp_size);
925 return comp_list;
926}
927
928/* Returns true if the component COMP satisfies the conditions
310d2511 929 described in 2) at the beginning of this file. LOOP is the current
ad4a85ad 930 loop. */
48e1416a 931
ad4a85ad 932static bool
933suitable_component_p (struct loop *loop, struct component *comp)
934{
935 unsigned i;
936 dref a, first;
937 basic_block ba, bp = loop->header;
938 bool ok, has_write = false;
939
f1f41a6c 940 FOR_EACH_VEC_ELT (comp->refs, i, a)
ad4a85ad 941 {
75a70cf9 942 ba = gimple_bb (a->stmt);
ad4a85ad 943
944 if (!just_once_each_iteration_p (loop, ba))
945 return false;
946
947 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
948 bp = ba;
949
9ff25603 950 if (DR_IS_WRITE (a->ref))
ad4a85ad 951 has_write = true;
952 }
953
f1f41a6c 954 first = comp->refs[0];
ad4a85ad 955 ok = suitable_reference_p (first->ref, &comp->comp_step);
956 gcc_assert (ok);
e913b5cd 957 first->offset = 0;
ad4a85ad 958
f1f41a6c 959 for (i = 1; comp->refs.iterate (i, &a); i++)
ad4a85ad 960 {
1aeea61f 961 /* Polynomial offsets are no use, since we need to know the
962 gap between iteration numbers at compile time. */
963 poly_widest_int offset;
964 if (!determine_offset (first->ref, a->ref, &offset)
965 || !offset.is_constant (&a->offset))
ad4a85ad 966 return false;
967
382ecba7 968 enum ref_step_type a_step;
969 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
970 && a_step == comp->comp_step);
ad4a85ad 971 }
972
973 /* If there is a write inside the component, we must know whether the
974 step is nonzero or not -- we would not otherwise be able to recognize
975 whether the value accessed by reads comes from the OFFSET-th iteration
976 or the previous one. */
977 if (has_write && comp->comp_step == RS_ANY)
978 return false;
979
980 return true;
981}
48e1416a 982
ad4a85ad 983/* Check the conditions on references inside each of components COMPS,
984 and remove the unsuitable components from the list. The new list
985 of components is returned. The conditions are described in 2) at
310d2511 986 the beginning of this file. LOOP is the current loop. */
ad4a85ad 987
988static struct component *
989filter_suitable_components (struct loop *loop, struct component *comps)
990{
991 struct component **comp, *act;
992
993 for (comp = &comps; *comp; )
994 {
995 act = *comp;
996 if (suitable_component_p (loop, act))
997 comp = &act->next;
998 else
999 {
19af51e2 1000 dref ref;
1001 unsigned i;
1002
ad4a85ad 1003 *comp = act->next;
f1f41a6c 1004 FOR_EACH_VEC_ELT (act->refs, i, ref)
19af51e2 1005 free (ref);
ad4a85ad 1006 release_component (act);
1007 }
1008 }
1009
1010 return comps;
1011}
1012
1013/* Compares two drefs A and B by their offset and position. Callback for
1014 qsort. */
1015
1016static int
1017order_drefs (const void *a, const void *b)
1018{
45ba1503 1019 const dref *const da = (const dref *) a;
1020 const dref *const db = (const dref *) b;
796b6678 1021 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
ad4a85ad 1022
1023 if (offcmp != 0)
1024 return offcmp;
1025
1026 return (*da)->pos - (*db)->pos;
1027}
1028
e02e28c4 1029/* Compares two drefs A and B by their position. Callback for qsort. */
1030
1031static int
1032order_drefs_by_pos (const void *a, const void *b)
1033{
1034 const dref *const da = (const dref *) a;
1035 const dref *const db = (const dref *) b;
1036
1037 return (*da)->pos - (*db)->pos;
1038}
1039
ad4a85ad 1040/* Returns root of the CHAIN. */
1041
1042static inline dref
1043get_chain_root (chain_p chain)
1044{
f1f41a6c 1045 return chain->refs[0];
ad4a85ad 1046}
1047
83c06cf0 1048/* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
a652753d 1049 exist. */
1050
1051static inline dref
83c06cf0 1052get_chain_last_write_at (chain_p chain, unsigned distance)
a652753d 1053{
83c06cf0 1054 for (unsigned i = chain->refs.length (); i > 0; i--)
1055 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1056 && distance == chain->refs[i - 1]->distance)
1057 return chain->refs[i - 1];
a652753d 1058
83c06cf0 1059 return NULL;
1060}
1061
1062/* Given CHAIN, returns the last write ref with the same distance before load
1063 at index LOAD_IDX, or NULL if it doesn't exist. */
1064
1065static inline dref
1066get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1067{
1068 gcc_assert (load_idx < chain->refs.length ());
1069
1070 unsigned distance = chain->refs[load_idx]->distance;
a652753d 1071
83c06cf0 1072 for (unsigned i = load_idx; i > 0; i--)
1073 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1074 && distance == chain->refs[i - 1]->distance)
1075 return chain->refs[i - 1];
1076
1077 return NULL;
a652753d 1078}
1079
ad4a85ad 1080/* Adds REF to the chain CHAIN. */
1081
1082static void
1083add_ref_to_chain (chain_p chain, dref ref)
1084{
1085 dref root = get_chain_root (chain);
ad4a85ad 1086
796b6678 1087 gcc_assert (wi::les_p (root->offset, ref->offset));
ab2c1de8 1088 widest_int dist = ref->offset - root->offset;
796b6678 1089 gcc_assert (wi::fits_uhwi_p (dist));
ad4a85ad 1090
f1f41a6c 1091 chain->refs.safe_push (ref);
ad4a85ad 1092
cf8f0e63 1093 ref->distance = dist.to_uhwi ();
ad4a85ad 1094
1095 if (ref->distance >= chain->length)
1096 {
1097 chain->length = ref->distance;
1098 chain->has_max_use_after = false;
1099 }
1100
83c06cf0 1101 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1102 if (DR_IS_WRITE (ref->ref))
1103 chain->type = CT_STORE_STORE;
1104
c12979d1 1105 /* Don't set the flag for store-store chain since there is no use. */
1106 if (chain->type != CT_STORE_STORE
1107 && ref->distance == chain->length
ad4a85ad 1108 && ref->pos > root->pos)
1109 chain->has_max_use_after = true;
1110
1111 chain->all_always_accessed &= ref->always_accessed;
1112}
1113
1114/* Returns the chain for invariant component COMP. */
1115
1116static chain_p
1117make_invariant_chain (struct component *comp)
1118{
1119 chain_p chain = XCNEW (struct chain);
1120 unsigned i;
1121 dref ref;
1122
1123 chain->type = CT_INVARIANT;
1124
1125 chain->all_always_accessed = true;
1126
f1f41a6c 1127 FOR_EACH_VEC_ELT (comp->refs, i, ref)
ad4a85ad 1128 {
f1f41a6c 1129 chain->refs.safe_push (ref);
ad4a85ad 1130 chain->all_always_accessed &= ref->always_accessed;
1131 }
1132
a652753d 1133 chain->inits = vNULL;
1134 chain->finis = vNULL;
1135
ad4a85ad 1136 return chain;
1137}
1138
a652753d 1139/* Make a new chain of type TYPE rooted at REF. */
ad4a85ad 1140
1141static chain_p
a652753d 1142make_rooted_chain (dref ref, enum chain_type type)
ad4a85ad 1143{
1144 chain_p chain = XCNEW (struct chain);
1145
a652753d 1146 chain->type = type;
f1f41a6c 1147 chain->refs.safe_push (ref);
ad4a85ad 1148 chain->all_always_accessed = ref->always_accessed;
ad4a85ad 1149 ref->distance = 0;
1150
a652753d 1151 chain->inits = vNULL;
1152 chain->finis = vNULL;
1153
ad4a85ad 1154 return chain;
1155}
1156
1157/* Returns true if CHAIN is not trivial. */
1158
1159static bool
1160nontrivial_chain_p (chain_p chain)
1161{
f1f41a6c 1162 return chain != NULL && chain->refs.length () > 1;
ad4a85ad 1163}
1164
1165/* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1166 is no such name. */
1167
1168static tree
1169name_for_ref (dref ref)
1170{
1171 tree name;
1172
75a70cf9 1173 if (is_gimple_assign (ref->stmt))
ad4a85ad 1174 {
1175 if (!ref->ref || DR_IS_READ (ref->ref))
75a70cf9 1176 name = gimple_assign_lhs (ref->stmt);
ad4a85ad 1177 else
75a70cf9 1178 name = gimple_assign_rhs1 (ref->stmt);
ad4a85ad 1179 }
1180 else
1181 name = PHI_RESULT (ref->stmt);
1182
1183 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1184}
1185
1186/* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1187 iterations of the innermost enclosing loop). */
1188
1189static bool
1190valid_initializer_p (struct data_reference *ref,
1191 unsigned distance, struct data_reference *root)
1192{
1193 aff_tree diff, base, step;
1aeea61f 1194 poly_widest_int off;
ad4a85ad 1195
ad4a85ad 1196 /* Both REF and ROOT must be accessing the same object. */
1197 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1198 return false;
1199
1200 /* The initializer is defined outside of loop, hence its address must be
1201 invariant inside the loop. */
1202 gcc_assert (integer_zerop (DR_STEP (ref)));
1203
1204 /* If the address of the reference is invariant, initializer must access
1205 exactly the same location. */
1206 if (integer_zerop (DR_STEP (root)))
1207 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1208 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1209
1210 /* Verify that this index of REF is equal to the root's index at
1211 -DISTANCE-th iteration. */
1212 aff_combination_dr_offset (root, &diff);
1213 aff_combination_dr_offset (ref, &base);
e913b5cd 1214 aff_combination_scale (&base, -1);
ad4a85ad 1215 aff_combination_add (&diff, &base);
1216
a0553bff 1217 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1218 &step, &name_expansions);
ad4a85ad 1219 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1220 return false;
1221
1aeea61f 1222 if (maybe_ne (off, distance))
ad4a85ad 1223 return false;
1224
1225 return true;
1226}
1227
1228/* Finds looparound phi node of LOOP that copies the value of REF, and if its
1229 initial value is correct (equal to initial value of REF shifted by one
1230 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1231 is the root of the current chain. */
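/* For instance (a sketch with made-up names), a looparound phi for a load
   of a[i + 1] may look like

     t0 = a[0];
     for (...)
       {
         t1 = phi (t0, t2);
         ...
         t2 = a[i + 1];
       }

   where t1 carries the previous iteration's value of a[i + 1], i.e. the
   current iteration's a[i].  Such copies are typically created by an
   earlier PRE pass, or written by the user.  */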
1232
1a91d914 1233static gphi *
ad4a85ad 1234find_looparound_phi (struct loop *loop, dref ref, dref root)
1235{
75a70cf9 1236 tree name, init, init_ref;
1a91d914 1237 gphi *phi = NULL;
42acab1c 1238 gimple *init_stmt;
ad4a85ad 1239 edge latch = loop_latch_edge (loop);
1240 struct data_reference init_dr;
1a91d914 1241 gphi_iterator psi;
ad4a85ad 1242
75a70cf9 1243 if (is_gimple_assign (ref->stmt))
ad4a85ad 1244 {
1245 if (DR_IS_READ (ref->ref))
75a70cf9 1246 name = gimple_assign_lhs (ref->stmt);
ad4a85ad 1247 else
75a70cf9 1248 name = gimple_assign_rhs1 (ref->stmt);
ad4a85ad 1249 }
1250 else
1251 name = PHI_RESULT (ref->stmt);
1252 if (!name)
75a70cf9 1253 return NULL;
ad4a85ad 1254
75a70cf9 1255 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1256 {
1a91d914 1257 phi = psi.phi ();
75a70cf9 1258 if (PHI_ARG_DEF_FROM_EDGE (phi, latch) == name)
1259 break;
1260 }
ad4a85ad 1261
75a70cf9 1262 if (gsi_end_p (psi))
1263 return NULL;
ad4a85ad 1264
1265 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1266 if (TREE_CODE (init) != SSA_NAME)
75a70cf9 1267 return NULL;
ad4a85ad 1268 init_stmt = SSA_NAME_DEF_STMT (init);
75a70cf9 1269 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1270 return NULL;
1271 gcc_assert (gimple_assign_lhs (init_stmt) == init);
ad4a85ad 1272
75a70cf9 1273 init_ref = gimple_assign_rhs1 (init_stmt);
ad4a85ad 1274 if (!REFERENCE_CLASS_P (init_ref)
1275 && !DECL_P (init_ref))
75a70cf9 1276 return NULL;
ad4a85ad 1277
1278 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1279 loop enclosing PHI). */
1280 memset (&init_dr, 0, sizeof (struct data_reference));
1281 DR_REF (&init_dr) = init_ref;
1282 DR_STMT (&init_dr) = phi;
ed9370cc 1283 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, loop,
1284 init_stmt))
880734c8 1285 return NULL;
ad4a85ad 1286
1287 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
75a70cf9 1288 return NULL;
ad4a85ad 1289
1290 return phi;
1291}
1292
1293/* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1294
1295static void
1a91d914 1296insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
ad4a85ad 1297{
26dbec0a 1298 dref nw = XCNEW (struct dref_d), aref;
ad4a85ad 1299 unsigned i;
1300
1301 nw->stmt = phi;
1302 nw->distance = ref->distance + 1;
1303 nw->always_accessed = 1;
1304
f1f41a6c 1305 FOR_EACH_VEC_ELT (chain->refs, i, aref)
ad4a85ad 1306 if (aref->distance >= nw->distance)
1307 break;
f1f41a6c 1308 chain->refs.safe_insert (i, nw);
ad4a85ad 1309
1310 if (nw->distance > chain->length)
1311 {
1312 chain->length = nw->distance;
1313 chain->has_max_use_after = false;
1314 }
1315}
1316
1317/* For references in CHAIN that are copied around the LOOP (created previously
1318 by PRE, or by user), add the results of such copies to the chain. This
1319 enables us to remove the copies by unrolling, and may need less registers
1320 (also, it may allow us to combine chains together). */
1321
1322static void
1323add_looparound_copies (struct loop *loop, chain_p chain)
1324{
1325 unsigned i;
1326 dref ref, root = get_chain_root (chain);
1a91d914 1327 gphi *phi;
ad4a85ad 1328
a652753d 1329 if (chain->type == CT_STORE_STORE)
1330 return;
1331
f1f41a6c 1332 FOR_EACH_VEC_ELT (chain->refs, i, ref)
ad4a85ad 1333 {
1334 phi = find_looparound_phi (loop, ref, root);
1335 if (!phi)
1336 continue;
1337
1338 bitmap_set_bit (looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1339 insert_looparound_copy (chain, ref, phi);
1340 }
1341}
1342
1343/* Find roots of the values and determine distances in the component COMP.
1344 The references are redistributed into CHAINS. LOOP is the current
1345 loop. */
1346
1347static void
1348determine_roots_comp (struct loop *loop,
1349 struct component *comp,
f1f41a6c 1350 vec<chain_p> *chains)
ad4a85ad 1351{
1352 unsigned i;
1353 dref a;
1354 chain_p chain = NULL;
5de9d3ed 1355 widest_int last_ofs = 0;
a652753d 1356 enum chain_type type;
ad4a85ad 1357
1358 /* Invariants are handled specially. */
1359 if (comp->comp_step == RS_INVARIANT)
1360 {
1361 chain = make_invariant_chain (comp);
f1f41a6c 1362 chains->safe_push (chain);
ad4a85ad 1363 return;
1364 }
1365
8a0e25c9 1366 /* Trivial component. */
1367 if (comp->refs.length () <= 1)
f7b2503a 1368 {
1369 if (comp->refs.length () == 1)
1370 {
1371 free (comp->refs[0]);
1372 comp->refs.truncate (0);
1373 }
1374 return;
1375 }
ad4a85ad 1376
8a0e25c9 1377 comp->refs.qsort (order_drefs);
83c06cf0 1378
1379 /* For a Store-Store chain, we only support a load if it is dominated by
1380 a store statement in the same iteration of the loop. */
1381 if (comp->eliminate_store_p)
1382 for (a = NULL, i = 0; i < comp->refs.length (); i++)
1383 {
1384 if (DR_IS_WRITE (comp->refs[i]->ref))
1385 a = comp->refs[i];
1386 else if (a == NULL || a->offset != comp->refs[i]->offset)
1387 {
1388 /* If there is a load that is not dominated by a store in the
1389 same iteration of the loop, clear the flag so no Store-Store
1390 chain is generated for this component. */
1391 comp->eliminate_store_p = false;
1392 break;
1393 }
1394 }
1395
1396 /* Determine roots and create chains for components. */
f1f41a6c 1397 FOR_EACH_VEC_ELT (comp->refs, i, a)
ad4a85ad 1398 {
a652753d 1399 if (!chain
83c06cf0 1400 || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
a652753d 1401 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
796b6678 1402 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
ad4a85ad 1403 {
1404 if (nontrivial_chain_p (chain))
be2e5c02 1405 {
1406 add_looparound_copies (loop, chain);
f1f41a6c 1407 chains->safe_push (chain);
be2e5c02 1408 }
ad4a85ad 1409 else
1410 release_chain (chain);
a652753d 1411
83c06cf0 1412 /* Determine the type of the chain. If the root reference is a load,
1413 this can only be a CT_LOAD chain; other chains are initialized
1414 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when a
1415 new reference is added. */
1416 type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
a652753d 1417 chain = make_rooted_chain (a, type);
be2e5c02 1418 last_ofs = a->offset;
ad4a85ad 1419 continue;
1420 }
1421
1422 add_ref_to_chain (chain, a);
1423 }
1424
1425 if (nontrivial_chain_p (chain))
1426 {
1427 add_looparound_copies (loop, chain);
f1f41a6c 1428 chains->safe_push (chain);
ad4a85ad 1429 }
1430 else
1431 release_chain (chain);
1432}
1433
1434/* Find roots of the values and determine distances in components COMPS, and
1435 separates the references to CHAINS. LOOP is the current loop. */
1436
1437static void
1438determine_roots (struct loop *loop,
f1f41a6c 1439 struct component *comps, vec<chain_p> *chains)
ad4a85ad 1440{
1441 struct component *comp;
1442
1443 for (comp = comps; comp; comp = comp->next)
1444 determine_roots_comp (loop, comp, chains);
1445}
1446
1447/* Replace the reference in statement STMT with temporary variable
f4e36c33 1448 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
ad4a85ad 1449 the reference in the statement. IN_LHS is true if the reference
1450 is in the lhs of STMT, false if it is in rhs. */
1451
1452static void
42acab1c 1453replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
ad4a85ad 1454{
75a70cf9 1455 tree val;
1a91d914 1456 gassign *new_stmt;
75a70cf9 1457 gimple_stmt_iterator bsi, psi;
ad4a85ad 1458
75a70cf9 1459 if (gimple_code (stmt) == GIMPLE_PHI)
ad4a85ad 1460 {
1461 gcc_assert (!in_lhs && !set);
1462
1463 val = PHI_RESULT (stmt);
75a70cf9 1464 bsi = gsi_after_labels (gimple_bb (stmt));
1465 psi = gsi_for_stmt (stmt);
1466 remove_phi_node (&psi, false);
ad4a85ad 1467
75a70cf9 1468 /* Turn the phi node into GIMPLE_ASSIGN. */
f4e36c33 1469 new_stmt = gimple_build_assign (val, new_tree);
75a70cf9 1470 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
ad4a85ad 1471 return;
1472 }
48e1416a 1473
ad4a85ad 1474 /* Since the reference is of gimple_reg type, it should only
1475 appear as lhs or rhs of modify statement. */
75a70cf9 1476 gcc_assert (is_gimple_assign (stmt));
1477
1478 bsi = gsi_for_stmt (stmt);
ad4a85ad 1479
f4e36c33 1480 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
ad4a85ad 1481 if (!set)
1482 {
1483 gcc_assert (!in_lhs);
f4e36c33 1484 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
75a70cf9 1485 stmt = gsi_stmt (bsi);
ad4a85ad 1486 update_stmt (stmt);
1487 return;
1488 }
1489
ad4a85ad 1490 if (in_lhs)
1491 {
75a70cf9 1492 /* We have statement
48e1416a 1493
75a70cf9 1494 OLD = VAL
ad4a85ad 1495
75a70cf9 1496 If OLD is a memory reference, then VAL is gimple_val, and we transform
1497 this to
ad4a85ad 1498
1499 OLD = VAL
1500 NEW = VAL
1501
48e1416a 1502 Otherwise, we are replacing a combination chain,
75a70cf9 1503 VAL is the expression that performs the combination, and OLD is an
1504 SSA name. In this case, we transform the assignment to
1505
1506 OLD = VAL
1507 NEW = OLD
1508
1509 */
1510
1511 val = gimple_assign_lhs (stmt);
1512 if (TREE_CODE (val) != SSA_NAME)
1513 {
75a70cf9 1514 val = gimple_assign_rhs1 (stmt);
bbfbb5ba 1515 gcc_assert (gimple_assign_single_p (stmt));
1516 if (TREE_CLOBBER_P (val))
c6dfe037 1517 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
bbfbb5ba 1518 else
1519 gcc_assert (gimple_assign_copy_p (stmt));
75a70cf9 1520 }
ad4a85ad 1521 }
1522 else
1523 {
ad4a85ad 1524 /* VAL = OLD
1525
1526 is transformed to
1527
1528 VAL = OLD
1529 NEW = VAL */
75a70cf9 1530
1531 val = gimple_assign_lhs (stmt);
ad4a85ad 1532 }
1533
f4e36c33 1534 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
75a70cf9 1535 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
ad4a85ad 1536}
1537
3e59c7a7 1538/* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1539 of the loop it was analyzed in. Append init stmts to STMTS. */
99f6be4b 1540
b1abc234 1541static tree
3e59c7a7 1542ref_at_iteration (data_reference_p dr, int iter,
1543 gimple_seq *stmts, tree niters = NULL_TREE)
99f6be4b 1544{
1545 tree off = DR_OFFSET (dr);
1546 tree coff = DR_INIT (dr);
b1abc234 1547 tree ref = DR_REF (dr);
1548 enum tree_code ref_code = ERROR_MARK;
1549 tree ref_type = NULL_TREE;
1550 tree ref_op1 = NULL_TREE;
1551 tree ref_op2 = NULL_TREE;
3e59c7a7 1552 tree new_offset;
1553
1554 if (iter != 0)
1555 {
1556 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1557 if (TREE_CODE (new_offset) == INTEGER_CST)
1558 coff = size_binop (PLUS_EXPR, coff, new_offset);
1559 else
1560 off = size_binop (PLUS_EXPR, off, new_offset);
1561 }
1562
1563 if (niters != NULL_TREE)
1564 {
1565 niters = fold_convert (ssizetype, niters);
1566 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1567 if (TREE_CODE (niters) == INTEGER_CST)
1568 coff = size_binop (PLUS_EXPR, coff, new_offset);
1569 else
1570 off = size_binop (PLUS_EXPR, off, new_offset);
1571 }
1572
e5104bca 1573 /* While data-ref analysis punts on bit offsets it still handles
1574 bitfield accesses at byte boundaries. Cope with that. Note that
b1abc234 1575 if the bitfield object also starts at a byte-boundary we can simply
1576 replicate the COMPONENT_REF, but we have to subtract the component's
1577 byte-offset from the MEM_REF address first.
1578 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
e5104bca 1579 start at offset zero. */
b1abc234 1580 if (TREE_CODE (ref) == COMPONENT_REF
1581 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
e5104bca 1582 {
b1abc234 1583 unsigned HOST_WIDE_INT boff;
1584 tree field = TREE_OPERAND (ref, 1);
1585 tree offset = component_ref_field_offset (ref);
1586 ref_type = TREE_TYPE (ref);
1587 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1588 /* This can occur in Ada. See the comment in get_bit_range. */
1589 if (boff % BITS_PER_UNIT != 0
1590 || !tree_fits_uhwi_p (offset))
1591 {
1592 ref_code = BIT_FIELD_REF;
1593 ref_op1 = DECL_SIZE (field);
1594 ref_op2 = bitsize_zero_node;
1595 }
1596 else
1597 {
1598 boff >>= LOG2_BITS_PER_UNIT;
1599 boff += tree_to_uhwi (offset);
1600 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1601 ref_code = COMPONENT_REF;
1602 ref_op1 = field;
1603 ref_op2 = TREE_OPERAND (ref, 2);
1604 ref = TREE_OPERAND (ref, 0);
1605 }
ad4a85ad 1606 }
b1abc234 1607 tree addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1608 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1609 is_gimple_mem_ref_addr, NULL_TREE);
1610 tree alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1611 tree type = build_aligned_type (TREE_TYPE (ref),
1612 get_object_alignment (ref));
1613 ref = build2 (MEM_REF, type, addr, alias_ptr);
1614 if (ref_type)
1615 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1616 return ref;
ad4a85ad 1617}
1618
1619/* Get the initialization expression for the INDEX-th temporary variable
1620 of CHAIN. */
1621
1622static tree
1623get_init_expr (chain_p chain, unsigned index)
1624{
1625 if (chain->type == CT_COMBINATION)
1626 {
1627 tree e1 = get_init_expr (chain->ch1, index);
1628 tree e2 = get_init_expr (chain->ch2, index);
1629
f4e36c33 1630 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
ad4a85ad 1631 }
1632 else
f1f41a6c 1633 return chain->inits[index];
ad4a85ad 1634}
1635
a4c3242a 1636/* Returns a new temporary variable used for the I-th variable carrying
1637 value of REF. The variable's uid is marked in TMP_VARS. */
1638
1639static tree
1640predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1641{
1642 tree type = TREE_TYPE (ref);
a4c3242a 1643 /* We never access the components of the temporary variable in predictive
1644 commoning. */
2ac51e48 1645 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
a4c3242a 1646 bitmap_set_bit (tmp_vars, DECL_UID (var));
1647 return var;
1648}
1649
ad4a85ad 1650/* Creates the variables for CHAIN, as well as phi nodes for them and
1651 initialization on entry to LOOP. Uids of the newly created
1652 temporary variables are marked in TMP_VARS. */
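/* For a chain of length 2 rooted at a[i + 2], for example, this produces
   (roughly, with made-up variable names) the loop-header phi nodes

     R0 = phi (a[0], R1)
     R1 = phi (a[1], R2)

   matching step 5) of the comment at the top of the file.  R2 is only
   created if the chain cannot reuse R0, i.e. if has_max_use_after is set;
   otherwise the latch argument of the second phi is R0 itself.  */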
1653
1654static void
1655initialize_root_vars (struct loop *loop, chain_p chain, bitmap tmp_vars)
1656{
1657 unsigned i;
1658 unsigned n = chain->length;
1659 dref root = get_chain_root (chain);
1660 bool reuse_first = !chain->has_max_use_after;
75a70cf9 1661 tree ref, init, var, next;
1a91d914 1662 gphi *phi;
75a70cf9 1663 gimple_seq stmts;
ad4a85ad 1664 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1665
1666 /* If N == 0, then all the references are within a single iteration. And
1667 since this is a nonempty chain, reuse_first cannot be true. */
1668 gcc_assert (n > 0 || !reuse_first);
1669
f1f41a6c 1670 chain->vars.create (n + 1);
ad4a85ad 1671
1672 if (chain->type == CT_COMBINATION)
75a70cf9 1673 ref = gimple_assign_lhs (root->stmt);
ad4a85ad 1674 else
1675 ref = DR_REF (root->ref);
1676
1677 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1678 {
a4c3242a 1679 var = predcom_tmp_var (ref, i, tmp_vars);
f1f41a6c 1680 chain->vars.quick_push (var);
ad4a85ad 1681 }
1682 if (reuse_first)
f1f41a6c 1683 chain->vars.quick_push (chain->vars[0]);
48e1416a 1684
f1f41a6c 1685 FOR_EACH_VEC_ELT (chain->vars, i, var)
f9e245b2 1686 chain->vars[i] = make_ssa_name (var);
ad4a85ad 1687
1688 for (i = 0; i < n; i++)
1689 {
f1f41a6c 1690 var = chain->vars[i];
1691 next = chain->vars[i + 1];
ad4a85ad 1692 init = get_init_expr (chain, i);
1693
1694 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1695 if (stmts)
dd277d48 1696 gsi_insert_seq_on_edge_immediate (entry, stmts);
ad4a85ad 1697
1698 phi = create_phi_node (var, loop->header);
60d535d2 1699 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1700 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
ad4a85ad 1701 }
1702}
1703
e33b3a13 1704/* For inter-iteration store elimination CHAIN in LOOP, returns true if
1705 all stores to be eliminated store loop invariant values into memory.
1706 In this case, we can use these invariant values directly after LOOP. */
1707
1708static bool
1709is_inv_store_elimination_chain (struct loop *loop, chain_p chain)
1710{
1711 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1712 return false;
1713
1714 gcc_assert (!chain->has_max_use_after);
1715
1716 /* If the loop iterates an unknown number of times, or fewer times than
1717 chain->length, we still need root variables propagated via PHI nodes. */
1718 tree niters = number_of_latch_executions (loop);
e3d0f65c 1719 if (TREE_CODE (niters) != INTEGER_CST
1720 || wi::leu_p (wi::to_wide (niters), chain->length))
e33b3a13 1721 return false;
1722
1723 /* Check stores in chain for elimination if they only store loop invariant
1724 values. */
1725 for (unsigned i = 0; i < chain->length; i++)
1726 {
83c06cf0 1727 dref a = get_chain_last_write_at (chain, i);
e33b3a13 1728 if (a == NULL)
1729 continue;
1730
1731 gimple *def_stmt, *stmt = a->stmt;
1732 if (!gimple_assign_single_p (stmt))
1733 return false;
1734
1735 tree val = gimple_assign_rhs1 (stmt);
1736 if (TREE_CLOBBER_P (val))
1737 return false;
1738
1739 if (CONSTANT_CLASS_P (val))
1740 continue;
1741
1742 if (TREE_CODE (val) != SSA_NAME)
1743 return false;
1744
1745 def_stmt = SSA_NAME_DEF_STMT (val);
1746 if (gimple_nop_p (def_stmt))
1747 continue;
1748
1749 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1750 return false;
1751 }
1752 return true;
1753}
1754
1755/* Creates root variables for store elimination CHAIN in which stores for
1756 elimination only store loop invariant values. In this case, we neither
1757 need to load root variables before the loop nor propagate them with PHI nodes. */
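/* This roughly corresponds to the simplified form shown in the comment at
   the top of the file: in the a[i] = 1; a[i + 2] = 2; example the eliminated
   store only ever stores the constant 2, so a[n] = 2; a[n + 1] = 2; can be
   emitted after the loop without carrying the values in PHI nodes.  */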
1758
1759static void
1760initialize_root_vars_store_elim_1 (chain_p chain)
1761{
1762 tree var;
1763 unsigned i, n = chain->length;
1764
1765 chain->vars.create (n);
1766 chain->vars.safe_grow_cleared (n);
1767
1768 /* Initialize root value for eliminated stores at each distance. */
1769 for (i = 0; i < n; i++)
1770 {
83c06cf0 1771 dref a = get_chain_last_write_at (chain, i);
e33b3a13 1772 if (a == NULL)
1773 continue;
1774
1775 var = gimple_assign_rhs1 (a->stmt);
1776 chain->vars[a->distance] = var;
1777 }
1778
1779 /* We don't propagate values with PHI nodes, so manually propagate the
1780 value to the bubble positions (distances with no store). */
1781 var = chain->vars[0];
1782 for (i = 1; i < n; i++)
1783 {
1784 if (chain->vars[i] != NULL_TREE)
1785 {
1786 var = chain->vars[i];
1787 continue;
1788 }
1789 chain->vars[i] = var;
1790 }
1791
1792 /* Reverse the vector. */
1793 for (i = 0; i < n / 2; i++)
1794 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1795}
1796
a652753d 1797/* Creates root variables for store elimination CHAIN in which stores for
1798 elimination store loop variant values. In this case, we may need to
1799 load root variables before LOOP and propagate them with PHI nodes. Uids
1800 of the newly created root variables are marked in TMP_VARS. */
1801
1802static void
1803initialize_root_vars_store_elim_2 (struct loop *loop,
1804 chain_p chain, bitmap tmp_vars)
1805{
1806 unsigned i, n = chain->length;
1807 tree ref, init, var, next, val, phi_result;
1808 gimple *stmt;
1809 gimple_seq stmts;
1810
1811 chain->vars.create (n);
1812
1813 ref = DR_REF (get_chain_root (chain)->ref);
1814 for (i = 0; i < n; i++)
1815 {
1816 var = predcom_tmp_var (ref, i, tmp_vars);
1817 chain->vars.quick_push (var);
1818 }
1819
1820 FOR_EACH_VEC_ELT (chain->vars, i, var)
1821 chain->vars[i] = make_ssa_name (var);
1822
1823 /* Root values are either the rhs operands of stores to be eliminated, or
1824 values loaded from memory before the loop. */
1825 auto_vec<tree> vtemps;
1826 vtemps.safe_grow_cleared (n);
1827 for (i = 0; i < n; i++)
1828 {
1829 init = get_init_expr (chain, i);
1830 if (init == NULL_TREE)
1831 {
1832 /* Root value is rhs operand of the store to be eliminated if
1833 it isn't loaded from memory before loop. */
83c06cf0 1834 dref a = get_chain_last_write_at (chain, i);
a652753d 1835 val = gimple_assign_rhs1 (a->stmt);
1836 if (TREE_CLOBBER_P (val))
83c06cf0 1837 {
1838 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
1839 gimple_assign_set_rhs1 (a->stmt, val);
1840 }
a652753d 1841
1842 vtemps[n - i - 1] = val;
1843 }
1844 else
1845 {
1846 edge latch = loop_latch_edge (loop);
1847 edge entry = loop_preheader_edge (loop);
1848
1849 /* Root value is loaded from memory before the loop; we also need
1850 to add PHI nodes to propagate the value across iterations. */
1851 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1852 if (stmts)
1853 gsi_insert_seq_on_edge_immediate (entry, stmts);
1854
1855 next = chain->vars[n - i];
1856 phi_result = copy_ssa_name (next);
1857 gphi *phi = create_phi_node (phi_result, loop->header);
1858 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1859 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1860 vtemps[n - i - 1] = phi_result;
1861 }
1862 }
1863
1864 /* Find the insertion position. */
1865 dref last = get_chain_root (chain);
1866 for (i = 0; i < chain->refs.length (); i++)
1867 {
1868 if (chain->refs[i]->pos > last->pos)
1869 last = chain->refs[i];
1870 }
1871
1872 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
1873
1874 /* Insert statements copying root value to root variable. */
1875 for (i = 0; i < n; i++)
1876 {
1877 var = chain->vars[i];
1878 val = vtemps[i];
1879 stmt = gimple_build_assign (var, val);
1880 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
1881 }
1882}
1883
1884/* Generates stores for CHAIN's eliminated stores in LOOP's last
1885 (CHAIN->length - 1) iterations. */
1886
1887static void
1888finalize_eliminated_stores (struct loop *loop, chain_p chain)
1889{
1890 unsigned i, n = chain->length;
1891
1892 for (i = 0; i < n; i++)
1893 {
1894 tree var = chain->vars[i];
1895 tree fini = chain->finis[n - i - 1];
1896 gimple *stmt = gimple_build_assign (fini, var);
1897
1898 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
1899 }
1900
1901 if (chain->fini_seq)
1902 {
1903 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
1904 chain->fini_seq = NULL;
1905 }
1906}
1907
ad4a85ad 1908/* Initializes a variable for load motion for ROOT and prepares phi nodes and
1909 initialization on entry to LOOP if necessary. The ssa name for the variable
1910 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
1911 around the loop is created. Uid of the newly created temporary variable
1912 is marked in TMP_VARS. INITS is the list containing the (single)
1913 initializer. */
1914
1915static void
1916initialize_root_vars_lm (struct loop *loop, dref root, bool written,
f1f41a6c 1917 vec<tree> *vars, vec<tree> inits,
ad4a85ad 1918 bitmap tmp_vars)
1919{
1920 unsigned i;
75a70cf9 1921 tree ref = DR_REF (root->ref), init, var, next;
1922 gimple_seq stmts;
1a91d914 1923 gphi *phi;
ad4a85ad 1924 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1925
1926 /* Find the initializer for the variable, and check that it cannot
1927 trap. */
f1f41a6c 1928 init = inits[0];
ad4a85ad 1929
f1f41a6c 1930 vars->create (written ? 2 : 1);
a4c3242a 1931 var = predcom_tmp_var (ref, 0, tmp_vars);
f1f41a6c 1932 vars->quick_push (var);
ad4a85ad 1933 if (written)
f1f41a6c 1934 vars->quick_push ((*vars)[0]);
48e1416a 1935
f1f41a6c 1936 FOR_EACH_VEC_ELT (*vars, i, var)
f9e245b2 1937 (*vars)[i] = make_ssa_name (var);
ad4a85ad 1938
f1f41a6c 1939 var = (*vars)[0];
48e1416a 1940
ad4a85ad 1941 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
1942 if (stmts)
dd277d48 1943 gsi_insert_seq_on_edge_immediate (entry, stmts);
ad4a85ad 1944
1945 if (written)
1946 {
f1f41a6c 1947 next = (*vars)[1];
ad4a85ad 1948 phi = create_phi_node (var, loop->header);
60d535d2 1949 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1950 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
ad4a85ad 1951 }
1952 else
1953 {
1a91d914 1954 gassign *init_stmt = gimple_build_assign (var, init);
75a70cf9 1955 gsi_insert_on_edge_immediate (entry, init_stmt);
ad4a85ad 1956 }
1957}
1958
1959
1960/* Execute load motion for references in chain CHAIN. Uids of the newly
1961 created temporary variables are marked in TMP_VARS. */
1962
1963static void
1964execute_load_motion (struct loop *loop, chain_p chain, bitmap tmp_vars)
1965{
c2078b80 1966 auto_vec<tree> vars;
ad4a85ad 1967 dref a;
1968 unsigned n_writes = 0, ridx, i;
1969 tree var;
1970
1971 gcc_assert (chain->type == CT_INVARIANT);
1972 gcc_assert (!chain->combined);
f1f41a6c 1973 FOR_EACH_VEC_ELT (chain->refs, i, a)
9ff25603 1974 if (DR_IS_WRITE (a->ref))
ad4a85ad 1975 n_writes++;
48e1416a 1976
ad4a85ad 1977 /* If there are no reads in the loop, there is nothing to do. */
f1f41a6c 1978 if (n_writes == chain->refs.length ())
ad4a85ad 1979 return;
1980
1981 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
1982 &vars, chain->inits, tmp_vars);
1983
1984 ridx = 0;
f1f41a6c 1985 FOR_EACH_VEC_ELT (chain->refs, i, a)
ad4a85ad 1986 {
1987 bool is_read = DR_IS_READ (a->ref);
ad4a85ad 1988
9ff25603 1989 if (DR_IS_WRITE (a->ref))
ad4a85ad 1990 {
1991 n_writes--;
1992 if (n_writes)
1993 {
f1f41a6c 1994 var = vars[0];
f9e245b2 1995 var = make_ssa_name (SSA_NAME_VAR (var));
f1f41a6c 1996 vars[0] = var;
ad4a85ad 1997 }
1998 else
1999 ridx = 1;
2000 }
48e1416a 2001
f1f41a6c 2002 replace_ref_with (a->stmt, vars[ridx],
ad4a85ad 2003 !is_read, !is_read);
2004 }
ad4a85ad 2005}
2006
2007/* Returns the single statement in which NAME is used, excepting
2008 the looparound phi nodes contained in one of the chains. If there is no
75a70cf9 2009 such statement, or if there are multiple such statements, NULL is returned. */
ad4a85ad 2010
42acab1c 2011static gimple *
ad4a85ad 2012single_nonlooparound_use (tree name)
2013{
2014 use_operand_p use;
2015 imm_use_iterator it;
42acab1c 2016 gimple *stmt, *ret = NULL;
ad4a85ad 2017
2018 FOR_EACH_IMM_USE_FAST (use, it, name)
2019 {
2020 stmt = USE_STMT (use);
2021
75a70cf9 2022 if (gimple_code (stmt) == GIMPLE_PHI)
ad4a85ad 2023 {
2024 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2025 could not be processed anyway, so just fail for them. */
2026 if (bitmap_bit_p (looparound_phis,
2027 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2028 continue;
2029
75a70cf9 2030 return NULL;
ad4a85ad 2031 }
db16e52d 2032 else if (is_gimple_debug (stmt))
2033 continue;
75a70cf9 2034 else if (ret != NULL)
2035 return NULL;
ad4a85ad 2036 else
2037 ret = stmt;
2038 }
2039
2040 return ret;
2041}
2042
2043/* Remove statement STMT, as well as the chain of assignments in which it is
2044 used. */
2045
2046static void
42acab1c 2047remove_stmt (gimple *stmt)
ad4a85ad 2048{
75a70cf9 2049 tree name;
42acab1c 2050 gimple *next;
75a70cf9 2051 gimple_stmt_iterator psi;
ad4a85ad 2052
75a70cf9 2053 if (gimple_code (stmt) == GIMPLE_PHI)
ad4a85ad 2054 {
2055 name = PHI_RESULT (stmt);
2056 next = single_nonlooparound_use (name);
b6d5efe6 2057 reset_debug_uses (stmt);
75a70cf9 2058 psi = gsi_for_stmt (stmt);
2059 remove_phi_node (&psi, true);
ad4a85ad 2060
2061 if (!next
fb2d5860 2062 || !gimple_assign_ssa_name_copy_p (next)
75a70cf9 2063 || gimple_assign_rhs1 (next) != name)
ad4a85ad 2064 return;
2065
2066 stmt = next;
2067 }
2068
2069 while (1)
2070 {
75a70cf9 2071 gimple_stmt_iterator bsi;
48e1416a 2072
75a70cf9 2073 bsi = gsi_for_stmt (stmt);
ad4a85ad 2074
75a70cf9 2075 name = gimple_assign_lhs (stmt);
a652753d 2076 if (TREE_CODE (name) == SSA_NAME)
2077 {
2078 next = single_nonlooparound_use (name);
2079 reset_debug_uses (stmt);
2080 }
2081 else
2082 {
2083 /* This is a store to be eliminated. */
2084 gcc_assert (gimple_vdef (stmt) != NULL);
2085 next = NULL;
2086 }
ad4a85ad 2087
e70e8b13 2088 unlink_stmt_vdef (stmt);
75a70cf9 2089 gsi_remove (&bsi, true);
fb2d5860 2090 release_defs (stmt);
ad4a85ad 2091
2092 if (!next
fb2d5860 2093 || !gimple_assign_ssa_name_copy_p (next)
75a70cf9 2094 || gimple_assign_rhs1 (next) != name)
ad4a85ad 2095 return;
2096
2097 stmt = next;
2098 }
2099}
2100
2101/* Perform the predictive commoning optimization for a chain CHAIN.
2102 Uids of the newly created temporary variables are marked in TMP_VARS. */
2103
2104static void
2105execute_pred_commoning_chain (struct loop *loop, chain_p chain,
a652753d 2106 bitmap tmp_vars)
ad4a85ad 2107{
83c06cf0 2108 unsigned i;
e70e8b13 2109 dref a;
ad4a85ad 2110 tree var;
fbab3016 2111 bool in_lhs;
ad4a85ad 2112
2113 if (chain->combined)
2114 {
2115 /* For combined chains, just remove the statements that are used to
182624fa 2116 compute the values of the expression (except for the root one).
2117 We delay this until after all chains are processed. */
ad4a85ad 2118 }
a652753d 2119 else if (chain->type == CT_STORE_STORE)
2120 {
2121 if (chain->length > 0)
2122 {
e33b3a13 2123 if (chain->inv_store_elimination)
2124 {
2125 /* If dead stores in this chain only store loop invariant
2126 values, we can simply record the invariant value and use
2127 it directly after loop. */
2128 initialize_root_vars_store_elim_1 (chain);
2129 }
2130 else
2131 {
2132 /* If dead stores in this chain store loop variant values,
2133 we need to set up the variables by loading from memory
2134 before loop and propagating it with PHI nodes. */
2135 initialize_root_vars_store_elim_2 (loop, chain, tmp_vars);
2136 }
a652753d 2137
2138 /* For inter-iteration store elimination chain, stores at each
2139 distance in loop's last (chain->length - 1) iterations can't
2140 be eliminated, because there is no following killing store.
2141 We need to generate these stores after loop. */
2142 finalize_eliminated_stores (loop, chain);
2143 }
2144
83c06cf0 2145 bool last_store_p = true;
2146 for (i = chain->refs.length (); i > 0; i--)
2147 {
2148 a = chain->refs[i - 1];
2149 /* Preserve the last store of the chain. Eliminate other stores
2150 which are killed by the last one. */
2151 if (DR_IS_WRITE (a->ref))
2152 {
2153 if (last_store_p)
2154 last_store_p = false;
2155 else
2156 remove_stmt (a->stmt);
2157
2158 continue;
2159 }
2160
2161 /* Any load in a Store-Store chain must be dominated by a previous
2162 store; we replace the load reference with the rhs of that store. */
2163 dref b = get_chain_last_write_before_load (chain, i - 1);
2164 gcc_assert (b != NULL);
2165 var = gimple_assign_rhs1 (b->stmt);
2166 replace_ref_with (a->stmt, var, false, false);
2167 }
a652753d 2168 }
ad4a85ad 2169 else
2170 {
fbab3016 2171 /* For non-combined chains, set up the variables that hold its value. */
2172 initialize_root_vars (loop, chain, tmp_vars);
2173 a = get_chain_root (chain);
2174 in_lhs = (chain->type == CT_STORE_LOAD
2175 || chain->type == CT_COMBINATION);
2176 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2177
2178 /* Replace the uses of the original references by these variables. */
f1f41a6c 2179 for (i = 1; chain->refs.iterate (i, &a); i++)
ad4a85ad 2180 {
f1f41a6c 2181 var = chain->vars[chain->length - a->distance];
ad4a85ad 2182 replace_ref_with (a->stmt, var, false, false);
2183 }
2184 }
2185}
2186
2187/* Determines the unroll factor necessary to remove as many temporary variable
2188 copies as possible. CHAINS is the list of chains that will be
2189 optimized. */
2190
2191static unsigned
f1f41a6c 2192determine_unroll_factor (vec<chain_p> chains)
ad4a85ad 2193{
2194 chain_p chain;
2195 unsigned factor = 1, af, nfactor, i;
2196 unsigned max = PARAM_VALUE (PARAM_MAX_UNROLL_TIMES);
2197
f1f41a6c 2198 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 2199 {
67a97bd8 2200 if (chain->type == CT_INVARIANT)
ad4a85ad 2201 continue;
a652753d 2202 /* For now we can't handle unrolling when eliminating stores. */
2203 else if (chain->type == CT_STORE_STORE)
2204 return 1;
ad4a85ad 2205
67a97bd8 2206 if (chain->combined)
2207 {
2208 /* For combined chains, we can't handle unrolling if we replace
2209 looparound PHIs. */
2210 dref a;
2211 unsigned j;
2212 for (j = 1; chain->refs.iterate (j, &a); j++)
2213 if (gimple_code (a->stmt) == GIMPLE_PHI)
2214 return 1;
2215 continue;
2216 }
2217
ad4a85ad 2218 /* The best unroll factor for this chain is equal to the number of
2219 temporary variables that we create for it. */
2220 af = chain->length;
2221 if (chain->has_max_use_after)
2222 af++;
2223
2224 nfactor = factor * af / gcd (factor, af);
2225 if (nfactor <= max)
2226 factor = nfactor;
2227 }
2228
2229 return factor;
2230}
2231
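/* A worked example of the factor computation above (hypothetical chains):
   a chain of length 2 without a use at the maximal distance gives af = 2,
   while another chain of length 2 with such a use gives af = 3; starting
   from factor = 1, the combined unroll factor is lcm (2, 3) = 6, provided
   this does not exceed PARAM_MAX_UNROLL_TIMES.  */
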
2232/* Perform the predictive commoning optimization for CHAINS.
2233 Uids of the newly created temporary variables are marked in TMP_VARS. */
2234
2235static void
f1f41a6c 2236execute_pred_commoning (struct loop *loop, vec<chain_p> chains,
ad4a85ad 2237 bitmap tmp_vars)
2238{
2239 chain_p chain;
2240 unsigned i;
2241
f1f41a6c 2242 FOR_EACH_VEC_ELT (chains, i, chain)
ad4a85ad 2243 {
2244 if (chain->type == CT_INVARIANT)
2245 execute_load_motion (loop, chain, tmp_vars);
2246 else
2247 execute_pred_commoning_chain (loop, chain, tmp_vars);
2248 }
48e1416a 2249
182624fa 2250 FOR_EACH_VEC_ELT (chains, i, chain)
2251 {
2252 if (chain->type == CT_INVARIANT)
2253 ;
2254 else if (chain->combined)
2255 {
2256 /* For combined chains, just remove the statements that are used to
2257 compute the values of the expression (except for the root one). */
2258 dref a;
2259 unsigned j;
2260 for (j = 1; chain->refs.iterate (j, &a); j++)
2261 remove_stmt (a->stmt);
2262 }
2263 }
2264
ad4a85ad 2265 update_ssa (TODO_update_ssa_only_virtuals);
2266}
2267
310d2511 2268/* For each reference in CHAINS, if its defining statement is
75a70cf9 2269 a phi node, record the ssa name that is defined by it. */
ad4a85ad 2270
2271static void
f1f41a6c 2272replace_phis_by_defined_names (vec<chain_p> chains)
ad4a85ad 2273{
2274 chain_p chain;
2275 dref a;
2276 unsigned i, j;
2277
f1f41a6c 2278 FOR_EACH_VEC_ELT (chains, i, chain)
2279 FOR_EACH_VEC_ELT (chain->refs, j, a)
ad4a85ad 2280 {
75a70cf9 2281 if (gimple_code (a->stmt) == GIMPLE_PHI)
2282 {
2283 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2284 a->stmt = NULL;
2285 }
ad4a85ad 2286 }
2287}
2288
75a70cf9 2289/* For each reference in CHAINS, if name_defined_by_phi is not
2290 NULL, use it to set the stmt field. */
ad4a85ad 2291
2292static void
f1f41a6c 2293replace_names_by_phis (vec<chain_p> chains)
ad4a85ad 2294{
2295 chain_p chain;
2296 dref a;
2297 unsigned i, j;
2298
f1f41a6c 2299 FOR_EACH_VEC_ELT (chains, i, chain)
2300 FOR_EACH_VEC_ELT (chain->refs, j, a)
75a70cf9 2301 if (a->stmt == NULL)
ad4a85ad 2302 {
75a70cf9 2303 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2304 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2305 a->name_defined_by_phi = NULL_TREE;
ad4a85ad 2306 }
2307}
2308
2309/* Wrapper over execute_pred_commoning, to pass it as a callback
2310 to tree_transform_and_unroll_loop. */
2311
2312struct epcc_data
2313{
f1f41a6c 2314 vec<chain_p> chains;
ad4a85ad 2315 bitmap tmp_vars;
2316};
2317
2318static void
2319execute_pred_commoning_cbck (struct loop *loop, void *data)
2320{
45ba1503 2321 struct epcc_data *const dta = (struct epcc_data *) data;
ad4a85ad 2322
2323 /* Restore phi nodes that were replaced by ssa names before
2324 tree_transform_and_unroll_loop (see detailed description in
2325 tree_predictive_commoning_loop). */
2326 replace_names_by_phis (dta->chains);
2327 execute_pred_commoning (loop, dta->chains, dta->tmp_vars);
2328}
2329
ad4a85ad 2330/* Base NAME and all the names in the chain of phi nodes that use it
2331 on variable VAR. The phi nodes are recognized by being in the copies of
2332 the header of the LOOP. */
2333
2334static void
2335base_names_in_chain_on (struct loop *loop, tree name, tree var)
2336{
42acab1c 2337 gimple *stmt, *phi;
ad4a85ad 2338 imm_use_iterator iter;
ad4a85ad 2339
3b652cc1 2340 replace_ssa_name_symbol (name, var);
ad4a85ad 2341
2342 while (1)
2343 {
2344 phi = NULL;
2345 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2346 {
75a70cf9 2347 if (gimple_code (stmt) == GIMPLE_PHI
2348 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
ad4a85ad 2349 {
2350 phi = stmt;
2351 BREAK_FROM_IMM_USE_STMT (iter);
2352 }
2353 }
2354 if (!phi)
2355 return;
2356
ad4a85ad 2357 name = PHI_RESULT (phi);
3b652cc1 2358 replace_ssa_name_symbol (name, var);
ad4a85ad 2359 }
2360}
2361
2362/* Given an unrolled LOOP after predictive commoning, remove the
2363 register copies arising from phi nodes by changing the base
2364 variables of SSA names. TMP_VARS is the set of the temporary variables
2365 for those we want to perform this. */
2366
2367static void
2368eliminate_temp_copies (struct loop *loop, bitmap tmp_vars)
2369{
2370 edge e;
1a91d914 2371 gphi *phi;
42acab1c 2372 gimple *stmt;
75a70cf9 2373 tree name, use, var;
1a91d914 2374 gphi_iterator psi;
ad4a85ad 2375
2376 e = loop_latch_edge (loop);
75a70cf9 2377 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
ad4a85ad 2378 {
1a91d914 2379 phi = psi.phi ();
ad4a85ad 2380 name = PHI_RESULT (phi);
2381 var = SSA_NAME_VAR (name);
ec11736b 2382 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
ad4a85ad 2383 continue;
2384 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2385 gcc_assert (TREE_CODE (use) == SSA_NAME);
2386
2387 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2388 stmt = SSA_NAME_DEF_STMT (use);
75a70cf9 2389 while (gimple_code (stmt) == GIMPLE_PHI
3fa1e4f2 2390 /* In case we could not unroll the loop enough to eliminate
2391 all copies, we may reach the loop header before the defining
2392 statement (in that case, some register copies will be present
2393 in loop latch in the final code, corresponding to the newly
2394 created looparound phi nodes). */
75a70cf9 2395 && gimple_bb (stmt) != loop->header)
ad4a85ad 2396 {
75a70cf9 2397 gcc_assert (single_pred_p (gimple_bb (stmt)));
ad4a85ad 2398 use = PHI_ARG_DEF (stmt, 0);
2399 stmt = SSA_NAME_DEF_STMT (use);
2400 }
2401
2402 base_names_in_chain_on (loop, use, var);
2403 }
2404}
2405
2406/* Returns true if CHAIN is suitable to be combined. */
2407
2408static bool
2409chain_can_be_combined_p (chain_p chain)
2410{
2411 return (!chain->combined
2412 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2413}
2414
2415/* Returns the modify statement that uses NAME. Skips over assignment
2416 statements; NAME is replaced with the actual name used in the returned
2417 statement. */
2418
42acab1c 2419static gimple *
ad4a85ad 2420find_use_stmt (tree *name)
2421{
42acab1c 2422 gimple *stmt;
75a70cf9 2423 tree rhs, lhs;
ad4a85ad 2424
2425 /* Skip over assignments. */
2426 while (1)
2427 {
2428 stmt = single_nonlooparound_use (*name);
2429 if (!stmt)
75a70cf9 2430 return NULL;
ad4a85ad 2431
75a70cf9 2432 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2433 return NULL;
ad4a85ad 2434
75a70cf9 2435 lhs = gimple_assign_lhs (stmt);
ad4a85ad 2436 if (TREE_CODE (lhs) != SSA_NAME)
75a70cf9 2437 return NULL;
ad4a85ad 2438
75a70cf9 2439 if (gimple_assign_copy_p (stmt))
2440 {
2441 rhs = gimple_assign_rhs1 (stmt);
2442 if (rhs != *name)
2443 return NULL;
ad4a85ad 2444
75a70cf9 2445 *name = lhs;
2446 }
2447 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2448 == GIMPLE_BINARY_RHS)
2449 return stmt;
2450 else
2451 return NULL;
ad4a85ad 2452 }
ad4a85ad 2453}
2454
2455/* Returns true if we may perform reassociation for operation CODE in TYPE. */
2456
2457static bool
2458may_reassociate_p (tree type, enum tree_code code)
2459{
2460 if (FLOAT_TYPE_P (type)
2461 && !flag_unsafe_math_optimizations)
2462 return false;
2463
2464 return (commutative_tree_code (code)
2465 && associative_tree_code (code));
2466}
2467
2468/* If the operation used in STMT is associative and commutative, goes through
2469 the tree of the same operations and returns its root. The distance to the
2470 root is stored in DISTANCE. */
2471
42acab1c 2472static gimple *
2473find_associative_operation_root (gimple *stmt, unsigned *distance)
ad4a85ad 2474{
75a70cf9 2475 tree lhs;
42acab1c 2476 gimple *next;
75a70cf9 2477 enum tree_code code = gimple_assign_rhs_code (stmt);
2478 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
ad4a85ad 2479 unsigned dist = 0;
2480
75a70cf9 2481 if (!may_reassociate_p (type, code))
2482 return NULL;
ad4a85ad 2483
2484 while (1)
2485 {
75a70cf9 2486 lhs = gimple_assign_lhs (stmt);
ad4a85ad 2487 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2488
2489 next = find_use_stmt (&lhs);
75a70cf9 2490 if (!next
2491 || gimple_assign_rhs_code (next) != code)
ad4a85ad 2492 break;
2493
2494 stmt = next;
2495 dist++;
2496 }
2497
2498 if (distance)
2499 *distance = dist;
2500 return stmt;
2501}
2502
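/* For illustration (hypothetical statements): given

     s1: t1 = x + a;
     s2: t2 = t1 + b;
     s3: t3 = t2 + c;

   where t1, t2 and t3 have no other (non-looparound) uses, starting from
   s1 the root found above is s3 and *DISTANCE is set to 2.  */
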
2503/* Returns the common statement in which NAME1 and NAME2 have a use. If there
2504 is no such statement, returns NULL. In case the operation used on
310d2511 2505 NAME1 and NAME2 is associative and commutative, returns the root of the
ad4a85ad 2506 tree formed by this operation instead of the statement that uses NAME1 or
2507 NAME2. */
2508
42acab1c 2509static gimple *
ad4a85ad 2510find_common_use_stmt (tree *name1, tree *name2)
2511{
42acab1c 2512 gimple *stmt1, *stmt2;
ad4a85ad 2513
2514 stmt1 = find_use_stmt (name1);
2515 if (!stmt1)
75a70cf9 2516 return NULL;
ad4a85ad 2517
2518 stmt2 = find_use_stmt (name2);
2519 if (!stmt2)
75a70cf9 2520 return NULL;
ad4a85ad 2521
2522 if (stmt1 == stmt2)
2523 return stmt1;
2524
2525 stmt1 = find_associative_operation_root (stmt1, NULL);
2526 if (!stmt1)
75a70cf9 2527 return NULL;
ad4a85ad 2528 stmt2 = find_associative_operation_root (stmt2, NULL);
2529 if (!stmt2)
75a70cf9 2530 return NULL;
ad4a85ad 2531
75a70cf9 2532 return (stmt1 == stmt2 ? stmt1 : NULL);
ad4a85ad 2533}
2534
2535/* Checks whether R1 and R2 are combined together using CODE, with the result
2536 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2537 if it is true. If CODE is ERROR_MARK, set these values instead. */
2538
2539static bool
2540combinable_refs_p (dref r1, dref r2,
2541 enum tree_code *code, bool *swap, tree *rslt_type)
2542{
2543 enum tree_code acode;
2544 bool aswap;
2545 tree atype;
75a70cf9 2546 tree name1, name2;
42acab1c 2547 gimple *stmt;
ad4a85ad 2548
2549 name1 = name_for_ref (r1);
2550 name2 = name_for_ref (r2);
2551 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2552
2553 stmt = find_common_use_stmt (&name1, &name2);
2554
f8d7b728 2555 if (!stmt
2556 /* A simple post-dominance check - make sure the combination
2557 is executed under the same condition as the references. */
2558 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2559 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
ad4a85ad 2560 return false;
2561
75a70cf9 2562 acode = gimple_assign_rhs_code (stmt);
ad4a85ad 2563 aswap = (!commutative_tree_code (acode)
75a70cf9 2564 && gimple_assign_rhs1 (stmt) != name1);
2565 atype = TREE_TYPE (gimple_assign_lhs (stmt));
ad4a85ad 2566
2567 if (*code == ERROR_MARK)
2568 {
2569 *code = acode;
2570 *swap = aswap;
2571 *rslt_type = atype;
2572 return true;
2573 }
2574
2575 return (*code == acode
2576 && *swap == aswap
2577 && *rslt_type == atype);
2578}
2579
2580/* Remove OP from the operation on rhs of STMT, and replace STMT with
2581 an assignment of the remaining operand. */
2582
2583static void
42acab1c 2584remove_name_from_operation (gimple *stmt, tree op)
ad4a85ad 2585{
75a70cf9 2586 tree other_op;
2587 gimple_stmt_iterator si;
ad4a85ad 2588
75a70cf9 2589 gcc_assert (is_gimple_assign (stmt));
ad4a85ad 2590
75a70cf9 2591 if (gimple_assign_rhs1 (stmt) == op)
2592 other_op = gimple_assign_rhs2 (stmt);
ad4a85ad 2593 else
75a70cf9 2594 other_op = gimple_assign_rhs1 (stmt);
2595
2596 si = gsi_for_stmt (stmt);
2597 gimple_assign_set_rhs_from_tree (&si, other_op);
2598
2599 /* We should not have reallocated STMT. */
2600 gcc_assert (gsi_stmt (si) == stmt);
2601
ad4a85ad 2602 update_stmt (stmt);
2603}
2604
2605/* Reassociates the expression in which NAME1 and NAME2 are used so that they
b63803a8 2606 are combined in a single statement, and returns this statement. */
ad4a85ad 2607
42acab1c 2608static gimple *
b63803a8 2609reassociate_to_the_same_stmt (tree name1, tree name2)
ad4a85ad 2610{
42acab1c 2611 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
1a91d914 2612 gassign *new_stmt, *tmp_stmt;
75a70cf9 2613 tree new_name, tmp_name, var, r1, r2;
ad4a85ad 2614 unsigned dist1, dist2;
2615 enum tree_code code;
2616 tree type = TREE_TYPE (name1);
75a70cf9 2617 gimple_stmt_iterator bsi;
ad4a85ad 2618
2619 stmt1 = find_use_stmt (&name1);
2620 stmt2 = find_use_stmt (&name2);
2621 root1 = find_associative_operation_root (stmt1, &dist1);
2622 root2 = find_associative_operation_root (stmt2, &dist2);
75a70cf9 2623 code = gimple_assign_rhs_code (stmt1);
ad4a85ad 2624
2625 gcc_assert (root1 && root2 && root1 == root2
75a70cf9 2626 && code == gimple_assign_rhs_code (stmt2));
ad4a85ad 2627
2628 /* Find the root of the nearest expression in which both NAME1 and NAME2
2629 are used. */
2630 r1 = name1;
2631 s1 = stmt1;
2632 r2 = name2;
2633 s2 = stmt2;
2634
2635 while (dist1 > dist2)
2636 {
2637 s1 = find_use_stmt (&r1);
75a70cf9 2638 r1 = gimple_assign_lhs (s1);
ad4a85ad 2639 dist1--;
2640 }
2641 while (dist2 > dist1)
2642 {
2643 s2 = find_use_stmt (&r2);
75a70cf9 2644 r2 = gimple_assign_lhs (s2);
ad4a85ad 2645 dist2--;
2646 }
2647
2648 while (s1 != s2)
2649 {
2650 s1 = find_use_stmt (&r1);
75a70cf9 2651 r1 = gimple_assign_lhs (s1);
ad4a85ad 2652 s2 = find_use_stmt (&r2);
75a70cf9 2653 r2 = gimple_assign_lhs (s2);
ad4a85ad 2654 }
2655
2656 /* Remove NAME1 and NAME2 from the statements in which they are currently
2657 used. */
2658 remove_name_from_operation (stmt1, name1);
2659 remove_name_from_operation (stmt2, name2);
2660
2661 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2662 combine it with the rhs of S1. */
2ac51e48 2663 var = create_tmp_reg (type, "predreastmp");
f9e245b2 2664 new_name = make_ssa_name (var);
e9cf809e 2665 new_stmt = gimple_build_assign (new_name, code, name1, name2);
ad4a85ad 2666
2ac51e48 2667 var = create_tmp_reg (type, "predreastmp");
f9e245b2 2668 tmp_name = make_ssa_name (var);
75a70cf9 2669
2670 /* Rhs of S1 may now be either a binary expression with operation
2671 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2672 so that name1 or name2 was removed from it). */
e9cf809e 2673 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2674 gimple_assign_rhs1 (s1),
2675 gimple_assign_rhs2 (s1));
75a70cf9 2676
2677 bsi = gsi_for_stmt (s1);
2678 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2679 s1 = gsi_stmt (bsi);
ad4a85ad 2680 update_stmt (s1);
2681
b63803a8 2682 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
75a70cf9 2683 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
ad4a85ad 2684
2685 return new_stmt;
2686}
2687
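/* For illustration (hypothetical statements): if NAME1 and NAME2 are only
   used as

     t1 = NAME1 + x;
     t2 = t1 + NAME2;

   the reassociation above rewrites this (modulo temporary names) into

     t1 = x;
     tmp1 = NAME1 + NAME2;
     tmp2 = t1;
     t2 = tmp1 + tmp2;

   and the statement computing tmp1 is returned.  */
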
2688/* Returns the statement that combines references R1 and R2. In case R1
2689 and R2 are not used in the same statement, but they are used with an
2690 associative and commutative operation in the same expression, reassociate
b63803a8 2691 the expression so that they are used in the same statement. */
ad4a85ad 2692
42acab1c 2693static gimple *
b63803a8 2694stmt_combining_refs (dref r1, dref r2)
ad4a85ad 2695{
42acab1c 2696 gimple *stmt1, *stmt2;
ad4a85ad 2697 tree name1 = name_for_ref (r1);
2698 tree name2 = name_for_ref (r2);
2699
2700 stmt1 = find_use_stmt (&name1);
2701 stmt2 = find_use_stmt (&name2);
2702 if (stmt1 == stmt2)
2703 return stmt1;
2704
b63803a8 2705 return reassociate_to_the_same_stmt (name1, name2);
ad4a85ad 2706}
2707
2708/* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2709 description of the new chain is returned, otherwise we return NULL. */
2710
2711static chain_p
2712combine_chains (chain_p ch1, chain_p ch2)
2713{
2714 dref r1, r2, nw;
2715 enum tree_code op = ERROR_MARK;
2716 bool swap = false;
2717 chain_p new_chain;
b63803a8 2718 unsigned i;
ad4a85ad 2719 tree rslt_type = NULL_TREE;
2720
2721 if (ch1 == ch2)
c84b1d32 2722 return NULL;
ad4a85ad 2723 if (ch1->length != ch2->length)
2724 return NULL;
2725
f1f41a6c 2726 if (ch1->refs.length () != ch2->refs.length ())
ad4a85ad 2727 return NULL;
2728
f1f41a6c 2729 for (i = 0; (ch1->refs.iterate (i, &r1)
2730 && ch2->refs.iterate (i, &r2)); i++)
ad4a85ad 2731 {
2732 if (r1->distance != r2->distance)
2733 return NULL;
2734
2735 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2736 return NULL;
2737 }
2738
2739 if (swap)
a4f59596 2740 std::swap (ch1, ch2);
ad4a85ad 2741
2742 new_chain = XCNEW (struct chain);
2743 new_chain->type = CT_COMBINATION;
f4e36c33 2744 new_chain->op = op;
ad4a85ad 2745 new_chain->ch1 = ch1;
2746 new_chain->ch2 = ch2;
2747 new_chain->rslt_type = rslt_type;
2748 new_chain->length = ch1->length;
2749
b63803a8 2750 for (i = 0; (ch1->refs.iterate (i, &r1)
2751 && ch2->refs.iterate (i, &r2)); i++)
2752 {
26dbec0a 2753 nw = XCNEW (struct dref_d);
b63803a8 2754 nw->stmt = stmt_combining_refs (r1, r2);
ad4a85ad 2755 nw->distance = r1->distance;
7a31ada4 2756
42230610 2757 new_chain->refs.safe_push (nw);
7a31ada4 2758 }
ad4a85ad 2759
b63803a8 2760 ch1->combined = true;
2761 ch2->combined = true;
ad4a85ad 2762 return new_chain;
2763}
2764
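/* For illustration (a hypothetical loop, not taken from this file): in

     for (i = 0; i < n; i++)
       s += a[i] * b[i] + a[i + 1] * b[i + 1];

   the load chains for a[] and b[] have references at equal distances that
   are always combined by the same multiplication, so the two chains can be
   merged into a single CT_COMBINATION chain over the products; the product
   a[i + 1] * b[i + 1] computed in one iteration can then be reused as
   a[i] * b[i] in the next one.  */
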
e02e28c4 2765/* Recursively update position information of all offspring chains to ROOT
2766 chain's position information. */
2767
2768static void
2769update_pos_for_combined_chains (chain_p root)
2770{
2771 chain_p ch1 = root->ch1, ch2 = root->ch2;
2772 dref ref, ref1, ref2;
2773 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2774 && ch1->refs.iterate (j, &ref1)
2775 && ch2->refs.iterate (j, &ref2)); ++j)
2776 ref1->pos = ref2->pos = ref->pos;
2777
2778 if (ch1->type == CT_COMBINATION)
2779 update_pos_for_combined_chains (ch1);
2780 if (ch2->type == CT_COMBINATION)
2781 update_pos_for_combined_chains (ch2);
2782}
2783
2784/* Returns true if statement S1 dominates statement S2. */
2785
2786static bool
2787pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2788{
2789 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2790
2791 if (!bb1 || s1 == s2)
2792 return true;
2793
2794 if (bb1 == bb2)
2795 return gimple_uid (s1) < gimple_uid (s2);
2796
2797 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2798}
2799
2800/* Try to combine the CHAINS in LOOP. */
ad4a85ad 2801
2802static void
e02e28c4 2803try_combine_chains (struct loop *loop, vec<chain_p> *chains)
ad4a85ad 2804{
2805 unsigned i, j;
2806 chain_p ch1, ch2, cch;
c2078b80 2807 auto_vec<chain_p> worklist;
e02e28c4 2808 bool combined_p = false;
ad4a85ad 2809
f1f41a6c 2810 FOR_EACH_VEC_ELT (*chains, i, ch1)
ad4a85ad 2811 if (chain_can_be_combined_p (ch1))
f1f41a6c 2812 worklist.safe_push (ch1);
ad4a85ad 2813
f1f41a6c 2814 while (!worklist.is_empty ())
ad4a85ad 2815 {
f1f41a6c 2816 ch1 = worklist.pop ();
ad4a85ad 2817 if (!chain_can_be_combined_p (ch1))
2818 continue;
2819
f1f41a6c 2820 FOR_EACH_VEC_ELT (*chains, j, ch2)
ad4a85ad 2821 {
2822 if (!chain_can_be_combined_p (ch2))
2823 continue;
2824
2825 cch = combine_chains (ch1, ch2);
2826 if (cch)
2827 {
f1f41a6c 2828 worklist.safe_push (cch);
2829 chains->safe_push (cch);
e02e28c4 2830 combined_p = true;
2831 break;
2832 }
2833 }
2834 }
2835 if (!combined_p)
2836 return;
2837
2838 /* Set up UIDs for all statements in dominance order. */
886345e6 2839 basic_block *bbs = get_loop_body_in_dom_order (loop);
e02e28c4 2840 renumber_gimple_stmt_uids_in_blocks (bbs, loop->num_nodes);
2841 free (bbs);
2842
2843 /* Re-association in combined chains may generate statements different to
2844 order of references of the original chain. We need to keep references
2845 of combined chain in dominance order so that all uses will be inserted
2846 after definitions. Note:
2847 A) This is necessary for all combined chains.
2848 B) This is only necessary for ZERO distance references because other
2849 references inherit value from loop carried PHIs.
2850
2851 We first update position information for all combined chains. */
2852 dref ref;
2853 for (i = 0; chains->iterate (i, &ch1); ++i)
2854 {
2855 if (ch1->type != CT_COMBINATION || ch1->combined)
2856 continue;
2857
2858 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2859 ref->pos = gimple_uid (ref->stmt);
2860
2861 update_pos_for_combined_chains (ch1);
2862 }
2863 /* Then sort references according to newly updated position information. */
2864 for (i = 0; chains->iterate (i, &ch1); ++i)
2865 {
2866 if (ch1->type != CT_COMBINATION && !ch1->combined)
2867 continue;
2868
2869 /* Find the first reference with non-ZERO distance. */
2870 if (ch1->length == 0)
2871 j = ch1->refs.length ();
2872 else
2873 {
2874 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
2875 if (ref->distance != 0)
2876 break;
2877 }
2878
2879 /* Sort all ZERO distance references by position. */
2880 qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
2881
2882 if (ch1->combined)
2883 continue;
2884
2885 /* For a ZERO length chain, has_max_use_after must be true since the root
2886 combined stmt dominates the others. */
2887 if (ch1->length == 0)
2888 {
2889 ch1->has_max_use_after = true;
2890 continue;
2891 }
2892 /* Check if there is use at max distance after root for combined chains
2893 and set flag accordingly. */
2894 ch1->has_max_use_after = false;
2895 gimple *root_stmt = get_chain_root (ch1)->stmt;
2896 for (j = 1; ch1->refs.iterate (j, &ref); ++j)
2897 {
2898 if (ref->distance == ch1->length
2899 && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
2900 {
2901 ch1->has_max_use_after = true;
ad4a85ad 2902 break;
2903 }
2904 }
2905 }
2906}
2907
a652753d 2908/* Prepare initializers for store elimination CHAIN in LOOP. Returns false
2909 if this is impossible because one of these initializers may trap, true
2910 otherwise. */
2911
2912static bool
e33b3a13 2913prepare_initializers_chain_store_elim (struct loop *loop, chain_p chain)
a652753d 2914{
2915 unsigned i, n = chain->length;
2916
2917 /* For now we can't eliminate stores if some of them are conditionally
2918 executed. */
2919 if (!chain->all_always_accessed)
2920 return false;
2921
2922 /* Nothing to initialize for intra-iteration store elimination. */
2923 if (n == 0 && chain->type == CT_STORE_STORE)
2924 return true;
2925
e33b3a13 2926 /* For store elimination chain, there is nothing to initialize if stores
2927 to be eliminated only store loop invariant values into memory. */
2928 if (chain->type == CT_STORE_STORE
2929 && is_inv_store_elimination_chain (loop, chain))
2930 {
2931 chain->inv_store_elimination = true;
2932 return true;
2933 }
2934
a652753d 2935 chain->inits.create (n);
2936 chain->inits.safe_grow_cleared (n);
2937
2938 /* For a store elimination chain like the one below:
2939
2940 for (i = 0; i < len; i++)
2941 {
2942 a[i] = 1;
2943 // a[i + 1] = ...
2944 a[i + 2] = 3;
2945 }
2946
2947 the store to a[i + 1] is missing in the loop body; it acts like a bubble. The
2948 content of a[i + 1] remains the same if the loop iterates fewer times
2949 than chain->length. We need to set up root variables for such stores
2950 by loading from memory before the loop. Note we only need to load bubble
2951 elements because the loop body is guaranteed to be executed at least once
2952 after the loop's preheader edge. */
2953 auto_vec<bool> bubbles;
2954 bubbles.safe_grow_cleared (n + 1);
2955 for (i = 0; i < chain->refs.length (); i++)
2956 bubbles[chain->refs[i]->distance] = true;
2957
2958 struct data_reference *dr = get_chain_root (chain)->ref;
2959 for (i = 0; i < n; i++)
2960 {
2961 if (bubbles[i])
2962 continue;
2963
2964 gimple_seq stmts = NULL;
2965
2966 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
2967 if (stmts)
2968 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
2969
2970 chain->inits[i] = init;
2971 }
2972
2973 return true;
2974}
2975
ad4a85ad 2976/* Prepare initializers for CHAIN in LOOP. Returns false if this is
2977 impossible because one of these initializers may trap, true otherwise. */
2978
2979static bool
2980prepare_initializers_chain (struct loop *loop, chain_p chain)
2981{
2982 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
2983 struct data_reference *dr = get_chain_root (chain)->ref;
75a70cf9 2984 tree init;
ad4a85ad 2985 dref laref;
2986 edge entry = loop_preheader_edge (loop);
2987
a652753d 2988 if (chain->type == CT_STORE_STORE)
2989 return prepare_initializers_chain_store_elim (loop, chain);
2990
ad4a85ad 2991 /* Find the initializers for the variables, and check that they cannot
2992 trap. */
f1f41a6c 2993 chain->inits.create (n);
ad4a85ad 2994 for (i = 0; i < n; i++)
f1f41a6c 2995 chain->inits.quick_push (NULL_TREE);
ad4a85ad 2996
2997 /* If we have replaced some looparound phi nodes, use their initializers
2998 instead of creating our own. */
f1f41a6c 2999 FOR_EACH_VEC_ELT (chain->refs, i, laref)
ad4a85ad 3000 {
75a70cf9 3001 if (gimple_code (laref->stmt) != GIMPLE_PHI)
ad4a85ad 3002 continue;
3003
3004 gcc_assert (laref->distance > 0);
f1f41a6c 3005 chain->inits[n - laref->distance]
3006 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
ad4a85ad 3007 }
3008
3009 for (i = 0; i < n; i++)
3010 {
6968257f 3011 gimple_seq stmts = NULL;
3012
f1f41a6c 3013 if (chain->inits[i] != NULL_TREE)
ad4a85ad 3014 continue;
3015
99f6be4b 3016 init = ref_at_iteration (dr, (int) i - n, &stmts);
ad4a85ad 3017 if (!chain->all_always_accessed && tree_could_trap_p (init))
6968257f 3018 {
3019 gimple_seq_discard (stmts);
3020 return false;
3021 }
ad4a85ad 3022
ad4a85ad 3023 if (stmts)
a481acc5 3024 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
ad4a85ad 3025
f1f41a6c 3026 chain->inits[i] = init;
ad4a85ad 3027 }
3028
3029 return true;
3030}
3031
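/* For illustration (hypothetical chain): for a chain of length 2 whose root
   reference is REF, the code above creates two initializers that read REF
   at iterations -2 and -1 relative to the first iteration, i.e. the values
   the root variables must hold on entry to the loop, unless looparound PHI
   nodes already provide them.  */
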
3032/* Prepare initializers for CHAINS in LOOP, and free chains that cannot
3033 be used because the initializers might trap. */
3034
3035static void
f1f41a6c 3036prepare_initializers (struct loop *loop, vec<chain_p> chains)
ad4a85ad 3037{
3038 chain_p chain;
3039 unsigned i;
3040
f1f41a6c 3041 for (i = 0; i < chains.length (); )
ad4a85ad 3042 {
f1f41a6c 3043 chain = chains[i];
ad4a85ad 3044 if (prepare_initializers_chain (loop, chain))
3045 i++;
3046 else
3047 {
3048 release_chain (chain);
f1f41a6c 3049 chains.unordered_remove (i);
ad4a85ad 3050 }
3051 }
3052}
3053
a652753d 3054/* Generates finalizer memory references for CHAIN in LOOP. Returns true
3055 if finalizer code for CHAIN can be generated, otherwise false. */
3056
3057static bool
3058prepare_finalizers_chain (struct loop *loop, chain_p chain)
3059{
3060 unsigned i, n = chain->length;
3061 struct data_reference *dr = get_chain_root (chain)->ref;
3062 tree fini, niters = number_of_latch_executions (loop);
3063
3064 /* For now we can't eliminate stores if some of them are conditionally
3065 executed. */
3066 if (!chain->all_always_accessed)
3067 return false;
3068
3069 chain->finis.create (n);
3070 for (i = 0; i < n; i++)
3071 chain->finis.quick_push (NULL_TREE);
3072
3073 /* We never use looparound phi node for store elimination chains. */
3074
3075 /* Find the finalizers for the variables, and check that they cannot
3076 trap. */
3077 for (i = 0; i < n; i++)
3078 {
3079 gimple_seq stmts = NULL;
3080 gcc_assert (chain->finis[i] == NULL_TREE);
3081
3082 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3083 {
cf071e9e 3084 niters = unshare_expr (niters);
a652753d 3085 niters = force_gimple_operand (niters, &stmts, true, NULL);
3086 if (stmts)
3087 {
3088 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3089 stmts = NULL;
3090 }
3091 }
3092 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3093 if (stmts)
3094 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3095
3096 chain->finis[i] = fini;
3097 }
3098
3099 return true;
3100}
3101
3102/* Generates finalizer memory references for CHAINS in LOOP. Returns true
3103 if finalizer code generation for CHAINS breaks loop closed ssa form. */
3104
3105static bool
3106prepare_finalizers (struct loop *loop, vec<chain_p> chains)
3107{
3108 chain_p chain;
3109 unsigned i;
3110 bool loop_closed_ssa = false;
3111
3112 for (i = 0; i < chains.length ();)
3113 {
3114 chain = chains[i];
3115
3116 /* Finalizer is only necessary for inter-iteration store elimination
3117 chains. */
3118 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3119 {
3120 i++;
3121 continue;
3122 }
3123
3124 if (prepare_finalizers_chain (loop, chain))
3125 {
3126 i++;
89c72921 3127 /* Be conservative and assume loop closed ssa form is corrupted
3128 by the store-store chain, though this is not always the case
3129 if the eliminated stores only store loop invariant values into
3130 memory. */
3131 loop_closed_ssa = true;
a652753d 3132 }
3133 else
3134 {
3135 release_chain (chain);
3136 chains.unordered_remove (i);
3137 }
3138 }
3139 return loop_closed_ssa;
3140}
3141
a481acc5 3142/* Insert all initializing gimple stmts into loop's entry edge. */
3143
3144static void
3145insert_init_seqs (struct loop *loop, vec<chain_p> chains)
3146{
3147 unsigned i;
3148 edge entry = loop_preheader_edge (loop);
3149
3150 for (i = 0; i < chains.length (); ++i)
3151 if (chains[i]->init_seq)
3152 {
3153 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3154 chains[i]->init_seq = NULL;
3155 }
3156}
3157
a652753d 3158/* Performs predictive commoning for LOOP. Sets bit 1<<0 of the return value
3159 if LOOP was unrolled; sets bit 1<<1 of the return value if loop closed ssa
3160 form was corrupted. */
ad4a85ad 3161
a652753d 3162static unsigned
ad4a85ad 3163tree_predictive_commoning_loop (struct loop *loop)
3164{
f1f41a6c 3165 vec<data_reference_p> datarefs;
3166 vec<ddr_p> dependences;
ad4a85ad 3167 struct component *components;
1e094109 3168 vec<chain_p> chains = vNULL;
ad4a85ad 3169 unsigned unroll_factor;
3170 struct tree_niter_desc desc;
a652753d 3171 bool unroll = false, loop_closed_ssa = false;
ad4a85ad 3172 edge exit;
ad4a85ad 3173
3174 if (dump_file && (dump_flags & TDF_DETAILS))
3175 fprintf (dump_file, "Processing loop %d\n", loop->num);
3176
c4dd22f7 3177 /* Nothing for predictive commoning if the loop only iterates 1 time. */
3178 if (get_max_loop_iterations_int (loop) == 0)
3179 {
3180 if (dump_file && (dump_flags & TDF_DETAILS))
3181 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3182
a652753d 3183 return 0;
c4dd22f7 3184 }
3185
ad4a85ad 3186 /* Find the data references and split them into components according to their
3187 dependence relations. */
4997014d 3188 auto_vec<loop_p, 3> loop_nest;
f1f41a6c 3189 dependences.create (10);
e85cf4e5 3190 datarefs.create (10);
713f1f14 3191 if (! compute_data_dependences_for_loop (loop, true, &loop_nest, &datarefs,
3192 &dependences))
3193 {
3194 if (dump_file && (dump_flags & TDF_DETAILS))
3195 fprintf (dump_file, "Cannot analyze data dependencies\n");
713f1f14 3196 free_data_refs (datarefs);
3197 free_dependence_relations (dependences);
a652753d 3198 return 0;
713f1f14 3199 }
3200
ad4a85ad 3201 if (dump_file && (dump_flags & TDF_DETAILS))
3202 dump_data_dependence_relations (dump_file, dependences);
3203
3204 components = split_data_refs_to_components (loop, datarefs, dependences);
f1f41a6c 3205 loop_nest.release ();
ad4a85ad 3206 free_dependence_relations (dependences);
3207 if (!components)
3208 {
3209 free_data_refs (datarefs);
f037f351 3210 free_affine_expand_cache (&name_expansions);
a652753d 3211 return 0;
ad4a85ad 3212 }
3213
3214 if (dump_file && (dump_flags & TDF_DETAILS))
3215 {
3216 fprintf (dump_file, "Initial state:\n\n");
3217 dump_components (dump_file, components);
3218 }
3219
3220 /* Find the suitable components and split them into chains. */
3221 components = filter_suitable_components (loop, components);
3222
035def86 3223 auto_bitmap tmp_vars;
ad4a85ad 3224 looparound_phis = BITMAP_ALLOC (NULL);
3225 determine_roots (loop, components, &chains);
3226 release_components (components);
3227
f1f41a6c 3228 if (!chains.exists ())
ad4a85ad 3229 {
3230 if (dump_file && (dump_flags & TDF_DETAILS))
3231 fprintf (dump_file,
3232 "Predictive commoning failed: no suitable chains\n");
3233 goto end;
3234 }
3235 prepare_initializers (loop, chains);
a652753d 3236 loop_closed_ssa = prepare_finalizers (loop, chains);
ad4a85ad 3237
3238 /* Try to combine the chains that are always worked with together. */
e02e28c4 3239 try_combine_chains (loop, &chains);
ad4a85ad 3240
a481acc5 3241 insert_init_seqs (loop, chains);
3242
ad4a85ad 3243 if (dump_file && (dump_flags & TDF_DETAILS))
3244 {
3245 fprintf (dump_file, "Before commoning:\n\n");
3246 dump_chains (dump_file, chains);
3247 }
3248
3249 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3250 that its number of iterations is divisible by the factor. */
3251 unroll_factor = determine_unroll_factor (chains);
3252 scev_reset ();
286fa508 3253 unroll = (unroll_factor > 1
3254 && can_unroll_loop_p (loop, unroll_factor, &desc));
ad4a85ad 3255 exit = single_dom_exit (loop);
3256
3257 /* Execute the predictive commoning transformations, and possibly unroll the
3258 loop. */
3259 if (unroll)
3260 {
3261 struct epcc_data dta;
3262
3263 if (dump_file && (dump_flags & TDF_DETAILS))
3264 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3265
3266 dta.chains = chains;
3267 dta.tmp_vars = tmp_vars;
48e1416a 3268
ad4a85ad 3269 update_ssa (TODO_update_ssa_only_virtuals);
3270
3271 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3272 execute_pred_commoning_cbck is called may cause phi nodes to be
3273 reallocated, which is a problem since CHAINS may point to these
3274 statements. To fix this, we store the ssa names defined by the
3275 phi nodes here instead of the phi nodes themselves, and restore
3276 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3277 replace_phis_by_defined_names (chains);
3278
3279 tree_transform_and_unroll_loop (loop, unroll_factor, exit, &desc,
3280 execute_pred_commoning_cbck, &dta);
3281 eliminate_temp_copies (loop, tmp_vars);
3282 }
3283 else
3284 {
3285 if (dump_file && (dump_flags & TDF_DETAILS))
3286 fprintf (dump_file,
3287 "Executing predictive commoning without unrolling.\n");
3288 execute_pred_commoning (loop, chains, tmp_vars);
3289 }
3290
3291end: ;
3292 release_chains (chains);
3293 free_data_refs (datarefs);
ad4a85ad 3294 BITMAP_FREE (looparound_phis);
3295
3296 free_affine_expand_cache (&name_expansions);
3297
a652753d 3298 return (unroll ? 1 : 0) | (loop_closed_ssa ? 2 : 0);
ad4a85ad 3299}
3300
3301/* Runs predictive commoning. */
3302
eb2a640e 3303unsigned
ad4a85ad 3304tree_predictive_commoning (void)
3305{
ad4a85ad 3306 struct loop *loop;
a652753d 3307 unsigned ret = 0, changed = 0;
ad4a85ad 3308
3309 initialize_original_copy_tables ();
f21d4d00 3310 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
7baffbd3 3311 if (optimize_loop_for_speed_p (loop))
3312 {
a652753d 3313 changed |= tree_predictive_commoning_loop (loop);
7baffbd3 3314 }
a652753d 3315 free_original_copy_tables ();
ad4a85ad 3316
a652753d 3317 if (changed > 0)
ad4a85ad 3318 {
3319 scev_reset ();
a652753d 3320
3321 if (changed > 1)
3322 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3323
eb2a640e 3324 ret = TODO_cleanup_cfg;
ad4a85ad 3325 }
eb2a640e 3326
3327 return ret;
ad4a85ad 3328}
64641360 3329
3330/* Predictive commoning Pass. */
3331
3332static unsigned
b3083327 3333run_tree_predictive_commoning (struct function *fun)
64641360 3334{
b3083327 3335 if (number_of_loops (fun) <= 1)
64641360 3336 return 0;
3337
3338 return tree_predictive_commoning ();
3339}
3340
64641360 3341namespace {
3342
3343const pass_data pass_data_predcom =
3344{
3345 GIMPLE_PASS, /* type */
3346 "pcom", /* name */
3347 OPTGROUP_LOOP, /* optinfo_flags */
64641360 3348 TV_PREDCOM, /* tv_id */
3349 PROP_cfg, /* properties_required */
3350 0, /* properties_provided */
3351 0, /* properties_destroyed */
3352 0, /* todo_flags_start */
3353 TODO_update_ssa_only_virtuals, /* todo_flags_finish */
3354};
3355
3356class pass_predcom : public gimple_opt_pass
3357{
3358public:
3359 pass_predcom (gcc::context *ctxt)
3360 : gimple_opt_pass (pass_data_predcom, ctxt)
3361 {}
3362
3363 /* opt_pass methods: */
31315c24 3364 virtual bool gate (function *) { return flag_predictive_commoning != 0; }
b3083327 3365 virtual unsigned int execute (function *fun)
65b0537f 3366 {
b3083327 3367 return run_tree_predictive_commoning (fun);
65b0537f 3368 }
64641360 3369
3370}; // class pass_predcom
3371
3372} // anon namespace
3373
3374gimple_opt_pass *
3375make_pass_predcom (gcc::context *ctxt)
3376{
3377 return new pass_predcom (ctxt);
3378}
3379
3380