]> git.ipfire.org Git - people/ms/gcc.git/blob - gcc/tree-predcom.cc
c++: namespace-scoped friend in local class [PR69410]
[people/ms/gcc.git] / gcc / tree-predcom.cc
1 /* Predictive commoning.
2 Copyright (C) 2005-2023 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This file implements the predictive commoning optimization. Predictive
21 commoning can be viewed as CSE around a loop, and with some improvements,
22 as generalized strength reduction-- i.e., reusing values computed in
23 earlier iterations of a loop in the later ones. So far, the pass only
24 handles the most useful case, that is, reusing values of memory references.
25 If you think this is all just a special case of PRE, you are sort of right;
26 however, concentrating on loops is simpler, and makes it possible to
27 incorporate data dependence analysis to detect the opportunities, perform
28 loop unrolling to avoid copies together with renaming immediately,
29 and if needed, we could also take register pressure into account.
30
31 Let us demonstrate what is done on an example:
32
33 for (i = 0; i < 100; i++)
34 {
35 a[i+2] = a[i] + a[i+1];
36 b[10] = b[10] + i;
37 c[i] = c[99 - i];
38 d[i] = d[i + 1];
39 }
40
41 1) We find data references in the loop, and split them to mutually
42 independent groups (i.e., we find components of a data dependence
43 graph). We ignore read-read dependences whose distance is not constant.
44 (TODO -- we could also ignore antidependences). In this example, we
45 find the following groups:
46
47 a[i]{read}, a[i+1]{read}, a[i+2]{write}
48 b[10]{read}, b[10]{write}
49 c[99 - i]{read}, c[i]{write}
50 d[i + 1]{read}, d[i]{write}
51
52 2) Inside each of the group, we verify several conditions:
53 a) all the references must differ in indices only, and the indices
54 must all have the same step
55 b) the references must dominate loop latch (and thus, they must be
56 ordered by dominance relation).
57 c) the distance of the indices must be a small multiple of the step
58 We are then able to compute the difference of the references (# of
59 iterations before they point to the same place as the first of them).
60 Also, in case there are writes in the loop, we split the groups into
61 chains whose head is the write whose values are used by the reads in
62 the same chain. The chains are then processed independently,
63 making the further transformations simpler. Also, the shorter chains
64 need the same number of registers, but may require lower unrolling
65 factor in order to get rid of the copies on the loop latch.
66
67 In our example, we get the following chains (the chain for c is invalid).
68
69 a[i]{read,+0}, a[i+1]{read,-1}, a[i+2]{write,-2}
70 b[10]{read,+0}, b[10]{write,+0}
71 d[i + 1]{read,+0}, d[i]{write,+1}
72
73 3) For each read, we determine the read or write whose value it reuses,
74 together with the distance of this reuse. I.e. we take the last
75 reference before it with distance 0, or the last of the references
76 with the smallest positive distance to the read. Then, we remove
77 the references that are not used in any of these chains, discard the
78 empty groups, and propagate all the links so that they point to the
79 single root reference of the chain (adjusting their distance
80 appropriately). Some extra care needs to be taken for references with
81 step 0. In our example (the numbers indicate the distance of the
82 reuse),
83
84 a[i] --> (*) 2, a[i+1] --> (*) 1, a[i+2] (*)
85 b[10] --> (*) 1, b[10] (*)
86
87 4) The chains are combined together if possible. If the corresponding
88 elements of two chains are always combined together with the same
89 operator, we remember just the result of this combination, instead
90 of remembering the values separately. We may need to perform
91 reassociation to enable combining, for example
92
93 e[i] + f[i+1] + e[i+1] + f[i]
94
95 can be reassociated as
96
97 (e[i] + f[i]) + (e[i+1] + f[i+1])
98
99 and we can combine the chains for e and f into one chain.
100
101 5) For each root reference (end of the chain) R, let N be maximum distance
102 of a reference reusing its value. Variables R0 up to RN are created,
103 together with phi nodes that transfer values from R1 .. RN to
104 R0 .. R(N-1).
105 Initial values are loaded to R0..R(N-1) (in case not all references
106 must necessarily be accessed and they may trap, we may fail here;
107 TODO sometimes, the loads could be guarded by a check for the number
108 of iterations). Values loaded/stored in roots are also copied to
109 RN. Other reads are replaced with the appropriate variable Ri.
110 Everything is put to SSA form.
111
112 As a small improvement, if R0 is dead after the root (i.e., all uses of
113 the value with the maximum distance dominate the root), we can avoid
114 creating RN and use R0 instead of it.
115
116 In our example, we get (only the parts concerning a and b are shown):
117 for (i = 0; i < 100; i++)
118 {
119 f = phi (a[0], s);
120 s = phi (a[1], f);
121 x = phi (b[10], x);
122
123 f = f + s;
124 a[i+2] = f;
125 x = x + i;
126 b[10] = x;
127 }
128
129 6) Factor F for unrolling is determined as the smallest common multiple of
130 (N + 1) for each root reference (N for references for that we avoided
131 creating RN). If F and the loop is small enough, loop is unrolled F
132 times. The stores to RN (R0) in the copies of the loop body are
133 periodically replaced with R0, R1, ... (R1, R2, ...), so that they can
134 be coalesced and the copies can be eliminated.
135
136 TODO -- copy propagation and other optimizations may change the live
137 ranges of the temporary registers and prevent them from being coalesced;
138 this may increase the register pressure.
139
140 In our case, F = 2 and the (main loop of the) result is
141
142 for (i = 0; i < ...; i += 2)
143 {
144 f = phi (a[0], f);
145 s = phi (a[1], s);
146 x = phi (b[10], x);
147
148 f = f + s;
149 a[i+2] = f;
150 x = x + i;
151 b[10] = x;
152
153 s = s + f;
154 a[i+3] = s;
155 x = x + i;
156 b[10] = x;
157 }
158
159 Apart from predictive commoning on Load-Load and Store-Load chains, we
160 also support Store-Store chains -- stores killed by other store can be
161 eliminated. Given below example:
162
163 for (i = 0; i < n; i++)
164 {
165 a[i] = 1;
166 a[i+2] = 2;
167 }
168
169 It can be replaced with:
170
171 t0 = a[0];
172 t1 = a[1];
173 for (i = 0; i < n; i++)
174 {
175 a[i] = 1;
176 t2 = 2;
177 t0 = t1;
178 t1 = t2;
179 }
180 a[n] = t0;
181 a[n+1] = t1;
182
183 If the loop runs more than 1 iterations, it can be further simplified into:
184
185 for (i = 0; i < n; i++)
186 {
187 a[i] = 1;
188 }
189 a[n] = 2;
190 a[n+1] = 2;
191
192 The interesting part is this can be viewed either as general store motion
193 or general dead store elimination in either intra/inter-iterations way.
194
195 With trivial effort, we also support load inside Store-Store chains if the
196 load is dominated by a store statement in the same iteration of loop. You
197 can see this as a restricted Store-Mixed-Load-Store chain.
198
199 TODO: For now, we don't support store-store chains in multi-exit loops. We
200 force to not unroll in case of store-store chain even if other chains might
201 ask for unroll.
202
203 Predictive commoning can be generalized for arbitrary computations (not
204 just memory loads), and also nontrivial transfer functions (e.g., replacing
205 i * i with ii_last + 2 * i + 1), to generalize strength reduction. */
206
207 #include "config.h"
208 #include "system.h"
209 #include "coretypes.h"
210 #include "backend.h"
211 #include "rtl.h"
212 #include "tree.h"
213 #include "gimple.h"
214 #include "predict.h"
215 #include "tree-pass.h"
216 #include "ssa.h"
217 #include "gimple-pretty-print.h"
218 #include "alias.h"
219 #include "fold-const.h"
220 #include "cfgloop.h"
221 #include "tree-eh.h"
222 #include "gimplify.h"
223 #include "gimple-iterator.h"
224 #include "gimplify-me.h"
225 #include "tree-ssa-loop-ivopts.h"
226 #include "tree-ssa-loop-manip.h"
227 #include "tree-ssa-loop-niter.h"
228 #include "tree-ssa-loop.h"
229 #include "tree-into-ssa.h"
230 #include "tree-dfa.h"
231 #include "tree-ssa.h"
232 #include "tree-data-ref.h"
233 #include "tree-scalar-evolution.h"
234 #include "tree-affine.h"
235 #include "builtins.h"
236 #include "opts.h"
237
238 /* The maximum number of iterations between the considered memory
239 references. */
240
241 #define MAX_DISTANCE (target_avail_regs < 16 ? 4 : 8)
242
243 /* Data references (or phi nodes that carry data reference values across
244 loop iterations). */
245
246 typedef class dref_d
247 {
248 public:
249 /* The reference itself. */
250 struct data_reference *ref;
251
252 /* The statement in that the reference appears. */
253 gimple *stmt;
254
255 /* In case that STMT is a phi node, this field is set to the SSA name
256 defined by it in replace_phis_by_defined_names (in order to avoid
257 pointing to phi node that got reallocated in the meantime). */
258 tree name_defined_by_phi;
259
260 /* Distance of the reference from the root of the chain (in number of
261 iterations of the loop). */
262 unsigned distance;
263
264 /* Number of iterations offset from the first reference in the component. */
265 widest_int offset;
266
267 /* Number of the reference in a component, in dominance ordering. */
268 unsigned pos;
269
270 /* True if the memory reference is always accessed when the loop is
271 entered. */
272 unsigned always_accessed : 1;
273 } *dref;
274
275
276 /* Type of the chain of the references. */
277
278 enum chain_type
279 {
280 /* The addresses of the references in the chain are constant. */
281 CT_INVARIANT,
282
283 /* There are only loads in the chain. */
284 CT_LOAD,
285
286 /* Root of the chain is store, the rest are loads. */
287 CT_STORE_LOAD,
288
289 /* There are only stores in the chain. */
290 CT_STORE_STORE,
291
292 /* A combination of two chains. */
293 CT_COMBINATION
294 };
295
296 /* Chains of data references. */
297
298 typedef struct chain
299 {
300 chain (chain_type t) : type (t), op (ERROR_MARK), rslt_type (NULL_TREE),
301 ch1 (NULL), ch2 (NULL), length (0), init_seq (NULL), fini_seq (NULL),
302 has_max_use_after (false), all_always_accessed (false), combined (false),
303 inv_store_elimination (false) {}
304
305 /* Type of the chain. */
306 enum chain_type type;
307
308 /* For combination chains, the operator and the two chains that are
309 combined, and the type of the result. */
310 enum tree_code op;
311 tree rslt_type;
312 struct chain *ch1, *ch2;
313
314 /* The references in the chain. */
315 auto_vec<dref> refs;
316
317 /* The maximum distance of the reference in the chain from the root. */
318 unsigned length;
319
320 /* The variables used to copy the value throughout iterations. */
321 auto_vec<tree> vars;
322
323 /* Initializers for the variables. */
324 auto_vec<tree> inits;
325
326 /* Finalizers for the eliminated stores. */
327 auto_vec<tree> finis;
328
329 /* gimple stmts intializing the initial variables of the chain. */
330 gimple_seq init_seq;
331
332 /* gimple stmts finalizing the eliminated stores of the chain. */
333 gimple_seq fini_seq;
334
335 /* True if there is a use of a variable with the maximal distance
336 that comes after the root in the loop. */
337 unsigned has_max_use_after : 1;
338
339 /* True if all the memory references in the chain are always accessed. */
340 unsigned all_always_accessed : 1;
341
342 /* True if this chain was combined together with some other chain. */
343 unsigned combined : 1;
344
345 /* True if this is store elimination chain and eliminated stores store
346 loop invariant value into memory. */
347 unsigned inv_store_elimination : 1;
348 } *chain_p;
349
350
351 /* Describes the knowledge about the step of the memory references in
352 the component. */
353
354 enum ref_step_type
355 {
356 /* The step is zero. */
357 RS_INVARIANT,
358
359 /* The step is nonzero. */
360 RS_NONZERO,
361
362 /* The step may or may not be nonzero. */
363 RS_ANY
364 };
365
366 /* Components of the data dependence graph. */
367
368 struct component
369 {
370 component (bool es) : comp_step (RS_ANY), eliminate_store_p (es),
371 next (NULL) {}
372
373 /* The references in the component. */
374 auto_vec<dref> refs;
375
376 /* What we know about the step of the references in the component. */
377 enum ref_step_type comp_step;
378
379 /* True if all references in component are stores and we try to do
380 intra/inter loop iteration dead store elimination. */
381 bool eliminate_store_p;
382
383 /* Next component in the list. */
384 struct component *next;
385 };
386
387 /* A class to encapsulate the global states used for predictive
388 commoning work on top of one given LOOP. */
389
390 class pcom_worker
391 {
392 public:
393 pcom_worker (loop_p l) : m_loop (l), m_cache (NULL) {}
394
395 ~pcom_worker ()
396 {
397 free_data_refs (m_datarefs);
398 free_dependence_relations (m_dependences);
399 free_affine_expand_cache (&m_cache);
400 release_chains ();
401 }
402
403 pcom_worker (const pcom_worker &) = delete;
404 pcom_worker &operator= (const pcom_worker &) = delete;
405
406 /* Performs predictive commoning. */
407 unsigned tree_predictive_commoning_loop (bool allow_unroll_p);
408
409 /* Perform the predictive commoning optimization for chains, make this
410 public for being called in callback execute_pred_commoning_cbck. */
411 void execute_pred_commoning (bitmap tmp_vars);
412
413 private:
414 /* The pointer to the given loop. */
415 loop_p m_loop;
416
417 /* All data references. */
418 auto_vec<data_reference_p, 10> m_datarefs;
419
420 /* All data dependences. */
421 auto_vec<ddr_p, 10> m_dependences;
422
423 /* All chains. */
424 auto_vec<chain_p> m_chains;
425
426 /* Bitmap of ssa names defined by looparound phi nodes covered by chains. */
427 auto_bitmap m_looparound_phis;
428
429 typedef hash_map<tree, name_expansion *> tree_expand_map_t;
430 /* Cache used by tree_to_aff_combination_expand. */
431 tree_expand_map_t *m_cache;
432
433 /* Splits dependence graph to components. */
434 struct component *split_data_refs_to_components ();
435
436 /* Check the conditions on references inside each of components COMPS,
437 and remove the unsuitable components from the list. */
438 struct component *filter_suitable_components (struct component *comps);
439
440 /* Find roots of the values and determine distances in components COMPS,
441 and separates the references to chains. */
442 void determine_roots (struct component *comps);
443
444 /* Prepare initializers for chains, and free chains that cannot
445 be used because the initializers might trap. */
446 void prepare_initializers ();
447
448 /* Generates finalizer memory reference for chains. Returns true if
449 finalizer code generation for chains breaks loop closed ssa form. */
450 bool prepare_finalizers ();
451
452 /* Try to combine the chains. */
453 void try_combine_chains ();
454
455 /* Frees CHAINS. */
456 void release_chains ();
457
458 /* Frees a chain CHAIN. */
459 void release_chain (chain_p chain);
460
461 /* Prepare initializers for CHAIN. Returns false if this is impossible
462 because one of these initializers may trap, true otherwise. */
463 bool prepare_initializers_chain (chain_p chain);
464
465 /* Generates finalizer memory references for CHAIN. Returns true
466 if finalizer code for CHAIN can be generated, otherwise false. */
467 bool prepare_finalizers_chain (chain_p chain);
468
469 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
470 void aff_combination_dr_offset (struct data_reference *dr, aff_tree *offset);
471
472 /* Determines number of iterations of the innermost enclosing loop before
473 B refers to exactly the same location as A and stores it to OFF. */
474 bool determine_offset (struct data_reference *a, struct data_reference *b,
475 poly_widest_int *off);
476
477 /* Returns true if the component COMP satisfies the conditions
478 described in 2) at the beginning of this file. */
479 bool suitable_component_p (struct component *comp);
480
481 /* Returns true if REF is a valid initializer for ROOT with given
482 DISTANCE (in iterations of the innermost enclosing loop). */
483 bool valid_initializer_p (struct data_reference *ref, unsigned distance,
484 struct data_reference *root);
485
486 /* Finds looparound phi node of loop that copies the value of REF. */
487 gphi *find_looparound_phi (dref ref, dref root);
488
489 /* Find roots of the values and determine distances in the component
490 COMP. The references are redistributed into chains. */
491 void determine_roots_comp (struct component *comp);
492
493 /* For references in CHAIN that are copied around the loop, add the
494 results of such copies to the chain. */
495 void add_looparound_copies (chain_p chain);
496
497 /* Returns the single statement in that NAME is used, excepting
498 the looparound phi nodes contained in one of the chains. */
499 gimple *single_nonlooparound_use (tree name);
500
501 /* Remove statement STMT, as well as the chain of assignments in that
502 it is used. */
503 void remove_stmt (gimple *stmt);
504
505 /* Perform the predictive commoning optimization for a chain CHAIN. */
506 void execute_pred_commoning_chain (chain_p chain, bitmap tmp_vars);
507
508 /* Returns the modify statement that uses NAME. */
509 gimple *find_use_stmt (tree *name);
510
511 /* If the operation used in STMT is associative and commutative, go
512 through the tree of the same operations and returns its root. */
513 gimple *find_associative_operation_root (gimple *stmt, unsigned *distance);
514
515 /* Returns the common statement in that NAME1 and NAME2 have a use. */
516 gimple *find_common_use_stmt (tree *name1, tree *name2);
517
518 /* Checks whether R1 and R2 are combined together using CODE, with the
519 result in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order
520 R2 CODE R1 if it is true. */
521 bool combinable_refs_p (dref r1, dref r2, enum tree_code *code, bool *swap,
522 tree *rslt_type);
523
524 /* Reassociates the expression in that NAME1 and NAME2 are used so that
525 they are combined in a single statement, and returns this statement. */
526 gimple *reassociate_to_the_same_stmt (tree name1, tree name2);
527
528 /* Returns the statement that combines references R1 and R2. */
529 gimple *stmt_combining_refs (dref r1, dref r2);
530
531 /* Tries to combine chains CH1 and CH2 together. */
532 chain_p combine_chains (chain_p ch1, chain_p ch2);
533 };
534
535 /* Dumps data reference REF to FILE. */
536
537 extern void dump_dref (FILE *, dref);
538 void
539 dump_dref (FILE *file, dref ref)
540 {
541 if (ref->ref)
542 {
543 fprintf (file, " ");
544 print_generic_expr (file, DR_REF (ref->ref), TDF_SLIM);
545 fprintf (file, " (id %u%s)\n", ref->pos,
546 DR_IS_READ (ref->ref) ? "" : ", write");
547
548 fprintf (file, " offset ");
549 print_decs (ref->offset, file);
550 fprintf (file, "\n");
551
552 fprintf (file, " distance %u\n", ref->distance);
553 }
554 else
555 {
556 if (gimple_code (ref->stmt) == GIMPLE_PHI)
557 fprintf (file, " looparound ref\n");
558 else
559 fprintf (file, " combination ref\n");
560 fprintf (file, " in statement ");
561 print_gimple_stmt (file, ref->stmt, 0, TDF_SLIM);
562 fprintf (file, "\n");
563 fprintf (file, " distance %u\n", ref->distance);
564 }
565
566 }
567
568 /* Dumps CHAIN to FILE. */
569
570 extern void dump_chain (FILE *, chain_p);
571 void
572 dump_chain (FILE *file, chain_p chain)
573 {
574 dref a;
575 const char *chain_type;
576 unsigned i;
577 tree var;
578
579 switch (chain->type)
580 {
581 case CT_INVARIANT:
582 chain_type = "Load motion";
583 break;
584
585 case CT_LOAD:
586 chain_type = "Loads-only";
587 break;
588
589 case CT_STORE_LOAD:
590 chain_type = "Store-loads";
591 break;
592
593 case CT_STORE_STORE:
594 chain_type = "Store-stores";
595 break;
596
597 case CT_COMBINATION:
598 chain_type = "Combination";
599 break;
600
601 default:
602 gcc_unreachable ();
603 }
604
605 fprintf (file, "%s chain %p%s\n", chain_type, (void *) chain,
606 chain->combined ? " (combined)" : "");
607 if (chain->type != CT_INVARIANT)
608 fprintf (file, " max distance %u%s\n", chain->length,
609 chain->has_max_use_after ? "" : ", may reuse first");
610
611 if (chain->type == CT_COMBINATION)
612 {
613 fprintf (file, " equal to %p %s %p in type ",
614 (void *) chain->ch1, op_symbol_code (chain->op),
615 (void *) chain->ch2);
616 print_generic_expr (file, chain->rslt_type, TDF_SLIM);
617 fprintf (file, "\n");
618 }
619
620 if (chain->vars.exists ())
621 {
622 fprintf (file, " vars");
623 FOR_EACH_VEC_ELT (chain->vars, i, var)
624 {
625 fprintf (file, " ");
626 print_generic_expr (file, var, TDF_SLIM);
627 }
628 fprintf (file, "\n");
629 }
630
631 if (chain->inits.exists ())
632 {
633 fprintf (file, " inits");
634 FOR_EACH_VEC_ELT (chain->inits, i, var)
635 {
636 fprintf (file, " ");
637 print_generic_expr (file, var, TDF_SLIM);
638 }
639 fprintf (file, "\n");
640 }
641
642 fprintf (file, " references:\n");
643 FOR_EACH_VEC_ELT (chain->refs, i, a)
644 dump_dref (file, a);
645
646 fprintf (file, "\n");
647 }
648
649 /* Dumps CHAINS to FILE. */
650
651 void
652 dump_chains (FILE *file, const vec<chain_p> &chains)
653 {
654 chain_p chain;
655 unsigned i;
656
657 FOR_EACH_VEC_ELT (chains, i, chain)
658 dump_chain (file, chain);
659 }
660
661 /* Dumps COMP to FILE. */
662
663 extern void dump_component (FILE *, struct component *);
664 void
665 dump_component (FILE *file, struct component *comp)
666 {
667 dref a;
668 unsigned i;
669
670 fprintf (file, "Component%s:\n",
671 comp->comp_step == RS_INVARIANT ? " (invariant)" : "");
672 FOR_EACH_VEC_ELT (comp->refs, i, a)
673 dump_dref (file, a);
674 fprintf (file, "\n");
675 }
676
677 /* Dumps COMPS to FILE. */
678
679 extern void dump_components (FILE *, struct component *);
680 void
681 dump_components (FILE *file, struct component *comps)
682 {
683 struct component *comp;
684
685 for (comp = comps; comp; comp = comp->next)
686 dump_component (file, comp);
687 }
688
689 /* Frees a chain CHAIN. */
690
691 void
692 pcom_worker::release_chain (chain_p chain)
693 {
694 dref ref;
695 unsigned i;
696
697 if (chain == NULL)
698 return;
699
700 FOR_EACH_VEC_ELT (chain->refs, i, ref)
701 free (ref);
702
703 if (chain->init_seq)
704 gimple_seq_discard (chain->init_seq);
705
706 if (chain->fini_seq)
707 gimple_seq_discard (chain->fini_seq);
708
709 delete chain;
710 }
711
712 /* Frees CHAINS. */
713
714 void
715 pcom_worker::release_chains ()
716 {
717 unsigned i;
718 chain_p chain;
719
720 FOR_EACH_VEC_ELT (m_chains, i, chain)
721 release_chain (chain);
722 }
723
724 /* Frees list of components COMPS. */
725
726 static void
727 release_components (struct component *comps)
728 {
729 struct component *act, *next;
730
731 for (act = comps; act; act = next)
732 {
733 next = act->next;
734 delete act;
735 }
736 }
737
738 /* Finds a root of tree given by FATHERS containing A, and performs path
739 shortening. */
740
741 static unsigned
742 component_of (vec<unsigned> &fathers, unsigned a)
743 {
744 unsigned root, n;
745
746 for (root = a; root != fathers[root]; root = fathers[root])
747 continue;
748
749 for (; a != root; a = n)
750 {
751 n = fathers[a];
752 fathers[a] = root;
753 }
754
755 return root;
756 }
757
758 /* Join operation for DFU. FATHERS gives the tree, SIZES are sizes of the
759 components, A and B are components to merge. */
760
761 static void
762 merge_comps (vec<unsigned> &fathers, vec<unsigned> &sizes,
763 unsigned a, unsigned b)
764 {
765 unsigned ca = component_of (fathers, a);
766 unsigned cb = component_of (fathers, b);
767
768 if (ca == cb)
769 return;
770
771 if (sizes[ca] < sizes[cb])
772 {
773 sizes[cb] += sizes[ca];
774 fathers[ca] = cb;
775 }
776 else
777 {
778 sizes[ca] += sizes[cb];
779 fathers[cb] = ca;
780 }
781 }
782
783 /* Returns true if A is a reference that is suitable for predictive commoning
784 in the innermost loop that contains it. REF_STEP is set according to the
785 step of the reference A. */
786
787 static bool
788 suitable_reference_p (struct data_reference *a, enum ref_step_type *ref_step)
789 {
790 tree ref = DR_REF (a), step = DR_STEP (a);
791
792 if (!step
793 || TREE_THIS_VOLATILE (ref)
794 || !is_gimple_reg_type (TREE_TYPE (ref))
795 || tree_could_throw_p (ref))
796 return false;
797
798 if (integer_zerop (step))
799 *ref_step = RS_INVARIANT;
800 else if (integer_nonzerop (step))
801 *ref_step = RS_NONZERO;
802 else
803 *ref_step = RS_ANY;
804
805 return true;
806 }
807
808 /* Stores DR_OFFSET (DR) + DR_INIT (DR) to OFFSET. */
809
810 void
811 pcom_worker::aff_combination_dr_offset (struct data_reference *dr,
812 aff_tree *offset)
813 {
814 tree type = TREE_TYPE (DR_OFFSET (dr));
815 aff_tree delta;
816
817 tree_to_aff_combination_expand (DR_OFFSET (dr), type, offset, &m_cache);
818 aff_combination_const (&delta, type, wi::to_poly_widest (DR_INIT (dr)));
819 aff_combination_add (offset, &delta);
820 }
821
822 /* Determines number of iterations of the innermost enclosing loop before B
823 refers to exactly the same location as A and stores it to OFF. If A and
824 B do not have the same step, they never meet, or anything else fails,
825 returns false, otherwise returns true. Both A and B are assumed to
826 satisfy suitable_reference_p. */
827
828 bool
829 pcom_worker::determine_offset (struct data_reference *a,
830 struct data_reference *b, poly_widest_int *off)
831 {
832 aff_tree diff, baseb, step;
833 tree typea, typeb;
834
835 /* Check that both the references access the location in the same type. */
836 typea = TREE_TYPE (DR_REF (a));
837 typeb = TREE_TYPE (DR_REF (b));
838 if (!useless_type_conversion_p (typeb, typea))
839 return false;
840
841 /* Check whether the base address and the step of both references is the
842 same. */
843 if (!operand_equal_p (DR_STEP (a), DR_STEP (b), 0)
844 || !operand_equal_p (DR_BASE_ADDRESS (a), DR_BASE_ADDRESS (b), 0))
845 return false;
846
847 if (integer_zerop (DR_STEP (a)))
848 {
849 /* If the references have loop invariant address, check that they access
850 exactly the same location. */
851 *off = 0;
852 return (operand_equal_p (DR_OFFSET (a), DR_OFFSET (b), 0)
853 && operand_equal_p (DR_INIT (a), DR_INIT (b), 0));
854 }
855
856 /* Compare the offsets of the addresses, and check whether the difference
857 is a multiple of step. */
858 aff_combination_dr_offset (a, &diff);
859 aff_combination_dr_offset (b, &baseb);
860 aff_combination_scale (&baseb, -1);
861 aff_combination_add (&diff, &baseb);
862
863 tree_to_aff_combination_expand (DR_STEP (a), TREE_TYPE (DR_STEP (a)),
864 &step, &m_cache);
865 return aff_combination_constant_multiple_p (&diff, &step, off);
866 }
867
868 /* Returns the last basic block in LOOP for that we are sure that
869 it is executed whenever the loop is entered. */
870
871 static basic_block
872 last_always_executed_block (class loop *loop)
873 {
874 unsigned i;
875 auto_vec<edge> exits = get_loop_exit_edges (loop);
876 edge ex;
877 basic_block last = loop->latch;
878
879 FOR_EACH_VEC_ELT (exits, i, ex)
880 last = nearest_common_dominator (CDI_DOMINATORS, last, ex->src);
881
882 return last;
883 }
884
885 /* Splits dependence graph on DATAREFS described by DEPENDENCES to
886 components. */
887
888 struct component *
889 pcom_worker::split_data_refs_to_components ()
890 {
891 unsigned i, n = m_datarefs.length ();
892 unsigned ca, ia, ib, bad;
893 struct data_reference *dr, *dra, *drb;
894 struct data_dependence_relation *ddr;
895 struct component *comp_list = NULL, *comp;
896 dref dataref;
897 /* Don't do store elimination if loop has multiple exit edges. */
898 bool eliminate_store_p = single_exit (m_loop) != NULL;
899 basic_block last_always_executed = last_always_executed_block (m_loop);
900 auto_bitmap no_store_store_comps;
901 auto_vec<unsigned> comp_father (n + 1);
902 auto_vec<unsigned> comp_size (n + 1);
903 comp_father.quick_grow (n + 1);
904 comp_size.quick_grow (n + 1);
905
906 FOR_EACH_VEC_ELT (m_datarefs, i, dr)
907 {
908 if (!DR_REF (dr))
909 /* A fake reference for call or asm_expr that may clobber memory;
910 just fail. */
911 return NULL;
912 /* predcom pass isn't prepared to handle calls with data references. */
913 if (is_gimple_call (DR_STMT (dr)))
914 return NULL;
915 dr->aux = (void *) (size_t) i;
916 comp_father[i] = i;
917 comp_size[i] = 1;
918 }
919
920 /* A component reserved for the "bad" data references. */
921 comp_father[n] = n;
922 comp_size[n] = 1;
923
924 FOR_EACH_VEC_ELT (m_datarefs, i, dr)
925 {
926 enum ref_step_type dummy;
927
928 if (!suitable_reference_p (dr, &dummy))
929 {
930 ia = (unsigned) (size_t) dr->aux;
931 merge_comps (comp_father, comp_size, n, ia);
932 }
933 }
934
935 FOR_EACH_VEC_ELT (m_dependences, i, ddr)
936 {
937 poly_widest_int dummy_off;
938
939 if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
940 continue;
941
942 dra = DDR_A (ddr);
943 drb = DDR_B (ddr);
944
945 /* Don't do store elimination if there is any unknown dependence for
946 any store data reference. */
947 if ((DR_IS_WRITE (dra) || DR_IS_WRITE (drb))
948 && (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
949 || DDR_NUM_DIST_VECTS (ddr) == 0))
950 eliminate_store_p = false;
951
952 ia = component_of (comp_father, (unsigned) (size_t) dra->aux);
953 ib = component_of (comp_father, (unsigned) (size_t) drb->aux);
954 if (ia == ib)
955 continue;
956
957 bad = component_of (comp_father, n);
958
959 /* If both A and B are reads, we may ignore unsuitable dependences. */
960 if (DR_IS_READ (dra) && DR_IS_READ (drb))
961 {
962 if (ia == bad || ib == bad
963 || !determine_offset (dra, drb, &dummy_off))
964 continue;
965 }
966 /* If A is read and B write or vice versa and there is unsuitable
967 dependence, instead of merging both components into a component
968 that will certainly not pass suitable_component_p, just put the
969 read into bad component, perhaps at least the write together with
970 all the other data refs in it's component will be optimizable. */
971 else if (DR_IS_READ (dra) && ib != bad)
972 {
973 if (ia == bad)
974 {
975 bitmap_set_bit (no_store_store_comps, ib);
976 continue;
977 }
978 else if (!determine_offset (dra, drb, &dummy_off))
979 {
980 bitmap_set_bit (no_store_store_comps, ib);
981 merge_comps (comp_father, comp_size, bad, ia);
982 continue;
983 }
984 }
985 else if (DR_IS_READ (drb) && ia != bad)
986 {
987 if (ib == bad)
988 {
989 bitmap_set_bit (no_store_store_comps, ia);
990 continue;
991 }
992 else if (!determine_offset (dra, drb, &dummy_off))
993 {
994 bitmap_set_bit (no_store_store_comps, ia);
995 merge_comps (comp_father, comp_size, bad, ib);
996 continue;
997 }
998 }
999 else if (DR_IS_WRITE (dra) && DR_IS_WRITE (drb)
1000 && ia != bad && ib != bad
1001 && !determine_offset (dra, drb, &dummy_off))
1002 {
1003 merge_comps (comp_father, comp_size, bad, ia);
1004 merge_comps (comp_father, comp_size, bad, ib);
1005 continue;
1006 }
1007
1008 merge_comps (comp_father, comp_size, ia, ib);
1009 }
1010
1011 if (eliminate_store_p)
1012 {
1013 tree niters = number_of_latch_executions (m_loop);
1014
1015 /* Don't do store elimination if niters info is unknown because stores
1016 in the last iteration can't be eliminated and we need to recover it
1017 after loop. */
1018 eliminate_store_p = (niters != NULL_TREE && niters != chrec_dont_know);
1019 }
1020
1021 auto_vec<struct component *> comps;
1022 comps.safe_grow_cleared (n, true);
1023 bad = component_of (comp_father, n);
1024 FOR_EACH_VEC_ELT (m_datarefs, i, dr)
1025 {
1026 ia = (unsigned) (size_t) dr->aux;
1027 ca = component_of (comp_father, ia);
1028 if (ca == bad)
1029 continue;
1030
1031 comp = comps[ca];
1032 if (!comp)
1033 {
1034 comp = new component (eliminate_store_p);
1035 comp->refs.reserve_exact (comp_size[ca]);
1036 comps[ca] = comp;
1037 }
1038
1039 dataref = XCNEW (class dref_d);
1040 dataref->ref = dr;
1041 dataref->stmt = DR_STMT (dr);
1042 dataref->offset = 0;
1043 dataref->distance = 0;
1044
1045 dataref->always_accessed
1046 = dominated_by_p (CDI_DOMINATORS, last_always_executed,
1047 gimple_bb (dataref->stmt));
1048 dataref->pos = comp->refs.length ();
1049 comp->refs.quick_push (dataref);
1050 }
1051
1052 if (eliminate_store_p)
1053 {
1054 bitmap_iterator bi;
1055 EXECUTE_IF_SET_IN_BITMAP (no_store_store_comps, 0, ia, bi)
1056 {
1057 ca = component_of (comp_father, ia);
1058 if (ca != bad)
1059 comps[ca]->eliminate_store_p = false;
1060 }
1061 }
1062
1063 for (i = 0; i < n; i++)
1064 {
1065 comp = comps[i];
1066 if (comp)
1067 {
1068 comp->next = comp_list;
1069 comp_list = comp;
1070 }
1071 }
1072 return comp_list;
1073 }
1074
1075 /* Returns true if the component COMP satisfies the conditions
1076 described in 2) at the beginning of this file. */
1077
1078 bool
1079 pcom_worker::suitable_component_p (struct component *comp)
1080 {
1081 unsigned i;
1082 dref a, first;
1083 basic_block ba, bp = m_loop->header;
1084 bool ok, has_write = false;
1085
1086 FOR_EACH_VEC_ELT (comp->refs, i, a)
1087 {
1088 ba = gimple_bb (a->stmt);
1089
1090 if (!just_once_each_iteration_p (m_loop, ba))
1091 return false;
1092
1093 gcc_assert (dominated_by_p (CDI_DOMINATORS, ba, bp));
1094 bp = ba;
1095
1096 if (DR_IS_WRITE (a->ref))
1097 has_write = true;
1098 }
1099
1100 first = comp->refs[0];
1101 ok = suitable_reference_p (first->ref, &comp->comp_step);
1102 gcc_assert (ok);
1103 first->offset = 0;
1104
1105 for (i = 1; comp->refs.iterate (i, &a); i++)
1106 {
1107 /* Polynomial offsets are no use, since we need to know the
1108 gap between iteration numbers at compile time. */
1109 poly_widest_int offset;
1110 if (!determine_offset (first->ref, a->ref, &offset)
1111 || !offset.is_constant (&a->offset))
1112 return false;
1113
1114 enum ref_step_type a_step;
1115 gcc_checking_assert (suitable_reference_p (a->ref, &a_step)
1116 && a_step == comp->comp_step);
1117 }
1118
1119 /* If there is a write inside the component, we must know whether the
1120 step is nonzero or not -- we would not otherwise be able to recognize
1121 whether the value accessed by reads comes from the OFFSET-th iteration
1122 or the previous one. */
1123 if (has_write && comp->comp_step == RS_ANY)
1124 return false;
1125
1126 return true;
1127 }
1128
1129 /* Check the conditions on references inside each of components COMPS,
1130 and remove the unsuitable components from the list. The new list
1131 of components is returned. The conditions are described in 2) at
1132 the beginning of this file. */
1133
1134 struct component *
1135 pcom_worker::filter_suitable_components (struct component *comps)
1136 {
1137 struct component **comp, *act;
1138
1139 for (comp = &comps; *comp; )
1140 {
1141 act = *comp;
1142 if (suitable_component_p (act))
1143 comp = &act->next;
1144 else
1145 {
1146 dref ref;
1147 unsigned i;
1148
1149 *comp = act->next;
1150 FOR_EACH_VEC_ELT (act->refs, i, ref)
1151 free (ref);
1152 delete act;
1153 }
1154 }
1155
1156 return comps;
1157 }
1158
1159 /* Compares two drefs A and B by their offset and position. Callback for
1160 qsort. */
1161
1162 static int
1163 order_drefs (const void *a, const void *b)
1164 {
1165 const dref *const da = (const dref *) a;
1166 const dref *const db = (const dref *) b;
1167 int offcmp = wi::cmps ((*da)->offset, (*db)->offset);
1168
1169 if (offcmp != 0)
1170 return offcmp;
1171
1172 return (*da)->pos - (*db)->pos;
1173 }
1174
1175 /* Compares two drefs A and B by their position. Callback for qsort. */
1176
1177 static int
1178 order_drefs_by_pos (const void *a, const void *b)
1179 {
1180 const dref *const da = (const dref *) a;
1181 const dref *const db = (const dref *) b;
1182
1183 return (*da)->pos - (*db)->pos;
1184 }
1185
1186 /* Returns root of the CHAIN. */
1187
1188 static inline dref
1189 get_chain_root (chain_p chain)
1190 {
1191 return chain->refs[0];
1192 }
1193
1194 /* Given CHAIN, returns the last write ref at DISTANCE, or NULL if it doesn't
1195 exist. */
1196
1197 static inline dref
1198 get_chain_last_write_at (chain_p chain, unsigned distance)
1199 {
1200 for (unsigned i = chain->refs.length (); i > 0; i--)
1201 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1202 && distance == chain->refs[i - 1]->distance)
1203 return chain->refs[i - 1];
1204
1205 return NULL;
1206 }
1207
1208 /* Given CHAIN, returns the last write ref with the same distance before load
1209 at index LOAD_IDX, or NULL if it doesn't exist. */
1210
1211 static inline dref
1212 get_chain_last_write_before_load (chain_p chain, unsigned load_idx)
1213 {
1214 gcc_assert (load_idx < chain->refs.length ());
1215
1216 unsigned distance = chain->refs[load_idx]->distance;
1217
1218 for (unsigned i = load_idx; i > 0; i--)
1219 if (DR_IS_WRITE (chain->refs[i - 1]->ref)
1220 && distance == chain->refs[i - 1]->distance)
1221 return chain->refs[i - 1];
1222
1223 return NULL;
1224 }
1225
1226 /* Adds REF to the chain CHAIN. */
1227
1228 static void
1229 add_ref_to_chain (chain_p chain, dref ref)
1230 {
1231 dref root = get_chain_root (chain);
1232
1233 gcc_assert (wi::les_p (root->offset, ref->offset));
1234 widest_int dist = ref->offset - root->offset;
1235 gcc_assert (wi::fits_uhwi_p (dist));
1236
1237 chain->refs.safe_push (ref);
1238
1239 ref->distance = dist.to_uhwi ();
1240
1241 if (ref->distance >= chain->length)
1242 {
1243 chain->length = ref->distance;
1244 chain->has_max_use_after = false;
1245 }
1246
1247 /* Promote this chain to CT_STORE_STORE if it has multiple stores. */
1248 if (DR_IS_WRITE (ref->ref))
1249 chain->type = CT_STORE_STORE;
1250
1251 /* Don't set the flag for store-store chain since there is no use. */
1252 if (chain->type != CT_STORE_STORE
1253 && ref->distance == chain->length
1254 && ref->pos > root->pos)
1255 chain->has_max_use_after = true;
1256
1257 chain->all_always_accessed &= ref->always_accessed;
1258 }
1259
1260 /* Returns the chain for invariant component COMP. */
1261
1262 static chain_p
1263 make_invariant_chain (struct component *comp)
1264 {
1265 chain_p chain = new struct chain (CT_INVARIANT);
1266 unsigned i;
1267 dref ref;
1268
1269 chain->all_always_accessed = true;
1270
1271 FOR_EACH_VEC_ELT (comp->refs, i, ref)
1272 {
1273 chain->refs.safe_push (ref);
1274 chain->all_always_accessed &= ref->always_accessed;
1275 }
1276
1277 chain->inits = vNULL;
1278 chain->finis = vNULL;
1279
1280 return chain;
1281 }
1282
1283 /* Make a new chain of type TYPE rooted at REF. */
1284
1285 static chain_p
1286 make_rooted_chain (dref ref, enum chain_type type)
1287 {
1288 chain_p chain = new struct chain (type);
1289
1290 chain->refs.safe_push (ref);
1291 chain->all_always_accessed = ref->always_accessed;
1292 ref->distance = 0;
1293
1294 chain->inits = vNULL;
1295 chain->finis = vNULL;
1296
1297 return chain;
1298 }
1299
1300 /* Returns true if CHAIN is not trivial. */
1301
1302 static bool
1303 nontrivial_chain_p (chain_p chain)
1304 {
1305 return chain != NULL && chain->refs.length () > 1;
1306 }
1307
1308 /* Returns the ssa name that contains the value of REF, or NULL_TREE if there
1309 is no such name. */
1310
1311 static tree
1312 name_for_ref (dref ref)
1313 {
1314 tree name;
1315
1316 if (is_gimple_assign (ref->stmt))
1317 {
1318 if (!ref->ref || DR_IS_READ (ref->ref))
1319 name = gimple_assign_lhs (ref->stmt);
1320 else
1321 name = gimple_assign_rhs1 (ref->stmt);
1322 }
1323 else
1324 name = PHI_RESULT (ref->stmt);
1325
1326 return (TREE_CODE (name) == SSA_NAME ? name : NULL_TREE);
1327 }
1328
1329 /* Returns true if REF is a valid initializer for ROOT with given DISTANCE (in
1330 iterations of the innermost enclosing loop). */
1331
1332 bool
1333 pcom_worker::valid_initializer_p (struct data_reference *ref, unsigned distance,
1334 struct data_reference *root)
1335 {
1336 aff_tree diff, base, step;
1337 poly_widest_int off;
1338
1339 /* Both REF and ROOT must be accessing the same object. */
1340 if (!operand_equal_p (DR_BASE_ADDRESS (ref), DR_BASE_ADDRESS (root), 0))
1341 return false;
1342
1343 /* The initializer is defined outside of loop, hence its address must be
1344 invariant inside the loop. */
1345 gcc_assert (integer_zerop (DR_STEP (ref)));
1346
1347 /* If the address of the reference is invariant, initializer must access
1348 exactly the same location. */
1349 if (integer_zerop (DR_STEP (root)))
1350 return (operand_equal_p (DR_OFFSET (ref), DR_OFFSET (root), 0)
1351 && operand_equal_p (DR_INIT (ref), DR_INIT (root), 0));
1352
1353 /* Verify that this index of REF is equal to the root's index at
1354 -DISTANCE-th iteration. */
1355 aff_combination_dr_offset (root, &diff);
1356 aff_combination_dr_offset (ref, &base);
1357 aff_combination_scale (&base, -1);
1358 aff_combination_add (&diff, &base);
1359
1360 tree_to_aff_combination_expand (DR_STEP (root), TREE_TYPE (DR_STEP (root)),
1361 &step, &m_cache);
1362 if (!aff_combination_constant_multiple_p (&diff, &step, &off))
1363 return false;
1364
1365 if (maybe_ne (off, distance))
1366 return false;
1367
1368 return true;
1369 }
1370
1371 /* Finds looparound phi node of loop that copies the value of REF, and if its
1372 initial value is correct (equal to initial value of REF shifted by one
1373 iteration), returns the phi node. Otherwise, NULL_TREE is returned. ROOT
1374 is the root of the current chain. */
1375
1376 gphi *
1377 pcom_worker::find_looparound_phi (dref ref, dref root)
1378 {
1379 tree name, init, init_ref;
1380 gimple *init_stmt;
1381 edge latch = loop_latch_edge (m_loop);
1382 struct data_reference init_dr;
1383 gphi_iterator psi;
1384
1385 if (is_gimple_assign (ref->stmt))
1386 {
1387 if (DR_IS_READ (ref->ref))
1388 name = gimple_assign_lhs (ref->stmt);
1389 else
1390 name = gimple_assign_rhs1 (ref->stmt);
1391 }
1392 else
1393 name = PHI_RESULT (ref->stmt);
1394 if (!name)
1395 return NULL;
1396
1397 tree entry_vuse = NULL_TREE;
1398 gphi *phi = NULL;
1399 for (psi = gsi_start_phis (m_loop->header); !gsi_end_p (psi); gsi_next (&psi))
1400 {
1401 gphi *p = psi.phi ();
1402 if (PHI_ARG_DEF_FROM_EDGE (p, latch) == name)
1403 phi = p;
1404 else if (virtual_operand_p (gimple_phi_result (p)))
1405 entry_vuse = PHI_ARG_DEF_FROM_EDGE (p, loop_preheader_edge (m_loop));
1406 if (phi && entry_vuse)
1407 break;
1408 }
1409 if (!phi || !entry_vuse)
1410 return NULL;
1411
1412 init = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (m_loop));
1413 if (TREE_CODE (init) != SSA_NAME)
1414 return NULL;
1415 init_stmt = SSA_NAME_DEF_STMT (init);
1416 if (gimple_code (init_stmt) != GIMPLE_ASSIGN)
1417 return NULL;
1418 gcc_assert (gimple_assign_lhs (init_stmt) == init);
1419
1420 init_ref = gimple_assign_rhs1 (init_stmt);
1421 if (!REFERENCE_CLASS_P (init_ref)
1422 && !DECL_P (init_ref))
1423 return NULL;
1424
1425 /* Analyze the behavior of INIT_REF with respect to LOOP (innermost
1426 loop enclosing PHI). */
1427 memset (&init_dr, 0, sizeof (struct data_reference));
1428 DR_REF (&init_dr) = init_ref;
1429 DR_STMT (&init_dr) = phi;
1430 if (!dr_analyze_innermost (&DR_INNERMOST (&init_dr), init_ref, m_loop,
1431 init_stmt))
1432 return NULL;
1433
1434 if (!valid_initializer_p (&init_dr, ref->distance + 1, root->ref))
1435 return NULL;
1436
1437 /* Make sure nothing clobbers the location we re-use the initial value
1438 from. */
1439 if (entry_vuse != gimple_vuse (init_stmt))
1440 {
1441 ao_ref ref;
1442 ao_ref_init (&ref, init_ref);
1443 unsigned limit = param_sccvn_max_alias_queries_per_access;
1444 tree vdef = entry_vuse;
1445 do
1446 {
1447 gimple *def = SSA_NAME_DEF_STMT (vdef);
1448 if (limit-- == 0 || gimple_code (def) == GIMPLE_PHI)
1449 return NULL;
1450 if (stmt_may_clobber_ref_p_1 (def, &ref))
1451 /* When the stmt is an assign to init_ref we could in theory
1452 use its RHS for the initial value of the looparound PHI
1453 we replace in prepare_initializers_chain, but we have
1454 no convenient place to store this info at the moment. */
1455 return NULL;
1456 vdef = gimple_vuse (def);
1457 }
1458 while (vdef != gimple_vuse (init_stmt));
1459 }
1460
1461 return phi;
1462 }
1463
1464 /* Adds a reference for the looparound copy of REF in PHI to CHAIN. */
1465
1466 static void
1467 insert_looparound_copy (chain_p chain, dref ref, gphi *phi)
1468 {
1469 dref nw = XCNEW (class dref_d), aref;
1470 unsigned i;
1471
1472 nw->stmt = phi;
1473 nw->distance = ref->distance + 1;
1474 nw->always_accessed = 1;
1475
1476 FOR_EACH_VEC_ELT (chain->refs, i, aref)
1477 if (aref->distance >= nw->distance)
1478 break;
1479 chain->refs.safe_insert (i, nw);
1480
1481 if (nw->distance > chain->length)
1482 {
1483 chain->length = nw->distance;
1484 chain->has_max_use_after = false;
1485 }
1486 }
1487
1488 /* For references in CHAIN that are copied around the loop (created previously
1489 by PRE, or by user), add the results of such copies to the chain. This
1490 enables us to remove the copies by unrolling, and may need less registers
1491 (also, it may allow us to combine chains together). */
1492
1493 void
1494 pcom_worker::add_looparound_copies (chain_p chain)
1495 {
1496 unsigned i;
1497 dref ref, root = get_chain_root (chain);
1498 gphi *phi;
1499
1500 if (chain->type == CT_STORE_STORE)
1501 return;
1502
1503 FOR_EACH_VEC_ELT (chain->refs, i, ref)
1504 {
1505 phi = find_looparound_phi (ref, root);
1506 if (!phi)
1507 continue;
1508
1509 bitmap_set_bit (m_looparound_phis, SSA_NAME_VERSION (PHI_RESULT (phi)));
1510 insert_looparound_copy (chain, ref, phi);
1511 }
1512 }
1513
1514 /* Find roots of the values and determine distances in the component COMP.
1515 The references are redistributed into chains. */
1516
1517 void
1518 pcom_worker::determine_roots_comp (struct component *comp)
1519 {
1520 unsigned i;
1521 dref a;
1522 chain_p chain = NULL;
1523 widest_int last_ofs = 0;
1524 enum chain_type type;
1525
1526 /* Invariants are handled specially. */
1527 if (comp->comp_step == RS_INVARIANT)
1528 {
1529 chain = make_invariant_chain (comp);
1530 m_chains.safe_push (chain);
1531 return;
1532 }
1533
1534 /* Trivial component. */
1535 if (comp->refs.length () <= 1)
1536 {
1537 if (comp->refs.length () == 1)
1538 {
1539 free (comp->refs[0]);
1540 comp->refs.truncate (0);
1541 }
1542 return;
1543 }
1544
1545 comp->refs.qsort (order_drefs);
1546
1547 /* For Store-Store chain, we only support load if it is dominated by a
1548 store statement in the same iteration of loop. */
1549 if (comp->eliminate_store_p)
1550 for (a = NULL, i = 0; i < comp->refs.length (); i++)
1551 {
1552 if (DR_IS_WRITE (comp->refs[i]->ref))
1553 a = comp->refs[i];
1554 else if (a == NULL || a->offset != comp->refs[i]->offset)
1555 {
1556 /* If there is load that is not dominated by a store in the
1557 same iteration of loop, clear the flag so no Store-Store
1558 chain is generated for this component. */
1559 comp->eliminate_store_p = false;
1560 break;
1561 }
1562 }
1563
1564 /* Determine roots and create chains for components. */
1565 FOR_EACH_VEC_ELT (comp->refs, i, a)
1566 {
1567 if (!chain
1568 || (chain->type == CT_LOAD && DR_IS_WRITE (a->ref))
1569 || (!comp->eliminate_store_p && DR_IS_WRITE (a->ref))
1570 || wi::leu_p (MAX_DISTANCE, a->offset - last_ofs))
1571 {
1572 if (nontrivial_chain_p (chain))
1573 {
1574 add_looparound_copies (chain);
1575 m_chains.safe_push (chain);
1576 }
1577 else
1578 release_chain (chain);
1579
1580 /* Determine type of the chain. If the root reference is a load,
1581 this can only be a CT_LOAD chain; other chains are intialized
1582 to CT_STORE_LOAD and might be promoted to CT_STORE_STORE when
1583 new reference is added. */
1584 type = DR_IS_READ (a->ref) ? CT_LOAD : CT_STORE_LOAD;
1585 chain = make_rooted_chain (a, type);
1586 last_ofs = a->offset;
1587 continue;
1588 }
1589
1590 add_ref_to_chain (chain, a);
1591 }
1592
1593 if (nontrivial_chain_p (chain))
1594 {
1595 add_looparound_copies (chain);
1596 m_chains.safe_push (chain);
1597 }
1598 else
1599 release_chain (chain);
1600 }
1601
1602 /* Find roots of the values and determine distances in components COMPS, and
1603 separates the references to chains. */
1604
1605 void
1606 pcom_worker::determine_roots (struct component *comps)
1607 {
1608 struct component *comp;
1609
1610 for (comp = comps; comp; comp = comp->next)
1611 determine_roots_comp (comp);
1612 }
1613
1614 /* Replace the reference in statement STMT with temporary variable
1615 NEW_TREE. If SET is true, NEW_TREE is instead initialized to the value of
1616 the reference in the statement. IN_LHS is true if the reference
1617 is in the lhs of STMT, false if it is in rhs. */
1618
1619 static void
1620 replace_ref_with (gimple *stmt, tree new_tree, bool set, bool in_lhs)
1621 {
1622 tree val;
1623 gassign *new_stmt;
1624 gimple_stmt_iterator bsi, psi;
1625
1626 if (gimple_code (stmt) == GIMPLE_PHI)
1627 {
1628 gcc_assert (!in_lhs && !set);
1629
1630 val = PHI_RESULT (stmt);
1631 bsi = gsi_after_labels (gimple_bb (stmt));
1632 psi = gsi_for_stmt (stmt);
1633 remove_phi_node (&psi, false);
1634
1635 /* Turn the phi node into GIMPLE_ASSIGN. */
1636 new_stmt = gimple_build_assign (val, new_tree);
1637 gsi_insert_before (&bsi, new_stmt, GSI_NEW_STMT);
1638 return;
1639 }
1640
1641 /* Since the reference is of gimple_reg type, it should only
1642 appear as lhs or rhs of modify statement. */
1643 gcc_assert (is_gimple_assign (stmt));
1644
1645 bsi = gsi_for_stmt (stmt);
1646
1647 /* If we do not need to initialize NEW_TREE, just replace the use of OLD. */
1648 if (!set)
1649 {
1650 gcc_assert (!in_lhs);
1651 gimple_assign_set_rhs_from_tree (&bsi, new_tree);
1652 stmt = gsi_stmt (bsi);
1653 update_stmt (stmt);
1654 return;
1655 }
1656
1657 if (in_lhs)
1658 {
1659 /* We have statement
1660
1661 OLD = VAL
1662
1663 If OLD is a memory reference, then VAL is gimple_val, and we transform
1664 this to
1665
1666 OLD = VAL
1667 NEW = VAL
1668
1669 Otherwise, we are replacing a combination chain,
1670 VAL is the expression that performs the combination, and OLD is an
1671 SSA name. In this case, we transform the assignment to
1672
1673 OLD = VAL
1674 NEW = OLD
1675
1676 */
1677
1678 val = gimple_assign_lhs (stmt);
1679 if (TREE_CODE (val) != SSA_NAME)
1680 {
1681 val = gimple_assign_rhs1 (stmt);
1682 gcc_assert (gimple_assign_single_p (stmt));
1683 if (TREE_CLOBBER_P (val))
1684 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (new_tree));
1685 else
1686 gcc_assert (gimple_assign_copy_p (stmt));
1687 }
1688 }
1689 else
1690 {
1691 /* VAL = OLD
1692
1693 is transformed to
1694
1695 VAL = OLD
1696 NEW = VAL */
1697
1698 val = gimple_assign_lhs (stmt);
1699 }
1700
1701 new_stmt = gimple_build_assign (new_tree, unshare_expr (val));
1702 gsi_insert_after (&bsi, new_stmt, GSI_NEW_STMT);
1703 }
1704
1705 /* Returns a memory reference to DR in the (NITERS + ITER)-th iteration
1706 of the loop it was analyzed in. Append init stmts to STMTS. */
1707
1708 static tree
1709 ref_at_iteration (data_reference_p dr, int iter,
1710 gimple_seq *stmts, tree niters = NULL_TREE)
1711 {
1712 tree off = DR_OFFSET (dr);
1713 tree coff = DR_INIT (dr);
1714 tree ref = DR_REF (dr);
1715 enum tree_code ref_code = ERROR_MARK;
1716 tree ref_type = NULL_TREE;
1717 tree ref_op1 = NULL_TREE;
1718 tree ref_op2 = NULL_TREE;
1719 tree new_offset;
1720
1721 if (iter != 0)
1722 {
1723 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), ssize_int (iter));
1724 if (TREE_CODE (new_offset) == INTEGER_CST)
1725 coff = size_binop (PLUS_EXPR, coff, new_offset);
1726 else
1727 off = size_binop (PLUS_EXPR, off, new_offset);
1728 }
1729
1730 if (niters != NULL_TREE)
1731 {
1732 niters = fold_convert (ssizetype, niters);
1733 new_offset = size_binop (MULT_EXPR, DR_STEP (dr), niters);
1734 if (TREE_CODE (niters) == INTEGER_CST)
1735 coff = size_binop (PLUS_EXPR, coff, new_offset);
1736 else
1737 off = size_binop (PLUS_EXPR, off, new_offset);
1738 }
1739
1740 /* While data-ref analysis punts on bit offsets it still handles
1741 bitfield accesses at byte boundaries. Cope with that. Note that
1742 if the bitfield object also starts at a byte-boundary we can simply
1743 replicate the COMPONENT_REF, but we have to subtract the component's
1744 byte-offset from the MEM_REF address first.
1745 Otherwise we simply build a BIT_FIELD_REF knowing that the bits
1746 start at offset zero. */
1747 if (TREE_CODE (ref) == COMPONENT_REF
1748 && DECL_BIT_FIELD (TREE_OPERAND (ref, 1)))
1749 {
1750 unsigned HOST_WIDE_INT boff;
1751 tree field = TREE_OPERAND (ref, 1);
1752 tree offset = component_ref_field_offset (ref);
1753 ref_type = TREE_TYPE (ref);
1754 boff = tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field));
1755 /* This can occur in Ada. See the comment in get_bit_range. */
1756 if (boff % BITS_PER_UNIT != 0
1757 || !tree_fits_uhwi_p (offset))
1758 {
1759 ref_code = BIT_FIELD_REF;
1760 ref_op1 = DECL_SIZE (field);
1761 ref_op2 = bitsize_zero_node;
1762 }
1763 else
1764 {
1765 boff >>= LOG2_BITS_PER_UNIT;
1766 boff += tree_to_uhwi (offset);
1767 coff = size_binop (MINUS_EXPR, coff, ssize_int (boff));
1768 ref_code = COMPONENT_REF;
1769 ref_op1 = field;
1770 ref_op2 = TREE_OPERAND (ref, 2);
1771 ref = TREE_OPERAND (ref, 0);
1772 }
1773 }
1774 /* We may not associate the constant offset across the pointer plus
1775 expression because that might form a pointer to before the object
1776 then. But for some cases we can retain that to allow tree_could_trap_p
1777 to return false - see gcc.dg/tree-ssa/predcom-1.c */
1778 tree addr, alias_ptr;
1779 if (integer_zerop (off))
1780 {
1781 alias_ptr = fold_convert (reference_alias_ptr_type (ref), coff);
1782 addr = DR_BASE_ADDRESS (dr);
1783 }
1784 else
1785 {
1786 alias_ptr = build_zero_cst (reference_alias_ptr_type (ref));
1787 off = size_binop (PLUS_EXPR, off, coff);
1788 addr = fold_build_pointer_plus (DR_BASE_ADDRESS (dr), off);
1789 }
1790 addr = force_gimple_operand_1 (unshare_expr (addr), stmts,
1791 is_gimple_mem_ref_addr, NULL_TREE);
1792 tree type = build_aligned_type (TREE_TYPE (ref),
1793 get_object_alignment (ref));
1794 ref = build2 (MEM_REF, type, addr, alias_ptr);
1795 if (ref_type)
1796 ref = build3 (ref_code, ref_type, ref, ref_op1, ref_op2);
1797 return ref;
1798 }
1799
1800 /* Get the initialization expression for the INDEX-th temporary variable
1801 of CHAIN. */
1802
1803 static tree
1804 get_init_expr (chain_p chain, unsigned index)
1805 {
1806 if (chain->type == CT_COMBINATION)
1807 {
1808 tree e1 = get_init_expr (chain->ch1, index);
1809 tree e2 = get_init_expr (chain->ch2, index);
1810
1811 return fold_build2 (chain->op, chain->rslt_type, e1, e2);
1812 }
1813 else
1814 return chain->inits[index];
1815 }
1816
1817 /* Returns a new temporary variable used for the I-th variable carrying
1818 value of REF. The variable's uid is marked in TMP_VARS. */
1819
1820 static tree
1821 predcom_tmp_var (tree ref, unsigned i, bitmap tmp_vars)
1822 {
1823 tree type = TREE_TYPE (ref);
1824 /* We never access the components of the temporary variable in predictive
1825 commoning. */
1826 tree var = create_tmp_reg (type, get_lsm_tmp_name (ref, i));
1827 bitmap_set_bit (tmp_vars, DECL_UID (var));
1828 return var;
1829 }
1830
1831 /* Creates the variables for CHAIN, as well as phi nodes for them and
1832 initialization on entry to LOOP. Uids of the newly created
1833 temporary variables are marked in TMP_VARS. */
1834
1835 static void
1836 initialize_root_vars (class loop *loop, chain_p chain, bitmap tmp_vars)
1837 {
1838 unsigned i;
1839 unsigned n = chain->length;
1840 dref root = get_chain_root (chain);
1841 bool reuse_first = !chain->has_max_use_after;
1842 tree ref, init, var, next;
1843 gphi *phi;
1844 gimple_seq stmts;
1845 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
1846
1847 /* If N == 0, then all the references are within the single iteration. And
1848 since this is an nonempty chain, reuse_first cannot be true. */
1849 gcc_assert (n > 0 || !reuse_first);
1850
1851 chain->vars.create (n + 1);
1852
1853 if (chain->type == CT_COMBINATION)
1854 ref = gimple_assign_lhs (root->stmt);
1855 else
1856 ref = DR_REF (root->ref);
1857
1858 for (i = 0; i < n + (reuse_first ? 0 : 1); i++)
1859 {
1860 var = predcom_tmp_var (ref, i, tmp_vars);
1861 chain->vars.quick_push (var);
1862 }
1863 if (reuse_first)
1864 chain->vars.quick_push (chain->vars[0]);
1865
1866 FOR_EACH_VEC_ELT (chain->vars, i, var)
1867 chain->vars[i] = make_ssa_name (var);
1868
1869 for (i = 0; i < n; i++)
1870 {
1871 var = chain->vars[i];
1872 next = chain->vars[i + 1];
1873 init = get_init_expr (chain, i);
1874
1875 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
1876 if (stmts)
1877 gsi_insert_seq_on_edge_immediate (entry, stmts);
1878
1879 phi = create_phi_node (var, loop->header);
1880 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
1881 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
1882 }
1883 }
1884
1885 /* For inter-iteration store elimination CHAIN in LOOP, returns true if
1886 all stores to be eliminated store loop invariant values into memory.
1887 In this case, we can use these invariant values directly after LOOP. */
1888
1889 static bool
1890 is_inv_store_elimination_chain (class loop *loop, chain_p chain)
1891 {
1892 if (chain->length == 0 || chain->type != CT_STORE_STORE)
1893 return false;
1894
1895 gcc_assert (!chain->has_max_use_after);
1896
1897 /* If loop iterates for unknown times or fewer times than chain->length,
1898 we still need to setup root variable and propagate it with PHI node. */
1899 tree niters = number_of_latch_executions (loop);
1900 if (TREE_CODE (niters) != INTEGER_CST
1901 || wi::leu_p (wi::to_wide (niters), chain->length))
1902 return false;
1903
1904 /* Check stores in chain for elimination if they only store loop invariant
1905 values. */
1906 for (unsigned i = 0; i < chain->length; i++)
1907 {
1908 dref a = get_chain_last_write_at (chain, i);
1909 if (a == NULL)
1910 continue;
1911
1912 gimple *def_stmt, *stmt = a->stmt;
1913 if (!gimple_assign_single_p (stmt))
1914 return false;
1915
1916 tree val = gimple_assign_rhs1 (stmt);
1917 if (TREE_CLOBBER_P (val))
1918 return false;
1919
1920 if (CONSTANT_CLASS_P (val))
1921 continue;
1922
1923 if (TREE_CODE (val) != SSA_NAME)
1924 return false;
1925
1926 def_stmt = SSA_NAME_DEF_STMT (val);
1927 if (gimple_nop_p (def_stmt))
1928 continue;
1929
1930 if (flow_bb_inside_loop_p (loop, gimple_bb (def_stmt)))
1931 return false;
1932 }
1933 return true;
1934 }
1935
1936 /* Creates root variables for store elimination CHAIN in which stores for
1937 elimination only store loop invariant values. In this case, we neither
1938 need to load root variables before loop nor propagate it with PHI nodes. */
1939
1940 static void
1941 initialize_root_vars_store_elim_1 (chain_p chain)
1942 {
1943 tree var;
1944 unsigned i, n = chain->length;
1945
1946 chain->vars.create (n);
1947 chain->vars.safe_grow_cleared (n, true);
1948
1949 /* Initialize root value for eliminated stores at each distance. */
1950 for (i = 0; i < n; i++)
1951 {
1952 dref a = get_chain_last_write_at (chain, i);
1953 if (a == NULL)
1954 continue;
1955
1956 var = gimple_assign_rhs1 (a->stmt);
1957 chain->vars[a->distance] = var;
1958 }
1959
1960 /* We don't propagate values with PHI nodes, so manually propagate value
1961 to bubble positions. */
1962 var = chain->vars[0];
1963 for (i = 1; i < n; i++)
1964 {
1965 if (chain->vars[i] != NULL_TREE)
1966 {
1967 var = chain->vars[i];
1968 continue;
1969 }
1970 chain->vars[i] = var;
1971 }
1972
1973 /* Revert the vector. */
1974 for (i = 0; i < n / 2; i++)
1975 std::swap (chain->vars[i], chain->vars[n - i - 1]);
1976 }
1977
1978 /* Creates root variables for store elimination CHAIN in which stores for
1979 elimination store loop variant values. In this case, we may need to
1980 load root variables before LOOP and propagate it with PHI nodes. Uids
1981 of the newly created root variables are marked in TMP_VARS. */
1982
1983 static void
1984 initialize_root_vars_store_elim_2 (class loop *loop,
1985 chain_p chain, bitmap tmp_vars)
1986 {
1987 unsigned i, n = chain->length;
1988 tree ref, init, var, next, val, phi_result;
1989 gimple *stmt;
1990 gimple_seq stmts;
1991
1992 chain->vars.create (n);
1993
1994 ref = DR_REF (get_chain_root (chain)->ref);
1995 for (i = 0; i < n; i++)
1996 {
1997 var = predcom_tmp_var (ref, i, tmp_vars);
1998 chain->vars.quick_push (var);
1999 }
2000
2001 FOR_EACH_VEC_ELT (chain->vars, i, var)
2002 chain->vars[i] = make_ssa_name (var);
2003
2004 /* Root values are either rhs operand of stores to be eliminated, or
2005 loaded from memory before loop. */
2006 auto_vec<tree> vtemps;
2007 vtemps.safe_grow_cleared (n, true);
2008 for (i = 0; i < n; i++)
2009 {
2010 init = get_init_expr (chain, i);
2011 if (init == NULL_TREE)
2012 {
2013 /* Root value is rhs operand of the store to be eliminated if
2014 it isn't loaded from memory before loop. */
2015 dref a = get_chain_last_write_at (chain, i);
2016 val = gimple_assign_rhs1 (a->stmt);
2017 if (TREE_CLOBBER_P (val))
2018 {
2019 val = get_or_create_ssa_default_def (cfun, SSA_NAME_VAR (var));
2020 gimple_assign_set_rhs1 (a->stmt, val);
2021 }
2022
2023 vtemps[n - i - 1] = val;
2024 }
2025 else
2026 {
2027 edge latch = loop_latch_edge (loop);
2028 edge entry = loop_preheader_edge (loop);
2029
2030 /* Root value is loaded from memory before loop, we also need
2031 to add PHI nodes to propagate the value across iterations. */
2032 init = force_gimple_operand (init, &stmts, true, NULL_TREE);
2033 if (stmts)
2034 gsi_insert_seq_on_edge_immediate (entry, stmts);
2035
2036 next = chain->vars[n - i];
2037 phi_result = copy_ssa_name (next);
2038 gphi *phi = create_phi_node (phi_result, loop->header);
2039 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
2040 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
2041 vtemps[n - i - 1] = phi_result;
2042 }
2043 }
2044
2045 /* Find the insertion position. */
2046 dref last = get_chain_root (chain);
2047 for (i = 0; i < chain->refs.length (); i++)
2048 {
2049 if (chain->refs[i]->pos > last->pos)
2050 last = chain->refs[i];
2051 }
2052
2053 gimple_stmt_iterator gsi = gsi_for_stmt (last->stmt);
2054
2055 /* Insert statements copying root value to root variable. */
2056 for (i = 0; i < n; i++)
2057 {
2058 var = chain->vars[i];
2059 val = vtemps[i];
2060 stmt = gimple_build_assign (var, val);
2061 gsi_insert_after (&gsi, stmt, GSI_NEW_STMT);
2062 }
2063 }
2064
2065 /* Generates stores for CHAIN's eliminated stores in LOOP's last
2066 (CHAIN->length - 1) iterations. */
2067
2068 static void
2069 finalize_eliminated_stores (class loop *loop, chain_p chain)
2070 {
2071 unsigned i, n = chain->length;
2072
2073 for (i = 0; i < n; i++)
2074 {
2075 tree var = chain->vars[i];
2076 tree fini = chain->finis[n - i - 1];
2077 gimple *stmt = gimple_build_assign (fini, var);
2078
2079 gimple_seq_add_stmt_without_update (&chain->fini_seq, stmt);
2080 }
2081
2082 if (chain->fini_seq)
2083 {
2084 gsi_insert_seq_on_edge_immediate (single_exit (loop), chain->fini_seq);
2085 chain->fini_seq = NULL;
2086 }
2087 }
2088
2089 /* Initializes a variable for load motion for ROOT and prepares phi nodes and
2090 initialization on entry to LOOP if necessary. The ssa name for the variable
2091 is stored in VARS. If WRITTEN is true, also a phi node to copy its value
2092 around the loop is created. Uid of the newly created temporary variable
2093 is marked in TMP_VARS. INITS is the list containing the (single)
2094 initializer. */
2095
2096 static void
2097 initialize_root_vars_lm (class loop *loop, dref root, bool written,
2098 vec<tree> *vars, const vec<tree> &inits,
2099 bitmap tmp_vars)
2100 {
2101 unsigned i;
2102 tree ref = DR_REF (root->ref), init, var, next;
2103 gimple_seq stmts;
2104 gphi *phi;
2105 edge entry = loop_preheader_edge (loop), latch = loop_latch_edge (loop);
2106
2107 /* Find the initializer for the variable, and check that it cannot
2108 trap. */
2109 init = inits[0];
2110
2111 vars->create (written ? 2 : 1);
2112 var = predcom_tmp_var (ref, 0, tmp_vars);
2113 vars->quick_push (var);
2114 if (written)
2115 vars->quick_push ((*vars)[0]);
2116
2117 FOR_EACH_VEC_ELT (*vars, i, var)
2118 (*vars)[i] = make_ssa_name (var);
2119
2120 var = (*vars)[0];
2121
2122 init = force_gimple_operand (init, &stmts, written, NULL_TREE);
2123 if (stmts)
2124 gsi_insert_seq_on_edge_immediate (entry, stmts);
2125
2126 if (written)
2127 {
2128 next = (*vars)[1];
2129 phi = create_phi_node (var, loop->header);
2130 add_phi_arg (phi, init, entry, UNKNOWN_LOCATION);
2131 add_phi_arg (phi, next, latch, UNKNOWN_LOCATION);
2132 }
2133 else
2134 {
2135 gassign *init_stmt = gimple_build_assign (var, init);
2136 gsi_insert_on_edge_immediate (entry, init_stmt);
2137 }
2138 }
2139
2140
2141 /* Execute load motion for references in chain CHAIN. Uids of the newly
2142 created temporary variables are marked in TMP_VARS. */
2143
2144 static void
2145 execute_load_motion (class loop *loop, chain_p chain, bitmap tmp_vars)
2146 {
2147 auto_vec<tree> vars;
2148 dref a;
2149 unsigned n_writes = 0, ridx, i;
2150 tree var;
2151
2152 gcc_assert (chain->type == CT_INVARIANT);
2153 gcc_assert (!chain->combined);
2154 FOR_EACH_VEC_ELT (chain->refs, i, a)
2155 if (DR_IS_WRITE (a->ref))
2156 n_writes++;
2157
2158 /* If there are no reads in the loop, there is nothing to do. */
2159 if (n_writes == chain->refs.length ())
2160 return;
2161
2162 initialize_root_vars_lm (loop, get_chain_root (chain), n_writes > 0,
2163 &vars, chain->inits, tmp_vars);
2164
2165 ridx = 0;
2166 FOR_EACH_VEC_ELT (chain->refs, i, a)
2167 {
2168 bool is_read = DR_IS_READ (a->ref);
2169
2170 if (DR_IS_WRITE (a->ref))
2171 {
2172 n_writes--;
2173 if (n_writes)
2174 {
2175 var = vars[0];
2176 var = make_ssa_name (SSA_NAME_VAR (var));
2177 vars[0] = var;
2178 }
2179 else
2180 ridx = 1;
2181 }
2182
2183 replace_ref_with (a->stmt, vars[ridx],
2184 !is_read, !is_read);
2185 }
2186 }
2187
2188 /* Returns the single statement in that NAME is used, excepting
2189 the looparound phi nodes contained in one of the chains. If there is no
2190 such statement, or more statements, NULL is returned. */
2191
2192 gimple *
2193 pcom_worker::single_nonlooparound_use (tree name)
2194 {
2195 use_operand_p use;
2196 imm_use_iterator it;
2197 gimple *stmt, *ret = NULL;
2198
2199 FOR_EACH_IMM_USE_FAST (use, it, name)
2200 {
2201 stmt = USE_STMT (use);
2202
2203 if (gimple_code (stmt) == GIMPLE_PHI)
2204 {
2205 /* Ignore uses in looparound phi nodes. Uses in other phi nodes
2206 could not be processed anyway, so just fail for them. */
2207 if (bitmap_bit_p (m_looparound_phis,
2208 SSA_NAME_VERSION (PHI_RESULT (stmt))))
2209 continue;
2210
2211 return NULL;
2212 }
2213 else if (is_gimple_debug (stmt))
2214 continue;
2215 else if (ret != NULL)
2216 return NULL;
2217 else
2218 ret = stmt;
2219 }
2220
2221 return ret;
2222 }
2223
2224 /* Remove statement STMT, as well as the chain of assignments in that it is
2225 used. */
2226
2227 void
2228 pcom_worker::remove_stmt (gimple *stmt)
2229 {
2230 tree name;
2231 gimple *next;
2232 gimple_stmt_iterator psi;
2233
2234 if (gimple_code (stmt) == GIMPLE_PHI)
2235 {
2236 name = PHI_RESULT (stmt);
2237 next = single_nonlooparound_use (name);
2238 reset_debug_uses (stmt);
2239 psi = gsi_for_stmt (stmt);
2240 remove_phi_node (&psi, true);
2241
2242 if (!next
2243 || !gimple_assign_ssa_name_copy_p (next)
2244 || gimple_assign_rhs1 (next) != name)
2245 return;
2246
2247 stmt = next;
2248 }
2249
2250 while (1)
2251 {
2252 gimple_stmt_iterator bsi;
2253
2254 bsi = gsi_for_stmt (stmt);
2255
2256 name = gimple_assign_lhs (stmt);
2257 if (TREE_CODE (name) == SSA_NAME)
2258 {
2259 next = single_nonlooparound_use (name);
2260 reset_debug_uses (stmt);
2261 }
2262 else
2263 {
2264 /* This is a store to be eliminated. */
2265 gcc_assert (gimple_vdef (stmt) != NULL);
2266 next = NULL;
2267 }
2268
2269 unlink_stmt_vdef (stmt);
2270 gsi_remove (&bsi, true);
2271 release_defs (stmt);
2272
2273 if (!next
2274 || !gimple_assign_ssa_name_copy_p (next)
2275 || gimple_assign_rhs1 (next) != name)
2276 return;
2277
2278 stmt = next;
2279 }
2280 }
2281
2282 /* Perform the predictive commoning optimization for a chain CHAIN.
2283 Uids of the newly created temporary variables are marked in TMP_VARS.*/
2284
2285 void
2286 pcom_worker::execute_pred_commoning_chain (chain_p chain,
2287 bitmap tmp_vars)
2288 {
2289 unsigned i;
2290 dref a;
2291 tree var;
2292 bool in_lhs;
2293
2294 if (chain->combined)
2295 {
2296 /* For combined chains, just remove the statements that are used to
2297 compute the values of the expression (except for the root one).
2298 We delay this until after all chains are processed. */
2299 }
2300 else if (chain->type == CT_STORE_STORE)
2301 {
2302 if (chain->length > 0)
2303 {
2304 if (chain->inv_store_elimination)
2305 {
2306 /* If dead stores in this chain only store loop invariant
2307 values, we can simply record the invariant value and use
2308 it directly after loop. */
2309 initialize_root_vars_store_elim_1 (chain);
2310 }
2311 else
2312 {
2313 /* If dead stores in this chain store loop variant values,
2314 we need to set up the variables by loading from memory
2315 before loop and propagating it with PHI nodes. */
2316 initialize_root_vars_store_elim_2 (m_loop, chain, tmp_vars);
2317 }
2318
2319 /* For inter-iteration store elimination chain, stores at each
2320 distance in loop's last (chain->length - 1) iterations can't
2321 be eliminated, because there is no following killing store.
2322 We need to generate these stores after loop. */
2323 finalize_eliminated_stores (m_loop, chain);
2324 }
2325
2326 bool last_store_p = true;
2327 for (i = chain->refs.length (); i > 0; i--)
2328 {
2329 a = chain->refs[i - 1];
2330 /* Preserve the last store of the chain. Eliminate other stores
2331 which are killed by the last one. */
2332 if (DR_IS_WRITE (a->ref))
2333 {
2334 if (last_store_p)
2335 last_store_p = false;
2336 else
2337 remove_stmt (a->stmt);
2338
2339 continue;
2340 }
2341
2342 /* Any load in Store-Store chain must be dominated by a previous
2343 store, we replace the load reference with rhs of the store. */
2344 dref b = get_chain_last_write_before_load (chain, i - 1);
2345 gcc_assert (b != NULL);
2346 var = gimple_assign_rhs1 (b->stmt);
2347 replace_ref_with (a->stmt, var, false, false);
2348 }
2349 }
2350 else
2351 {
2352 /* For non-combined chains, set up the variables that hold its value. */
2353 initialize_root_vars (m_loop, chain, tmp_vars);
2354 a = get_chain_root (chain);
2355 in_lhs = (chain->type == CT_STORE_LOAD
2356 || chain->type == CT_COMBINATION);
2357 replace_ref_with (a->stmt, chain->vars[chain->length], true, in_lhs);
2358
2359 /* Replace the uses of the original references by these variables. */
2360 for (i = 1; chain->refs.iterate (i, &a); i++)
2361 {
2362 var = chain->vars[chain->length - a->distance];
2363 replace_ref_with (a->stmt, var, false, false);
2364 }
2365 }
2366 }
2367
2368 /* Determines the unroll factor necessary to remove as many temporary variable
2369 copies as possible. CHAINS is the list of chains that will be
2370 optimized. */
2371
2372 static unsigned
2373 determine_unroll_factor (const vec<chain_p> &chains)
2374 {
2375 chain_p chain;
2376 unsigned factor = 1, af, nfactor, i;
2377 unsigned max = param_max_unroll_times;
2378
2379 FOR_EACH_VEC_ELT (chains, i, chain)
2380 {
2381 if (chain->type == CT_INVARIANT)
2382 continue;
2383 /* For now we can't handle unrolling when eliminating stores. */
2384 else if (chain->type == CT_STORE_STORE)
2385 return 1;
2386
2387 if (chain->combined)
2388 {
2389 /* For combined chains, we can't handle unrolling if we replace
2390 looparound PHIs. */
2391 dref a;
2392 unsigned j;
2393 for (j = 1; chain->refs.iterate (j, &a); j++)
2394 if (gimple_code (a->stmt) == GIMPLE_PHI)
2395 return 1;
2396 continue;
2397 }
2398
2399 /* The best unroll factor for this chain is equal to the number of
2400 temporary variables that we create for it. */
2401 af = chain->length;
2402 if (chain->has_max_use_after)
2403 af++;
2404
2405 nfactor = factor * af / gcd (factor, af);
2406 if (nfactor <= max)
2407 factor = nfactor;
2408 }
2409
2410 return factor;
2411 }
2412
2413 /* Perform the predictive commoning optimization for chains.
2414 Uids of the newly created temporary variables are marked in TMP_VARS. */
2415
2416 void
2417 pcom_worker::execute_pred_commoning (bitmap tmp_vars)
2418 {
2419 chain_p chain;
2420 unsigned i;
2421
2422 FOR_EACH_VEC_ELT (m_chains, i, chain)
2423 {
2424 if (chain->type == CT_INVARIANT)
2425 execute_load_motion (m_loop, chain, tmp_vars);
2426 else
2427 execute_pred_commoning_chain (chain, tmp_vars);
2428 }
2429
2430 FOR_EACH_VEC_ELT (m_chains, i, chain)
2431 {
2432 if (chain->type == CT_INVARIANT)
2433 ;
2434 else if (chain->combined)
2435 {
2436 /* For combined chains, just remove the statements that are used to
2437 compute the values of the expression (except for the root one). */
2438 dref a;
2439 unsigned j;
2440 for (j = 1; chain->refs.iterate (j, &a); j++)
2441 remove_stmt (a->stmt);
2442 }
2443 }
2444 }
2445
2446 /* For each reference in CHAINS, if its defining statement is
2447 phi node, record the ssa name that is defined by it. */
2448
2449 static void
2450 replace_phis_by_defined_names (vec<chain_p> &chains)
2451 {
2452 chain_p chain;
2453 dref a;
2454 unsigned i, j;
2455
2456 FOR_EACH_VEC_ELT (chains, i, chain)
2457 FOR_EACH_VEC_ELT (chain->refs, j, a)
2458 {
2459 if (gimple_code (a->stmt) == GIMPLE_PHI)
2460 {
2461 a->name_defined_by_phi = PHI_RESULT (a->stmt);
2462 a->stmt = NULL;
2463 }
2464 }
2465 }
2466
2467 /* For each reference in CHAINS, if name_defined_by_phi is not
2468 NULL, use it to set the stmt field. */
2469
2470 static void
2471 replace_names_by_phis (vec<chain_p> chains)
2472 {
2473 chain_p chain;
2474 dref a;
2475 unsigned i, j;
2476
2477 FOR_EACH_VEC_ELT (chains, i, chain)
2478 FOR_EACH_VEC_ELT (chain->refs, j, a)
2479 if (a->stmt == NULL)
2480 {
2481 a->stmt = SSA_NAME_DEF_STMT (a->name_defined_by_phi);
2482 gcc_assert (gimple_code (a->stmt) == GIMPLE_PHI);
2483 a->name_defined_by_phi = NULL_TREE;
2484 }
2485 }
2486
2487 /* Wrapper over execute_pred_commoning, to pass it as a callback
2488 to tree_transform_and_unroll_loop. */
2489
2490 struct epcc_data
2491 {
2492 vec<chain_p> chains;
2493 bitmap tmp_vars;
2494 pcom_worker *worker;
2495 };
2496
2497 static void
2498 execute_pred_commoning_cbck (class loop *loop ATTRIBUTE_UNUSED, void *data)
2499 {
2500 struct epcc_data *const dta = (struct epcc_data *) data;
2501 pcom_worker *worker = dta->worker;
2502
2503 /* Restore phi nodes that were replaced by ssa names before
2504 tree_transform_and_unroll_loop (see detailed description in
2505 tree_predictive_commoning_loop). */
2506 replace_names_by_phis (dta->chains);
2507 worker->execute_pred_commoning (dta->tmp_vars);
2508 }
2509
2510 /* Base NAME and all the names in the chain of phi nodes that use it
2511 on variable VAR. The phi nodes are recognized by being in the copies of
2512 the header of the LOOP. */
2513
2514 static void
2515 base_names_in_chain_on (class loop *loop, tree name, tree var)
2516 {
2517 gimple *stmt, *phi;
2518 imm_use_iterator iter;
2519
2520 replace_ssa_name_symbol (name, var);
2521
2522 while (1)
2523 {
2524 phi = NULL;
2525 FOR_EACH_IMM_USE_STMT (stmt, iter, name)
2526 {
2527 if (gimple_code (stmt) == GIMPLE_PHI
2528 && flow_bb_inside_loop_p (loop, gimple_bb (stmt)))
2529 {
2530 phi = stmt;
2531 break;
2532 }
2533 }
2534 if (!phi)
2535 return;
2536
2537 name = PHI_RESULT (phi);
2538 replace_ssa_name_symbol (name, var);
2539 }
2540 }
2541
2542 /* Given an unrolled LOOP after predictive commoning, remove the
2543 register copies arising from phi nodes by changing the base
2544 variables of SSA names. TMP_VARS is the set of the temporary variables
2545 for those we want to perform this. */
2546
2547 static void
2548 eliminate_temp_copies (class loop *loop, bitmap tmp_vars)
2549 {
2550 edge e;
2551 gphi *phi;
2552 gimple *stmt;
2553 tree name, use, var;
2554 gphi_iterator psi;
2555
2556 e = loop_latch_edge (loop);
2557 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
2558 {
2559 phi = psi.phi ();
2560 name = PHI_RESULT (phi);
2561 var = SSA_NAME_VAR (name);
2562 if (!var || !bitmap_bit_p (tmp_vars, DECL_UID (var)))
2563 continue;
2564 use = PHI_ARG_DEF_FROM_EDGE (phi, e);
2565 gcc_assert (TREE_CODE (use) == SSA_NAME);
2566
2567 /* Base all the ssa names in the ud and du chain of NAME on VAR. */
2568 stmt = SSA_NAME_DEF_STMT (use);
2569 while (gimple_code (stmt) == GIMPLE_PHI
2570 /* In case we could not unroll the loop enough to eliminate
2571 all copies, we may reach the loop header before the defining
2572 statement (in that case, some register copies will be present
2573 in loop latch in the final code, corresponding to the newly
2574 created looparound phi nodes). */
2575 && gimple_bb (stmt) != loop->header)
2576 {
2577 gcc_assert (single_pred_p (gimple_bb (stmt)));
2578 use = PHI_ARG_DEF (stmt, 0);
2579 stmt = SSA_NAME_DEF_STMT (use);
2580 }
2581
2582 base_names_in_chain_on (loop, use, var);
2583 }
2584 }
2585
2586 /* Returns true if CHAIN is suitable to be combined. */
2587
2588 static bool
2589 chain_can_be_combined_p (chain_p chain)
2590 {
2591 return (!chain->combined
2592 && (chain->type == CT_LOAD || chain->type == CT_COMBINATION));
2593 }
2594
2595 /* Returns the modify statement that uses NAME. Skips over assignment
2596 statements, NAME is replaced with the actual name used in the returned
2597 statement. */
2598
2599 gimple *
2600 pcom_worker::find_use_stmt (tree *name)
2601 {
2602 gimple *stmt;
2603 tree rhs, lhs;
2604
2605 /* Skip over assignments. */
2606 while (1)
2607 {
2608 stmt = single_nonlooparound_use (*name);
2609 if (!stmt)
2610 return NULL;
2611
2612 if (gimple_code (stmt) != GIMPLE_ASSIGN)
2613 return NULL;
2614
2615 lhs = gimple_assign_lhs (stmt);
2616 if (TREE_CODE (lhs) != SSA_NAME)
2617 return NULL;
2618
2619 if (gimple_assign_copy_p (stmt))
2620 {
2621 rhs = gimple_assign_rhs1 (stmt);
2622 if (rhs != *name)
2623 return NULL;
2624
2625 *name = lhs;
2626 }
2627 else if (get_gimple_rhs_class (gimple_assign_rhs_code (stmt))
2628 == GIMPLE_BINARY_RHS)
2629 return stmt;
2630 else
2631 return NULL;
2632 }
2633 }
2634
2635 /* Returns true if we may perform reassociation for operation CODE in TYPE. */
2636
2637 static bool
2638 may_reassociate_p (tree type, enum tree_code code)
2639 {
2640 if (FLOAT_TYPE_P (type)
2641 && !flag_unsafe_math_optimizations)
2642 return false;
2643
2644 return (commutative_tree_code (code)
2645 && associative_tree_code (code));
2646 }
2647
2648 /* If the operation used in STMT is associative and commutative, go through the
2649 tree of the same operations and returns its root. Distance to the root
2650 is stored in DISTANCE. */
2651
2652 gimple *
2653 pcom_worker::find_associative_operation_root (gimple *stmt, unsigned *distance)
2654 {
2655 tree lhs;
2656 gimple *next;
2657 enum tree_code code = gimple_assign_rhs_code (stmt);
2658 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
2659 unsigned dist = 0;
2660
2661 if (!may_reassociate_p (type, code))
2662 return NULL;
2663
2664 while (1)
2665 {
2666 lhs = gimple_assign_lhs (stmt);
2667 gcc_assert (TREE_CODE (lhs) == SSA_NAME);
2668
2669 next = find_use_stmt (&lhs);
2670 if (!next
2671 || gimple_assign_rhs_code (next) != code)
2672 break;
2673
2674 stmt = next;
2675 dist++;
2676 }
2677
2678 if (distance)
2679 *distance = dist;
2680 return stmt;
2681 }
2682
2683 /* Returns the common statement in that NAME1 and NAME2 have a use. If there
2684 is no such statement, returns NULL_TREE. In case the operation used on
2685 NAME1 and NAME2 is associative and commutative, returns the root of the
2686 tree formed by this operation instead of the statement that uses NAME1 or
2687 NAME2. */
2688
2689 gimple *
2690 pcom_worker::find_common_use_stmt (tree *name1, tree *name2)
2691 {
2692 gimple *stmt1, *stmt2;
2693
2694 stmt1 = find_use_stmt (name1);
2695 if (!stmt1)
2696 return NULL;
2697
2698 stmt2 = find_use_stmt (name2);
2699 if (!stmt2)
2700 return NULL;
2701
2702 if (stmt1 == stmt2)
2703 return stmt1;
2704
2705 stmt1 = find_associative_operation_root (stmt1, NULL);
2706 if (!stmt1)
2707 return NULL;
2708 stmt2 = find_associative_operation_root (stmt2, NULL);
2709 if (!stmt2)
2710 return NULL;
2711
2712 return (stmt1 == stmt2 ? stmt1 : NULL);
2713 }
2714
2715 /* Checks whether R1 and R2 are combined together using CODE, with the result
2716 in RSLT_TYPE, in order R1 CODE R2 if SWAP is false and in order R2 CODE R1
2717 if it is true. If CODE is ERROR_MARK, set these values instead. */
2718
2719 bool
2720 pcom_worker::combinable_refs_p (dref r1, dref r2,
2721 enum tree_code *code, bool *swap, tree *rslt_type)
2722 {
2723 enum tree_code acode;
2724 bool aswap;
2725 tree atype;
2726 tree name1, name2;
2727 gimple *stmt;
2728
2729 name1 = name_for_ref (r1);
2730 name2 = name_for_ref (r2);
2731 gcc_assert (name1 != NULL_TREE && name2 != NULL_TREE);
2732
2733 stmt = find_common_use_stmt (&name1, &name2);
2734
2735 if (!stmt
2736 /* A simple post-dominance check - make sure the combination
2737 is executed under the same condition as the references. */
2738 || (gimple_bb (stmt) != gimple_bb (r1->stmt)
2739 && gimple_bb (stmt) != gimple_bb (r2->stmt)))
2740 return false;
2741
2742 acode = gimple_assign_rhs_code (stmt);
2743 aswap = (!commutative_tree_code (acode)
2744 && gimple_assign_rhs1 (stmt) != name1);
2745 atype = TREE_TYPE (gimple_assign_lhs (stmt));
2746
2747 if (*code == ERROR_MARK)
2748 {
2749 *code = acode;
2750 *swap = aswap;
2751 *rslt_type = atype;
2752 return true;
2753 }
2754
2755 return (*code == acode
2756 && *swap == aswap
2757 && *rslt_type == atype);
2758 }
2759
2760 /* Remove OP from the operation on rhs of STMT, and replace STMT with
2761 an assignment of the remaining operand. */
2762
2763 static void
2764 remove_name_from_operation (gimple *stmt, tree op)
2765 {
2766 tree other_op;
2767 gimple_stmt_iterator si;
2768
2769 gcc_assert (is_gimple_assign (stmt));
2770
2771 if (gimple_assign_rhs1 (stmt) == op)
2772 other_op = gimple_assign_rhs2 (stmt);
2773 else
2774 other_op = gimple_assign_rhs1 (stmt);
2775
2776 si = gsi_for_stmt (stmt);
2777 gimple_assign_set_rhs_from_tree (&si, other_op);
2778
2779 /* We should not have reallocated STMT. */
2780 gcc_assert (gsi_stmt (si) == stmt);
2781
2782 update_stmt (stmt);
2783 }
2784
2785 /* Reassociates the expression in that NAME1 and NAME2 are used so that they
2786 are combined in a single statement, and returns this statement. */
2787
2788 gimple *
2789 pcom_worker::reassociate_to_the_same_stmt (tree name1, tree name2)
2790 {
2791 gimple *stmt1, *stmt2, *root1, *root2, *s1, *s2;
2792 gassign *new_stmt, *tmp_stmt;
2793 tree new_name, tmp_name, var, r1, r2;
2794 unsigned dist1, dist2;
2795 enum tree_code code;
2796 tree type = TREE_TYPE (name1);
2797 gimple_stmt_iterator bsi;
2798
2799 stmt1 = find_use_stmt (&name1);
2800 stmt2 = find_use_stmt (&name2);
2801 root1 = find_associative_operation_root (stmt1, &dist1);
2802 root2 = find_associative_operation_root (stmt2, &dist2);
2803 code = gimple_assign_rhs_code (stmt1);
2804
2805 gcc_assert (root1 && root2 && root1 == root2
2806 && code == gimple_assign_rhs_code (stmt2));
2807
2808 /* Find the root of the nearest expression in that both NAME1 and NAME2
2809 are used. */
2810 r1 = name1;
2811 s1 = stmt1;
2812 r2 = name2;
2813 s2 = stmt2;
2814
2815 while (dist1 > dist2)
2816 {
2817 s1 = find_use_stmt (&r1);
2818 r1 = gimple_assign_lhs (s1);
2819 dist1--;
2820 }
2821 while (dist2 > dist1)
2822 {
2823 s2 = find_use_stmt (&r2);
2824 r2 = gimple_assign_lhs (s2);
2825 dist2--;
2826 }
2827
2828 while (s1 != s2)
2829 {
2830 s1 = find_use_stmt (&r1);
2831 r1 = gimple_assign_lhs (s1);
2832 s2 = find_use_stmt (&r2);
2833 r2 = gimple_assign_lhs (s2);
2834 }
2835
2836 /* Remove NAME1 and NAME2 from the statements in that they are used
2837 currently. */
2838 remove_name_from_operation (stmt1, name1);
2839 remove_name_from_operation (stmt2, name2);
2840
2841 /* Insert the new statement combining NAME1 and NAME2 before S1, and
2842 combine it with the rhs of S1. */
2843 var = create_tmp_reg (type, "predreastmp");
2844 new_name = make_ssa_name (var);
2845 new_stmt = gimple_build_assign (new_name, code, name1, name2);
2846
2847 var = create_tmp_reg (type, "predreastmp");
2848 tmp_name = make_ssa_name (var);
2849
2850 /* Rhs of S1 may now be either a binary expression with operation
2851 CODE, or gimple_val (in case that stmt1 == s1 or stmt2 == s1,
2852 so that name1 or name2 was removed from it). */
2853 tmp_stmt = gimple_build_assign (tmp_name, gimple_assign_rhs_code (s1),
2854 gimple_assign_rhs1 (s1),
2855 gimple_assign_rhs2 (s1));
2856
2857 bsi = gsi_for_stmt (s1);
2858 gimple_assign_set_rhs_with_ops (&bsi, code, new_name, tmp_name);
2859 s1 = gsi_stmt (bsi);
2860 update_stmt (s1);
2861
2862 gsi_insert_before (&bsi, new_stmt, GSI_SAME_STMT);
2863 gsi_insert_before (&bsi, tmp_stmt, GSI_SAME_STMT);
2864
2865 return new_stmt;
2866 }
2867
2868 /* Returns the statement that combines references R1 and R2. In case R1
2869 and R2 are not used in the same statement, but they are used with an
2870 associative and commutative operation in the same expression, reassociate
2871 the expression so that they are used in the same statement. */
2872
2873 gimple *
2874 pcom_worker::stmt_combining_refs (dref r1, dref r2)
2875 {
2876 gimple *stmt1, *stmt2;
2877 tree name1 = name_for_ref (r1);
2878 tree name2 = name_for_ref (r2);
2879
2880 stmt1 = find_use_stmt (&name1);
2881 stmt2 = find_use_stmt (&name2);
2882 if (stmt1 == stmt2)
2883 return stmt1;
2884
2885 return reassociate_to_the_same_stmt (name1, name2);
2886 }
2887
2888 /* Tries to combine chains CH1 and CH2 together. If this succeeds, the
2889 description of the new chain is returned, otherwise we return NULL. */
2890
2891 chain_p
2892 pcom_worker::combine_chains (chain_p ch1, chain_p ch2)
2893 {
2894 dref r1, r2, nw;
2895 enum tree_code op = ERROR_MARK;
2896 bool swap = false;
2897 chain_p new_chain;
2898 unsigned i;
2899 tree rslt_type = NULL_TREE;
2900
2901 if (ch1 == ch2)
2902 return NULL;
2903 if (ch1->length != ch2->length)
2904 return NULL;
2905
2906 if (ch1->refs.length () != ch2->refs.length ())
2907 return NULL;
2908
2909 for (i = 0; (ch1->refs.iterate (i, &r1)
2910 && ch2->refs.iterate (i, &r2)); i++)
2911 {
2912 if (r1->distance != r2->distance)
2913 return NULL;
2914
2915 if (!combinable_refs_p (r1, r2, &op, &swap, &rslt_type))
2916 return NULL;
2917 }
2918
2919 if (swap)
2920 std::swap (ch1, ch2);
2921
2922 new_chain = new struct chain (CT_COMBINATION);
2923 new_chain->op = op;
2924 new_chain->ch1 = ch1;
2925 new_chain->ch2 = ch2;
2926 new_chain->rslt_type = rslt_type;
2927 new_chain->length = ch1->length;
2928
2929 for (i = 0; (ch1->refs.iterate (i, &r1)
2930 && ch2->refs.iterate (i, &r2)); i++)
2931 {
2932 nw = XCNEW (class dref_d);
2933 nw->stmt = stmt_combining_refs (r1, r2);
2934 nw->distance = r1->distance;
2935
2936 new_chain->refs.safe_push (nw);
2937 }
2938
2939 ch1->combined = true;
2940 ch2->combined = true;
2941 return new_chain;
2942 }
2943
2944 /* Recursively update position information of all offspring chains to ROOT
2945 chain's position information. */
2946
2947 static void
2948 update_pos_for_combined_chains (chain_p root)
2949 {
2950 chain_p ch1 = root->ch1, ch2 = root->ch2;
2951 dref ref, ref1, ref2;
2952 for (unsigned j = 0; (root->refs.iterate (j, &ref)
2953 && ch1->refs.iterate (j, &ref1)
2954 && ch2->refs.iterate (j, &ref2)); ++j)
2955 ref1->pos = ref2->pos = ref->pos;
2956
2957 if (ch1->type == CT_COMBINATION)
2958 update_pos_for_combined_chains (ch1);
2959 if (ch2->type == CT_COMBINATION)
2960 update_pos_for_combined_chains (ch2);
2961 }
2962
2963 /* Returns true if statement S1 dominates statement S2. */
2964
2965 static bool
2966 pcom_stmt_dominates_stmt_p (gimple *s1, gimple *s2)
2967 {
2968 basic_block bb1 = gimple_bb (s1), bb2 = gimple_bb (s2);
2969
2970 if (!bb1 || s1 == s2)
2971 return true;
2972
2973 if (bb1 == bb2)
2974 return gimple_uid (s1) < gimple_uid (s2);
2975
2976 return dominated_by_p (CDI_DOMINATORS, bb2, bb1);
2977 }
2978
2979 /* Try to combine the chains. */
2980
2981 void
2982 pcom_worker::try_combine_chains ()
2983 {
2984 unsigned i, j;
2985 chain_p ch1, ch2, cch;
2986 auto_vec<chain_p> worklist;
2987 bool combined_p = false;
2988
2989 FOR_EACH_VEC_ELT (m_chains, i, ch1)
2990 if (chain_can_be_combined_p (ch1))
2991 worklist.safe_push (ch1);
2992
2993 while (!worklist.is_empty ())
2994 {
2995 ch1 = worklist.pop ();
2996 if (!chain_can_be_combined_p (ch1))
2997 continue;
2998
2999 FOR_EACH_VEC_ELT (m_chains, j, ch2)
3000 {
3001 if (!chain_can_be_combined_p (ch2))
3002 continue;
3003
3004 cch = combine_chains (ch1, ch2);
3005 if (cch)
3006 {
3007 worklist.safe_push (cch);
3008 m_chains.safe_push (cch);
3009 combined_p = true;
3010 break;
3011 }
3012 }
3013 }
3014 if (!combined_p)
3015 return;
3016
3017 /* Setup UID for all statements in dominance order. */
3018 basic_block *bbs = get_loop_body_in_dom_order (m_loop);
3019 renumber_gimple_stmt_uids_in_blocks (bbs, m_loop->num_nodes);
3020 free (bbs);
3021
3022 /* Re-association in combined chains may generate statements different to
3023 order of references of the original chain. We need to keep references
3024 of combined chain in dominance order so that all uses will be inserted
3025 after definitions. Note:
3026 A) This is necessary for all combined chains.
3027 B) This is only necessary for ZERO distance references because other
3028 references inherit value from loop carried PHIs.
3029
3030 We first update position information for all combined chains. */
3031 dref ref;
3032 for (i = 0; m_chains.iterate (i, &ch1); ++i)
3033 {
3034 if (ch1->type != CT_COMBINATION || ch1->combined)
3035 continue;
3036
3037 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
3038 ref->pos = gimple_uid (ref->stmt);
3039
3040 update_pos_for_combined_chains (ch1);
3041 }
3042 /* Then sort references according to newly updated position information. */
3043 for (i = 0; m_chains.iterate (i, &ch1); ++i)
3044 {
3045 if (ch1->type != CT_COMBINATION && !ch1->combined)
3046 continue;
3047
3048 /* Find the first reference with non-ZERO distance. */
3049 if (ch1->length == 0)
3050 j = ch1->refs.length();
3051 else
3052 {
3053 for (j = 0; ch1->refs.iterate (j, &ref); ++j)
3054 if (ref->distance != 0)
3055 break;
3056 }
3057
3058 /* Sort all ZERO distance references by position. */
3059 qsort (&ch1->refs[0], j, sizeof (ch1->refs[0]), order_drefs_by_pos);
3060
3061 if (ch1->combined)
3062 continue;
3063
3064 /* For ZERO length chain, has_max_use_after must be true since root
3065 combined stmt must dominates others. */
3066 if (ch1->length == 0)
3067 {
3068 ch1->has_max_use_after = true;
3069 continue;
3070 }
3071 /* Check if there is use at max distance after root for combined chains
3072 and set flag accordingly. */
3073 ch1->has_max_use_after = false;
3074 gimple *root_stmt = get_chain_root (ch1)->stmt;
3075 for (j = 1; ch1->refs.iterate (j, &ref); ++j)
3076 {
3077 if (ref->distance == ch1->length
3078 && !pcom_stmt_dominates_stmt_p (ref->stmt, root_stmt))
3079 {
3080 ch1->has_max_use_after = true;
3081 break;
3082 }
3083 }
3084 }
3085 }
3086
3087 /* Prepare initializers for store elimination CHAIN in LOOP. Returns false
3088 if this is impossible because one of these initializers may trap, true
3089 otherwise. */
3090
3091 static bool
3092 prepare_initializers_chain_store_elim (class loop *loop, chain_p chain)
3093 {
3094 unsigned i, n = chain->length;
3095
3096 /* For now we can't eliminate stores if some of them are conditional
3097 executed. */
3098 if (!chain->all_always_accessed)
3099 return false;
3100
3101 /* Nothing to intialize for intra-iteration store elimination. */
3102 if (n == 0 && chain->type == CT_STORE_STORE)
3103 return true;
3104
3105 /* For store elimination chain, there is nothing to initialize if stores
3106 to be eliminated only store loop invariant values into memory. */
3107 if (chain->type == CT_STORE_STORE
3108 && is_inv_store_elimination_chain (loop, chain))
3109 {
3110 chain->inv_store_elimination = true;
3111 return true;
3112 }
3113
3114 chain->inits.create (n);
3115 chain->inits.safe_grow_cleared (n, true);
3116
3117 /* For store eliminatin chain like below:
3118
3119 for (i = 0; i < len; i++)
3120 {
3121 a[i] = 1;
3122 // a[i + 1] = ...
3123 a[i + 2] = 3;
3124 }
3125
3126 store to a[i + 1] is missed in loop body, it acts like bubbles. The
3127 content of a[i + 1] remain the same if the loop iterates fewer times
3128 than chain->length. We need to set up root variables for such stores
3129 by loading from memory before loop. Note we only need to load bubble
3130 elements because loop body is guaranteed to be executed at least once
3131 after loop's preheader edge. */
3132 auto_vec<bool> bubbles;
3133 bubbles.safe_grow_cleared (n + 1, true);
3134 for (i = 0; i < chain->refs.length (); i++)
3135 bubbles[chain->refs[i]->distance] = true;
3136
3137 struct data_reference *dr = get_chain_root (chain)->ref;
3138 for (i = 0; i < n; i++)
3139 {
3140 if (bubbles[i])
3141 continue;
3142
3143 gimple_seq stmts = NULL;
3144
3145 tree init = ref_at_iteration (dr, (int) 0 - i, &stmts);
3146 if (stmts)
3147 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3148
3149 chain->inits[i] = init;
3150 }
3151
3152 return true;
3153 }
3154
3155 /* Prepare initializers for CHAIN. Returns false if this is impossible
3156 because one of these initializers may trap, true otherwise. */
3157
3158 bool
3159 pcom_worker::prepare_initializers_chain (chain_p chain)
3160 {
3161 unsigned i, n = (chain->type == CT_INVARIANT) ? 1 : chain->length;
3162 struct data_reference *dr = get_chain_root (chain)->ref;
3163 tree init;
3164 dref laref;
3165 edge entry = loop_preheader_edge (m_loop);
3166
3167 if (chain->type == CT_STORE_STORE)
3168 return prepare_initializers_chain_store_elim (m_loop, chain);
3169
3170 /* Find the initializers for the variables, and check that they cannot
3171 trap. */
3172 chain->inits.create (n);
3173 for (i = 0; i < n; i++)
3174 chain->inits.quick_push (NULL_TREE);
3175
3176 /* If we have replaced some looparound phi nodes, use their initializers
3177 instead of creating our own. */
3178 FOR_EACH_VEC_ELT (chain->refs, i, laref)
3179 {
3180 if (gimple_code (laref->stmt) != GIMPLE_PHI)
3181 continue;
3182
3183 gcc_assert (laref->distance > 0);
3184 chain->inits[n - laref->distance]
3185 = PHI_ARG_DEF_FROM_EDGE (laref->stmt, entry);
3186 }
3187
3188 for (i = 0; i < n; i++)
3189 {
3190 gimple_seq stmts = NULL;
3191
3192 if (chain->inits[i] != NULL_TREE)
3193 continue;
3194
3195 init = ref_at_iteration (dr, (int) i - n, &stmts);
3196 if (!chain->all_always_accessed && tree_could_trap_p (init))
3197 {
3198 gimple_seq_discard (stmts);
3199 return false;
3200 }
3201
3202 if (stmts)
3203 gimple_seq_add_seq_without_update (&chain->init_seq, stmts);
3204
3205 chain->inits[i] = init;
3206 }
3207
3208 return true;
3209 }
3210
3211 /* Prepare initializers for chains, and free chains that cannot
3212 be used because the initializers might trap. */
3213
3214 void
3215 pcom_worker::prepare_initializers ()
3216 {
3217 chain_p chain;
3218 unsigned i;
3219
3220 for (i = 0; i < m_chains.length (); )
3221 {
3222 chain = m_chains[i];
3223 if (prepare_initializers_chain (chain))
3224 i++;
3225 else
3226 {
3227 release_chain (chain);
3228 m_chains.unordered_remove (i);
3229 }
3230 }
3231 }
3232
3233 /* Generates finalizer memory references for CHAIN. Returns true
3234 if finalizer code for CHAIN can be generated, otherwise false. */
3235
3236 bool
3237 pcom_worker::prepare_finalizers_chain (chain_p chain)
3238 {
3239 unsigned i, n = chain->length;
3240 struct data_reference *dr = get_chain_root (chain)->ref;
3241 tree fini, niters = number_of_latch_executions (m_loop);
3242
3243 /* For now we can't eliminate stores if some of them are conditional
3244 executed. */
3245 if (!chain->all_always_accessed)
3246 return false;
3247
3248 chain->finis.create (n);
3249 for (i = 0; i < n; i++)
3250 chain->finis.quick_push (NULL_TREE);
3251
3252 /* We never use looparound phi node for store elimination chains. */
3253
3254 /* Find the finalizers for the variables, and check that they cannot
3255 trap. */
3256 for (i = 0; i < n; i++)
3257 {
3258 gimple_seq stmts = NULL;
3259 gcc_assert (chain->finis[i] == NULL_TREE);
3260
3261 if (TREE_CODE (niters) != INTEGER_CST && TREE_CODE (niters) != SSA_NAME)
3262 {
3263 niters = unshare_expr (niters);
3264 niters = force_gimple_operand (niters, &stmts, true, NULL);
3265 if (stmts)
3266 {
3267 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3268 stmts = NULL;
3269 }
3270 }
3271 fini = ref_at_iteration (dr, (int) 0 - i, &stmts, niters);
3272 if (stmts)
3273 gimple_seq_add_seq_without_update (&chain->fini_seq, stmts);
3274
3275 chain->finis[i] = fini;
3276 }
3277
3278 return true;
3279 }
3280
3281 /* Generates finalizer memory reference for chains. Returns true if
3282 finalizer code generation for chains breaks loop closed ssa form. */
3283
3284 bool
3285 pcom_worker::prepare_finalizers ()
3286 {
3287 chain_p chain;
3288 unsigned i;
3289 bool loop_closed_ssa = false;
3290
3291 for (i = 0; i < m_chains.length ();)
3292 {
3293 chain = m_chains[i];
3294
3295 /* Finalizer is only necessary for inter-iteration store elimination
3296 chains. */
3297 if (chain->length == 0 || chain->type != CT_STORE_STORE)
3298 {
3299 i++;
3300 continue;
3301 }
3302
3303 if (prepare_finalizers_chain (chain))
3304 {
3305 i++;
3306 /* Be conservative, assume loop closed ssa form is corrupted
3307 by store-store chain. Though it's not always the case if
3308 eliminated stores only store loop invariant values into
3309 memory. */
3310 loop_closed_ssa = true;
3311 }
3312 else
3313 {
3314 release_chain (chain);
3315 m_chains.unordered_remove (i);
3316 }
3317 }
3318 return loop_closed_ssa;
3319 }
3320
3321 /* Insert all initializing gimple stmts into LOOP's entry edge. */
3322
3323 static void
3324 insert_init_seqs (class loop *loop, vec<chain_p> &chains)
3325 {
3326 unsigned i;
3327 edge entry = loop_preheader_edge (loop);
3328
3329 for (i = 0; i < chains.length (); ++i)
3330 if (chains[i]->init_seq)
3331 {
3332 gsi_insert_seq_on_edge_immediate (entry, chains[i]->init_seq);
3333 chains[i]->init_seq = NULL;
3334 }
3335 }
3336
3337 /* Performs predictive commoning for LOOP. Sets bit 1<<1 of return value
3338 if LOOP was unrolled; Sets bit 1<<2 of return value if loop closed ssa
3339 form was corrupted. Non-zero return value indicates some changes were
3340 applied to this loop. */
3341
3342 unsigned
3343 pcom_worker::tree_predictive_commoning_loop (bool allow_unroll_p)
3344 {
3345 struct component *components;
3346 unsigned unroll_factor = 0;
3347 class tree_niter_desc desc;
3348 bool unroll = false, loop_closed_ssa = false;
3349
3350 if (dump_file && (dump_flags & TDF_DETAILS))
3351 fprintf (dump_file, "Processing loop %d\n", m_loop->num);
3352
3353 /* Nothing for predicitive commoning if loop only iterates 1 time. */
3354 if (get_max_loop_iterations_int (m_loop) == 0)
3355 {
3356 if (dump_file && (dump_flags & TDF_DETAILS))
3357 fprintf (dump_file, "Loop iterates only 1 time, nothing to do.\n");
3358
3359 return 0;
3360 }
3361
3362 /* Find the data references and split them into components according to their
3363 dependence relations. */
3364 auto_vec<loop_p, 3> loop_nest;
3365 if (!compute_data_dependences_for_loop (m_loop, true, &loop_nest, &m_datarefs,
3366 &m_dependences))
3367 {
3368 if (dump_file && (dump_flags & TDF_DETAILS))
3369 fprintf (dump_file, "Cannot analyze data dependencies\n");
3370 return 0;
3371 }
3372
3373 if (dump_file && (dump_flags & TDF_DETAILS))
3374 dump_data_dependence_relations (dump_file, m_dependences);
3375
3376 components = split_data_refs_to_components ();
3377
3378 loop_nest.release ();
3379 if (!components)
3380 return 0;
3381
3382 if (dump_file && (dump_flags & TDF_DETAILS))
3383 {
3384 fprintf (dump_file, "Initial state:\n\n");
3385 dump_components (dump_file, components);
3386 }
3387
3388 /* Find the suitable components and split them into chains. */
3389 components = filter_suitable_components (components);
3390
3391 auto_bitmap tmp_vars;
3392 determine_roots (components);
3393 release_components (components);
3394
3395 if (!m_chains.exists ())
3396 {
3397 if (dump_file && (dump_flags & TDF_DETAILS))
3398 fprintf (dump_file,
3399 "Predictive commoning failed: no suitable chains\n");
3400 return 0;
3401 }
3402
3403 prepare_initializers ();
3404 loop_closed_ssa = prepare_finalizers ();
3405
3406 /* Try to combine the chains that are always worked with together. */
3407 try_combine_chains ();
3408
3409 insert_init_seqs (m_loop, m_chains);
3410
3411 if (dump_file && (dump_flags & TDF_DETAILS))
3412 {
3413 fprintf (dump_file, "Before commoning:\n\n");
3414 dump_chains (dump_file, m_chains);
3415 }
3416
3417 if (allow_unroll_p)
3418 /* Determine the unroll factor, and if the loop should be unrolled, ensure
3419 that its number of iterations is divisible by the factor. */
3420 unroll_factor = determine_unroll_factor (m_chains);
3421
3422 if (unroll_factor > 1)
3423 unroll = can_unroll_loop_p (m_loop, unroll_factor, &desc);
3424
3425 /* Execute the predictive commoning transformations, and possibly unroll the
3426 loop. */
3427 if (unroll)
3428 {
3429 struct epcc_data dta;
3430
3431 if (dump_file && (dump_flags & TDF_DETAILS))
3432 fprintf (dump_file, "Unrolling %u times.\n", unroll_factor);
3433
3434 dta.tmp_vars = tmp_vars;
3435 dta.chains = m_chains.to_vec_legacy ();
3436 dta.worker = this;
3437
3438 /* Cfg manipulations performed in tree_transform_and_unroll_loop before
3439 execute_pred_commoning_cbck is called may cause phi nodes to be
3440 reallocated, which is a problem since CHAINS may point to these
3441 statements. To fix this, we store the ssa names defined by the
3442 phi nodes here instead of the phi nodes themselves, and restore
3443 the phi nodes in execute_pred_commoning_cbck. A bit hacky. */
3444 replace_phis_by_defined_names (m_chains);
3445
3446 tree_transform_and_unroll_loop (m_loop, unroll_factor, &desc,
3447 execute_pred_commoning_cbck, &dta);
3448 eliminate_temp_copies (m_loop, tmp_vars);
3449 }
3450 else
3451 {
3452 if (dump_file && (dump_flags & TDF_DETAILS))
3453 fprintf (dump_file,
3454 "Executing predictive commoning without unrolling.\n");
3455 execute_pred_commoning (tmp_vars);
3456 }
3457
3458 return (unroll ? 2 : 1) | (loop_closed_ssa ? 4 : 1);
3459 }
3460
3461 /* Runs predictive commoning. */
3462
3463 unsigned
3464 tree_predictive_commoning (bool allow_unroll_p)
3465 {
3466 unsigned ret = 0, changed = 0;
3467
3468 initialize_original_copy_tables ();
3469 for (auto loop : loops_list (cfun, LI_ONLY_INNERMOST))
3470 if (optimize_loop_for_speed_p (loop))
3471 {
3472 pcom_worker w(loop);
3473 changed |= w.tree_predictive_commoning_loop (allow_unroll_p);
3474 }
3475 free_original_copy_tables ();
3476
3477 if (changed > 0)
3478 {
3479 ret = TODO_update_ssa_only_virtuals;
3480
3481 /* Some loop(s) got unrolled. */
3482 if (changed > 1)
3483 {
3484 scev_reset ();
3485
3486 /* Need to fix up loop closed SSA. */
3487 if (changed >= 4)
3488 rewrite_into_loop_closed_ssa (NULL, TODO_update_ssa);
3489
3490 ret |= TODO_cleanup_cfg;
3491 }
3492 }
3493
3494 return ret;
3495 }
3496
3497 /* Predictive commoning Pass. */
3498
3499 static unsigned
3500 run_tree_predictive_commoning (struct function *fun, bool allow_unroll_p)
3501 {
3502 if (number_of_loops (fun) <= 1)
3503 return 0;
3504
3505 return tree_predictive_commoning (allow_unroll_p);
3506 }
3507
3508 namespace {
3509
3510 const pass_data pass_data_predcom =
3511 {
3512 GIMPLE_PASS, /* type */
3513 "pcom", /* name */
3514 OPTGROUP_LOOP, /* optinfo_flags */
3515 TV_PREDCOM, /* tv_id */
3516 PROP_cfg, /* properties_required */
3517 0, /* properties_provided */
3518 0, /* properties_destroyed */
3519 0, /* todo_flags_start */
3520 0, /* todo_flags_finish */
3521 };
3522
3523 class pass_predcom : public gimple_opt_pass
3524 {
3525 public:
3526 pass_predcom (gcc::context *ctxt)
3527 : gimple_opt_pass (pass_data_predcom, ctxt)
3528 {}
3529
3530 /* opt_pass methods: */
3531 bool
3532 gate (function *) final override
3533 {
3534 if (flag_predictive_commoning != 0)
3535 return true;
3536 /* Loop vectorization enables predictive commoning implicitly
3537 only if predictive commoning isn't set explicitly, and it
3538 doesn't allow unrolling. */
3539 if (flag_tree_loop_vectorize
3540 && !OPTION_SET_P (flag_predictive_commoning))
3541 return true;
3542
3543 return false;
3544 }
3545
3546 unsigned int
3547 execute (function *fun) final override
3548 {
3549 bool allow_unroll_p = flag_predictive_commoning != 0;
3550 return run_tree_predictive_commoning (fun, allow_unroll_p);
3551 }
3552
3553 }; // class pass_predcom
3554
3555 } // anon namespace
3556
3557 gimple_opt_pass *
3558 make_pass_predcom (gcc::context *ctxt)
3559 {
3560 return new pass_predcom (ctxt);
3561 }