]> git.ipfire.org Git - people/ms/gcc.git/blob - gcc/tree-scalar-evolution.cc
c++: namespace-scoped friend in local class [PR69410]
[people/ms/gcc.git] / gcc / tree-scalar-evolution.cc
1 /* Scalar evolution detector.
2 Copyright (C) 2003-2023 Free Software Foundation, Inc.
3 Contributed by Sebastian Pop <s.pop@laposte.net>
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 3, or (at your option) any later
10 version.
11
12 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 /*
22 Description:
23
24 This pass analyzes the evolution of scalar variables in loop
25 structures. The algorithm is based on the SSA representation,
26 and on the loop hierarchy tree. This algorithm is not based on
27 the notion of versions of a variable, as it was the case for the
28 previous implementations of the scalar evolution algorithm, but
29 it assumes that each defined name is unique.
30
31 The notation used in this file is called "chains of recurrences",
32 and has been proposed by Eugene Zima, Robert Van Engelen, and
33 others for describing induction variables in programs. For example
34 "b -> {0, +, 2}_1" means that the scalar variable "b" is equal to 0
35 when entering in the loop_1 and has a step 2 in this loop, in other
36 words "for (b = 0; b < N; b+=2);". Note that the coefficients of
37 this chain of recurrence (or chrec [shrek]) can contain the name of
38 other variables, in which case they are called parametric chrecs.
39 For example, "b -> {a, +, 2}_1" means that the initial value of "b"
40 is the value of "a". In most of the cases these parametric chrecs
41 are fully instantiated before their use because symbolic names can
42 hide some difficult cases such as self-references described later
43 (see the Fibonacci example).
44
45 A short sketch of the algorithm is:
46
47 Given a scalar variable to be analyzed, follow the SSA edge to
48 its definition:
49
50 - When the definition is a GIMPLE_ASSIGN: if the right hand side
51 (RHS) of the definition cannot be statically analyzed, the answer
52 of the analyzer is: "don't know".
53 Otherwise, for all the variables that are not yet analyzed in the
54 RHS, try to determine their evolution, and finally try to
55 evaluate the operation of the RHS that gives the evolution
56 function of the analyzed variable.
57
58 - When the definition is a condition-phi-node: determine the
59 evolution function for all the branches of the phi node, and
60 finally merge these evolutions (see chrec_merge).
61
62 - When the definition is a loop-phi-node: determine its initial
63 condition, that is the SSA edge defined in an outer loop, and
64 keep it symbolic. Then determine the SSA edges that are defined
65 in the body of the loop. Follow the inner edges until ending on
66 another loop-phi-node of the same analyzed loop. If the reached
67 loop-phi-node is not the starting loop-phi-node, then we keep
68 this definition under a symbolic form. If the reached
69 loop-phi-node is the same as the starting one, then we compute a
70 symbolic stride on the return path. The result is then the
71 symbolic chrec {initial_condition, +, symbolic_stride}_loop.
72
73 Examples:
74
75 Example 1: Illustration of the basic algorithm.
76
77 | a = 3
78 | loop_1
79 | b = phi (a, c)
80 | c = b + 1
81 | if (c > 10) exit_loop
82 | endloop
83
84 Suppose that we want to know the number of iterations of the
85 loop_1. The exit_loop is controlled by a COND_EXPR (c > 10). We
86 ask the scalar evolution analyzer two questions: what's the
87 scalar evolution (scev) of "c", and what's the scev of "10". For
88 "10" the answer is "10" since it is a scalar constant. For the
89 scalar variable "c", it follows the SSA edge to its definition,
90 "c = b + 1", and then asks again what's the scev of "b".
91 Following the SSA edge, we end on a loop-phi-node "b = phi (a,
92 c)", where the initial condition is "a", and the inner loop edge
93 is "c". The initial condition is kept under a symbolic form (it
94 may be the case that the copy constant propagation has done its
95 work and we end with the constant "3" as one of the edges of the
96 loop-phi-node). The update edge is followed to the end of the
97 loop, and until reaching again the starting loop-phi-node: b -> c
98 -> b. At this point we have drawn a path from "b" to "b" from
99 which we compute the stride in the loop: in this example it is
100 "+1". The resulting scev for "b" is "b -> {a, +, 1}_1". Now
101 that the scev for "b" is known, it is possible to compute the
102 scev for "c", that is "c -> {a + 1, +, 1}_1". In order to
103 determine the number of iterations in the loop_1, we have to
104 instantiate_parameters (loop_1, {a + 1, +, 1}_1), that gives after some
105 more analysis the scev {4, +, 1}_1, or in other words, this is
106 the function "f (x) = x + 4", where x is the iteration count of
107 the loop_1. Now we have to solve the inequality "x + 4 > 10",
108 and take the smallest iteration number for which the loop is
109 exited: x = 7. This loop runs from x = 0 to x = 7, and in total
110 there are 8 iterations. In terms of loop normalization, we have
111 created a variable that is implicitly defined, "x" or just "_1",
112 and all the other analyzed scalars of the loop are defined in
113 function of this variable:
114
115 a -> 3
116 b -> {3, +, 1}_1
117 c -> {4, +, 1}_1
118
119 or in terms of a C program:
120
121 | a = 3
122 | for (x = 0; x <= 7; x++)
123 | {
124 | b = x + 3
125 | c = x + 4
126 | }
127
128 Example 2a: Illustration of the algorithm on nested loops.
129
130 | loop_1
131 | a = phi (1, b)
132 | c = a + 2
133 | loop_2 10 times
134 | b = phi (c, d)
135 | d = b + 3
136 | endloop
137 | endloop
138
139 For analyzing the scalar evolution of "a", the algorithm follows
140 the SSA edge into the loop's body: "a -> b". "b" is an inner
141 loop-phi-node, and its analysis as in Example 1, gives:
142
143 b -> {c, +, 3}_2
144 d -> {c + 3, +, 3}_2
145
146 Following the SSA edge for the initial condition, we end on "c = a
147 + 2", and then on the starting loop-phi-node "a". From this point,
148 the loop stride is computed: back on "c = a + 2" we get a "+2" in
149 the loop_1, then on the loop-phi-node "b" we compute the overall
150 effect of the inner loop that is "b = c + 30", and we get a "+30"
151 in the loop_1. That means that the overall stride in loop_1 is
152 equal to "+32", and the result is:
153
154 a -> {1, +, 32}_1
155 c -> {3, +, 32}_1
156
157 Example 2b: Multivariate chains of recurrences.
158
159 | loop_1
160 | k = phi (0, k + 1)
161 | loop_2 4 times
162 | j = phi (0, j + 1)
163 | loop_3 4 times
164 | i = phi (0, i + 1)
165 | A[j + k] = ...
166 | endloop
167 | endloop
168 | endloop
169
170 Analyzing the access function of array A with
171 instantiate_parameters (loop_1, "j + k"), we obtain the
172 instantiation and the analysis of the scalar variables "j" and "k"
173 in loop_1. This leads to the scalar evolution {4, +, 1}_1: the end
174 value of loop_2 for "j" is 4, and the evolution of "k" in loop_1 is
175 {0, +, 1}_1. To obtain the evolution function in loop_3 and
176 instantiate the scalar variables up to loop_1, one has to use:
177 instantiate_scev (block_before_loop (loop_1), loop_3, "j + k").
178 The result of this call is {{0, +, 1}_1, +, 1}_2.
179
180 Example 3: Higher degree polynomials.
181
182 | loop_1
183 | a = phi (2, b)
184 | c = phi (5, d)
185 | b = a + 1
186 | d = c + a
187 | endloop
188
189 a -> {2, +, 1}_1
190 b -> {3, +, 1}_1
191 c -> {5, +, a}_1
192 d -> {5 + a, +, a}_1
193
194 instantiate_parameters (loop_1, {5, +, a}_1) -> {5, +, 2, +, 1}_1
195 instantiate_parameters (loop_1, {5 + a, +, a}_1) -> {7, +, 3, +, 1}_1
196
197 Example 4: Lucas, Fibonacci, or mixers in general.
198
199 | loop_1
200 | a = phi (1, b)
201 | c = phi (3, d)
202 | b = c
203 | d = c + a
204 | endloop
205
206 a -> (1, c)_1
207 c -> {3, +, a}_1
208
209 The syntax "(1, c)_1" stands for a PEELED_CHREC that has the
210 following semantics: during the first iteration of the loop_1, the
211 variable contains the value 1, and then it contains the value "c".
212 Note that this syntax is close to the syntax of the loop-phi-node:
213 "a -> (1, c)_1" vs. "a = phi (1, c)".
214
215 The symbolic chrec representation contains all the semantics of the
216 original code. What is more difficult is to use this information.
217
218 Example 5: Flip-flops, or exchangers.
219
220 | loop_1
221 | a = phi (1, b)
222 | c = phi (3, d)
223 | b = c
224 | d = a
225 | endloop
226
227 a -> (1, c)_1
228 c -> (3, a)_1
229
230 Based on these symbolic chrecs, it is possible to refine this
231 information into the more precise PERIODIC_CHRECs:
232
233 a -> |1, 3|_1
234 c -> |3, 1|_1
235
236 This transformation is not yet implemented.
237
238 Further readings:
239
240 You can find a more detailed description of the algorithm in:
241 http://icps.u-strasbg.fr/~pop/DEA_03_Pop.pdf
242 http://icps.u-strasbg.fr/~pop/DEA_03_Pop.ps.gz. But note that
243 this is a preliminary report and some of the details of the
244 algorithm have changed. I'm working on a research report that
245 updates the description of the algorithms to reflect the design
246 choices used in this implementation.
247
248 A set of slides show a high level overview of the algorithm and run
249 an example through the scalar evolution analyzer:
250 http://cri.ensmp.fr/~pop/gcc/mar04/slides.pdf
251
252 The slides that I have presented at the GCC Summit'04 are available
253 at: http://cri.ensmp.fr/~pop/gcc/20040604/gccsummit-lno-spop.pdf
254 */
255
256 #include "config.h"
257 #include "system.h"
258 #include "coretypes.h"
259 #include "backend.h"
260 #include "target.h"
261 #include "rtl.h"
262 #include "optabs-query.h"
263 #include "tree.h"
264 #include "gimple.h"
265 #include "ssa.h"
266 #include "gimple-pretty-print.h"
267 #include "fold-const.h"
268 #include "gimplify.h"
269 #include "gimple-iterator.h"
270 #include "gimplify-me.h"
271 #include "tree-cfg.h"
272 #include "tree-ssa-loop-ivopts.h"
273 #include "tree-ssa-loop-manip.h"
274 #include "tree-ssa-loop-niter.h"
275 #include "tree-ssa-loop.h"
276 #include "tree-ssa.h"
277 #include "cfgloop.h"
278 #include "tree-chrec.h"
279 #include "tree-affine.h"
280 #include "tree-scalar-evolution.h"
281 #include "dumpfile.h"
282 #include "tree-ssa-propagate.h"
283 #include "gimple-fold.h"
284 #include "tree-into-ssa.h"
285 #include "builtins.h"
286 #include "case-cfn-macros.h"
287
288 static tree analyze_scalar_evolution_1 (class loop *, tree);
289 static tree analyze_scalar_evolution_for_address_of (class loop *loop,
290 tree var);
291
292 /* The cached information about an SSA name with version NAME_VERSION,
293 claiming that below basic block with index INSTANTIATED_BELOW, the
294 value of the SSA name can be expressed as CHREC. */
295
296 struct GTY((for_user)) scev_info_str {
297 unsigned int name_version;
298 int instantiated_below;
299 tree chrec;
300 };
301
302 /* Counters for the scev database. */
303 static unsigned nb_set_scev = 0;
304 static unsigned nb_get_scev = 0;
305
306 struct scev_info_hasher : ggc_ptr_hash<scev_info_str>
307 {
308 static hashval_t hash (scev_info_str *i);
309 static bool equal (const scev_info_str *a, const scev_info_str *b);
310 };
311
312 static GTY (()) hash_table<scev_info_hasher> *scalar_evolution_info;
313
314 \f
315 /* Constructs a new SCEV_INFO_STR structure for VAR and INSTANTIATED_BELOW. */
316
317 static inline struct scev_info_str *
318 new_scev_info_str (basic_block instantiated_below, tree var)
319 {
320 struct scev_info_str *res;
321
322 res = ggc_alloc<scev_info_str> ();
323 res->name_version = SSA_NAME_VERSION (var);
324 res->chrec = chrec_not_analyzed_yet;
325 res->instantiated_below = instantiated_below->index;
326
327 return res;
328 }
329
330 /* Computes a hash function for database element ELT. */
331
332 hashval_t
333 scev_info_hasher::hash (scev_info_str *elt)
334 {
335 return elt->name_version ^ elt->instantiated_below;
336 }
337
338 /* Compares database elements E1 and E2. */
339
340 bool
341 scev_info_hasher::equal (const scev_info_str *elt1, const scev_info_str *elt2)
342 {
343 return (elt1->name_version == elt2->name_version
344 && elt1->instantiated_below == elt2->instantiated_below);
345 }
346
347 /* Get the scalar evolution of VAR for INSTANTIATED_BELOW basic block.
348 A first query on VAR returns chrec_not_analyzed_yet. */
349
350 static tree *
351 find_var_scev_info (basic_block instantiated_below, tree var)
352 {
353 struct scev_info_str *res;
354 struct scev_info_str tmp;
355
356 tmp.name_version = SSA_NAME_VERSION (var);
357 tmp.instantiated_below = instantiated_below->index;
358 scev_info_str **slot = scalar_evolution_info->find_slot (&tmp, INSERT);
359
360 if (!*slot)
361 *slot = new_scev_info_str (instantiated_below, var);
362 res = *slot;
363
364 return &res->chrec;
365 }
366
367
368 /* Hashtable helpers for a temporary hash-table used when
369 analyzing a scalar evolution, instantiating a CHREC or
370 resolving mixers. */
371
372 class instantiate_cache_type
373 {
374 public:
375 htab_t map;
376 vec<scev_info_str> entries;
377
378 instantiate_cache_type () : map (NULL), entries (vNULL) {}
379 ~instantiate_cache_type ();
380 tree get (unsigned slot) { return entries[slot].chrec; }
381 void set (unsigned slot, tree chrec) { entries[slot].chrec = chrec; }
382 };
383
384 instantiate_cache_type::~instantiate_cache_type ()
385 {
386 if (map != NULL)
387 {
388 htab_delete (map);
389 entries.release ();
390 }
391 }
392
393 /* Cache to avoid infinite recursion when instantiating an SSA name.
394 Live during the outermost analyze_scalar_evolution, instantiate_scev
395 or resolve_mixers call. */
396 static instantiate_cache_type *global_cache;
397
398
399 /* Return true when PHI is a loop-phi-node. */
400
401 static bool
402 loop_phi_node_p (gimple *phi)
403 {
404 /* The implementation of this function is based on the following
405 property: "all the loop-phi-nodes of a loop are contained in the
406 loop's header basic block". */
407
408 return loop_containing_stmt (phi)->header == gimple_bb (phi);
409 }
410
411 /* Compute the scalar evolution for EVOLUTION_FN after crossing LOOP.
412 In general, in the case of multivariate evolutions we want to get
413 the evolution in different loops. LOOP specifies the level for
414 which to get the evolution.
415
416 Example:
417
418 | for (j = 0; j < 100; j++)
419 | {
420 | for (k = 0; k < 100; k++)
421 | {
422 | i = k + j; - Here the value of i is a function of j, k.
423 | }
424 | ... = i - Here the value of i is a function of j.
425 | }
426 | ... = i - Here the value of i is a scalar.
427
428 Example:
429
430 | i_0 = ...
431 | loop_1 10 times
432 | i_1 = phi (i_0, i_2)
433 | i_2 = i_1 + 2
434 | endloop
435
436 This loop has the same effect as:
437 LOOP_1 has the same effect as:
438
439 | i_1 = i_0 + 20
440
441 The overall effect of the loop, "i_0 + 20" in the previous example,
442 is obtained by passing in the parameters: LOOP = 1,
443 EVOLUTION_FN = {i_0, +, 2}_1.
444 */
445
446 tree
447 compute_overall_effect_of_inner_loop (class loop *loop, tree evolution_fn)
448 {
449 bool val = false;
450
451 if (evolution_fn == chrec_dont_know)
452 return chrec_dont_know;
453
454 else if (TREE_CODE (evolution_fn) == POLYNOMIAL_CHREC)
455 {
456 class loop *inner_loop = get_chrec_loop (evolution_fn);
457
458 if (inner_loop == loop
459 || flow_loop_nested_p (loop, inner_loop))
460 {
461 tree nb_iter = number_of_latch_executions (inner_loop);
462
463 if (nb_iter == chrec_dont_know)
464 return chrec_dont_know;
465 else
466 {
467 tree res;
468
469 /* evolution_fn is the evolution function in LOOP. Get
470 its value in the nb_iter-th iteration. */
471 res = chrec_apply (inner_loop->num, evolution_fn, nb_iter);
472
473 if (chrec_contains_symbols_defined_in_loop (res, loop->num))
474 res = instantiate_parameters (loop, res);
475
476 /* Continue the computation until ending on a parent of LOOP. */
477 return compute_overall_effect_of_inner_loop (loop, res);
478 }
479 }
480 else
481 return evolution_fn;
482 }
483
484 /* If the evolution function is an invariant, there is nothing to do. */
485 else if (no_evolution_in_loop_p (evolution_fn, loop->num, &val) && val)
486 return evolution_fn;
487
488 else
489 return chrec_dont_know;
490 }
491
492 /* Associate CHREC to SCALAR. */
493
494 static void
495 set_scalar_evolution (basic_block instantiated_below, tree scalar, tree chrec)
496 {
497 tree *scalar_info;
498
499 if (TREE_CODE (scalar) != SSA_NAME)
500 return;
501
502 scalar_info = find_var_scev_info (instantiated_below, scalar);
503
504 if (dump_file)
505 {
506 if (dump_flags & TDF_SCEV)
507 {
508 fprintf (dump_file, "(set_scalar_evolution \n");
509 fprintf (dump_file, " instantiated_below = %d \n",
510 instantiated_below->index);
511 fprintf (dump_file, " (scalar = ");
512 print_generic_expr (dump_file, scalar);
513 fprintf (dump_file, ")\n (scalar_evolution = ");
514 print_generic_expr (dump_file, chrec);
515 fprintf (dump_file, "))\n");
516 }
517 if (dump_flags & TDF_STATS)
518 nb_set_scev++;
519 }
520
521 *scalar_info = chrec;
522 }
523
524 /* Retrieve the chrec associated to SCALAR instantiated below
525 INSTANTIATED_BELOW block. */
526
527 static tree
528 get_scalar_evolution (basic_block instantiated_below, tree scalar)
529 {
530 tree res;
531
532 if (dump_file)
533 {
534 if (dump_flags & TDF_SCEV)
535 {
536 fprintf (dump_file, "(get_scalar_evolution \n");
537 fprintf (dump_file, " (scalar = ");
538 print_generic_expr (dump_file, scalar);
539 fprintf (dump_file, ")\n");
540 }
541 if (dump_flags & TDF_STATS)
542 nb_get_scev++;
543 }
544
545 if (VECTOR_TYPE_P (TREE_TYPE (scalar))
546 || TREE_CODE (TREE_TYPE (scalar)) == COMPLEX_TYPE)
547 /* For chrec_dont_know we keep the symbolic form. */
548 res = scalar;
549 else
550 switch (TREE_CODE (scalar))
551 {
552 case SSA_NAME:
553 if (SSA_NAME_IS_DEFAULT_DEF (scalar))
554 res = scalar;
555 else
556 res = *find_var_scev_info (instantiated_below, scalar);
557 break;
558
559 case REAL_CST:
560 case FIXED_CST:
561 case INTEGER_CST:
562 res = scalar;
563 break;
564
565 default:
566 res = chrec_not_analyzed_yet;
567 break;
568 }
569
570 if (dump_file && (dump_flags & TDF_SCEV))
571 {
572 fprintf (dump_file, " (scalar_evolution = ");
573 print_generic_expr (dump_file, res);
574 fprintf (dump_file, "))\n");
575 }
576
577 return res;
578 }
579
580 \f
581 /* Depth first search algorithm. */
582
583 enum t_bool {
584 t_false,
585 t_true,
586 t_dont_know
587 };
588
589 class scev_dfs
590 {
591 public:
592 scev_dfs (class loop *loop_, gphi *phi_, tree init_cond_)
593 : loop (loop_), loop_phi_node (phi_), init_cond (init_cond_) {}
594 t_bool get_ev (tree *, tree);
595
596 private:
597 t_bool follow_ssa_edge_expr (gimple *, tree, tree *, int);
598 t_bool follow_ssa_edge_binary (gimple *at_stmt,
599 tree type, tree rhs0, enum tree_code code,
600 tree rhs1, tree *evolution_of_loop, int limit);
601 t_bool follow_ssa_edge_in_condition_phi_branch (int i,
602 gphi *condition_phi,
603 tree *evolution_of_branch,
604 tree init_cond, int limit);
605 t_bool follow_ssa_edge_in_condition_phi (gphi *condition_phi,
606 tree *evolution_of_loop, int limit);
607 t_bool follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node,
608 tree *evolution_of_loop, int limit);
609 tree add_to_evolution (tree chrec_before, enum tree_code code,
610 tree to_add, gimple *at_stmt);
611 tree add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt);
612
613 class loop *loop;
614 gphi *loop_phi_node;
615 tree init_cond;
616 };
617
618 t_bool
619 scev_dfs::get_ev (tree *ev_fn, tree arg)
620 {
621 *ev_fn = chrec_dont_know;
622 return follow_ssa_edge_expr (loop_phi_node, arg, ev_fn, 0);
623 }
624
625 /* Helper function for add_to_evolution. Returns the evolution
626 function for an assignment of the form "a = b + c", where "a" and
627 "b" are on the strongly connected component. CHREC_BEFORE is the
628 information that we already have collected up to this point.
629 TO_ADD is the evolution of "c".
630
631 When CHREC_BEFORE has an evolution part in LOOP_NB, add to this
632 evolution the expression TO_ADD, otherwise construct an evolution
633 part for this loop. */
634
635 tree
636 scev_dfs::add_to_evolution_1 (tree chrec_before, tree to_add, gimple *at_stmt)
637 {
638 tree type, left, right;
639 unsigned loop_nb = loop->num;
640 class loop *chloop;
641
642 switch (TREE_CODE (chrec_before))
643 {
644 case POLYNOMIAL_CHREC:
645 chloop = get_chrec_loop (chrec_before);
646 if (chloop == loop
647 || flow_loop_nested_p (chloop, loop))
648 {
649 unsigned var;
650
651 type = chrec_type (chrec_before);
652
653 /* When there is no evolution part in this loop, build it. */
654 if (chloop != loop)
655 {
656 var = loop_nb;
657 left = chrec_before;
658 right = SCALAR_FLOAT_TYPE_P (type)
659 ? build_real (type, dconst0)
660 : build_int_cst (type, 0);
661 }
662 else
663 {
664 var = CHREC_VARIABLE (chrec_before);
665 left = CHREC_LEFT (chrec_before);
666 right = CHREC_RIGHT (chrec_before);
667 }
668
669 to_add = chrec_convert (type, to_add, at_stmt);
670 right = chrec_convert_rhs (type, right, at_stmt);
671 right = chrec_fold_plus (chrec_type (right), right, to_add);
672 return build_polynomial_chrec (var, left, right);
673 }
674 else
675 {
676 gcc_assert (flow_loop_nested_p (loop, chloop));
677
678 /* Search the evolution in LOOP_NB. */
679 left = add_to_evolution_1 (CHREC_LEFT (chrec_before),
680 to_add, at_stmt);
681 right = CHREC_RIGHT (chrec_before);
682 right = chrec_convert_rhs (chrec_type (left), right, at_stmt);
683 return build_polynomial_chrec (CHREC_VARIABLE (chrec_before),
684 left, right);
685 }
686
687 default:
688 /* These nodes do not depend on a loop. */
689 if (chrec_before == chrec_dont_know)
690 return chrec_dont_know;
691
692 left = chrec_before;
693 right = chrec_convert_rhs (chrec_type (left), to_add, at_stmt);
694 /* When we add the first evolution we need to replace the symbolic
695 evolution we've put in when the DFS reached the loop PHI node
696 with the initial value. There's only a limited cases of
697 extra operations ontop of that symbol allowed, namely
698 sign-conversions we can look through. For other cases we leave
699 the symbolic initial condition which causes build_polynomial_chrec
700 to return chrec_dont_know. See PR42512, PR66375 and PR107176 for
701 cases we mishandled before. */
702 STRIP_NOPS (chrec_before);
703 if (chrec_before == gimple_phi_result (loop_phi_node))
704 left = fold_convert (TREE_TYPE (left), init_cond);
705 return build_polynomial_chrec (loop_nb, left, right);
706 }
707 }
708
709 /* Add TO_ADD to the evolution part of CHREC_BEFORE in the dimension
710 of LOOP_NB.
711
712 Description (provided for completeness, for those who read code in
713 a plane, and for my poor 62 bytes brain that would have forgotten
714 all this in the next two or three months):
715
716 The algorithm of translation of programs from the SSA representation
717 into the chrecs syntax is based on a pattern matching. After having
718 reconstructed the overall tree expression for a loop, there are only
719 two cases that can arise:
720
721 1. a = loop-phi (init, a + expr)
722 2. a = loop-phi (init, expr)
723
724 where EXPR is either a scalar constant with respect to the analyzed
725 loop (this is a degree 0 polynomial), or an expression containing
726 other loop-phi definitions (these are higher degree polynomials).
727
728 Examples:
729
730 1.
731 | init = ...
732 | loop_1
733 | a = phi (init, a + 5)
734 | endloop
735
736 2.
737 | inita = ...
738 | initb = ...
739 | loop_1
740 | a = phi (inita, 2 * b + 3)
741 | b = phi (initb, b + 1)
742 | endloop
743
744 For the first case, the semantics of the SSA representation is:
745
746 | a (x) = init + \sum_{j = 0}^{x - 1} expr (j)
747
748 that is, there is a loop index "x" that determines the scalar value
749 of the variable during the loop execution. During the first
750 iteration, the value is that of the initial condition INIT, while
751 during the subsequent iterations, it is the sum of the initial
752 condition with the sum of all the values of EXPR from the initial
753 iteration to the before last considered iteration.
754
755 For the second case, the semantics of the SSA program is:
756
757 | a (x) = init, if x = 0;
758 | expr (x - 1), otherwise.
759
760 The second case corresponds to the PEELED_CHREC, whose syntax is
761 close to the syntax of a loop-phi-node:
762
763 | phi (init, expr) vs. (init, expr)_x
764
765 The proof of the translation algorithm for the first case is a
766 proof by structural induction based on the degree of EXPR.
767
768 Degree 0:
769 When EXPR is a constant with respect to the analyzed loop, or in
770 other words when EXPR is a polynomial of degree 0, the evolution of
771 the variable A in the loop is an affine function with an initial
772 condition INIT, and a step EXPR. In order to show this, we start
773 from the semantics of the SSA representation:
774
775 f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
776
777 and since "expr (j)" is a constant with respect to "j",
778
779 f (x) = init + x * expr
780
781 Finally, based on the semantics of the pure sum chrecs, by
782 identification we get the corresponding chrecs syntax:
783
784 f (x) = init * \binom{x}{0} + expr * \binom{x}{1}
785 f (x) -> {init, +, expr}_x
786
787 Higher degree:
788 Suppose that EXPR is a polynomial of degree N with respect to the
789 analyzed loop_x for which we have already determined that it is
790 written under the chrecs syntax:
791
792 | expr (x) -> {b_0, +, b_1, +, ..., +, b_{n-1}} (x)
793
794 We start from the semantics of the SSA program:
795
796 | f (x) = init + \sum_{j = 0}^{x - 1} expr (j)
797 |
798 | f (x) = init + \sum_{j = 0}^{x - 1}
799 | (b_0 * \binom{j}{0} + ... + b_{n-1} * \binom{j}{n-1})
800 |
801 | f (x) = init + \sum_{j = 0}^{x - 1}
802 | \sum_{k = 0}^{n - 1} (b_k * \binom{j}{k})
803 |
804 | f (x) = init + \sum_{k = 0}^{n - 1}
805 | (b_k * \sum_{j = 0}^{x - 1} \binom{j}{k})
806 |
807 | f (x) = init + \sum_{k = 0}^{n - 1}
808 | (b_k * \binom{x}{k + 1})
809 |
810 | f (x) = init + b_0 * \binom{x}{1} + ...
811 | + b_{n-1} * \binom{x}{n}
812 |
813 | f (x) = init * \binom{x}{0} + b_0 * \binom{x}{1} + ...
814 | + b_{n-1} * \binom{x}{n}
815 |
816
817 And finally from the definition of the chrecs syntax, we identify:
818 | f (x) -> {init, +, b_0, +, ..., +, b_{n-1}}_x
819
820 This shows the mechanism that stands behind the add_to_evolution
821 function. An important point is that the use of symbolic
822 parameters avoids the need of an analysis schedule.
823
824 Example:
825
826 | inita = ...
827 | initb = ...
828 | loop_1
829 | a = phi (inita, a + 2 + b)
830 | b = phi (initb, b + 1)
831 | endloop
832
833 When analyzing "a", the algorithm keeps "b" symbolically:
834
835 | a -> {inita, +, 2 + b}_1
836
837 Then, after instantiation, the analyzer ends on the evolution:
838
839 | a -> {inita, +, 2 + initb, +, 1}_1
840
841 */
842
843 tree
844 scev_dfs::add_to_evolution (tree chrec_before, enum tree_code code,
845 tree to_add, gimple *at_stmt)
846 {
847 tree type = chrec_type (to_add);
848 tree res = NULL_TREE;
849
850 if (to_add == NULL_TREE)
851 return chrec_before;
852
853 /* TO_ADD is either a scalar, or a parameter. TO_ADD is not
854 instantiated at this point. */
855 if (TREE_CODE (to_add) == POLYNOMIAL_CHREC)
856 /* This should not happen. */
857 return chrec_dont_know;
858
859 if (dump_file && (dump_flags & TDF_SCEV))
860 {
861 fprintf (dump_file, "(add_to_evolution \n");
862 fprintf (dump_file, " (loop_nb = %d)\n", loop->num);
863 fprintf (dump_file, " (chrec_before = ");
864 print_generic_expr (dump_file, chrec_before);
865 fprintf (dump_file, ")\n (to_add = ");
866 print_generic_expr (dump_file, to_add);
867 fprintf (dump_file, ")\n");
868 }
869
870 if (code == MINUS_EXPR)
871 to_add = chrec_fold_multiply (type, to_add, SCALAR_FLOAT_TYPE_P (type)
872 ? build_real (type, dconstm1)
873 : build_int_cst_type (type, -1));
874
875 res = add_to_evolution_1 (chrec_before, to_add, at_stmt);
876
877 if (dump_file && (dump_flags & TDF_SCEV))
878 {
879 fprintf (dump_file, " (res = ");
880 print_generic_expr (dump_file, res);
881 fprintf (dump_file, "))\n");
882 }
883
884 return res;
885 }
886
887
888 /* Follow the ssa edge into the binary expression RHS0 CODE RHS1.
889 Return true if the strongly connected component has been found. */
890
891 t_bool
892 scev_dfs::follow_ssa_edge_binary (gimple *at_stmt, tree type, tree rhs0,
893 enum tree_code code, tree rhs1,
894 tree *evolution_of_loop, int limit)
895 {
896 t_bool res = t_false;
897 tree evol;
898
899 switch (code)
900 {
901 case POINTER_PLUS_EXPR:
902 case PLUS_EXPR:
903 if (TREE_CODE (rhs0) == SSA_NAME)
904 {
905 if (TREE_CODE (rhs1) == SSA_NAME)
906 {
907 /* Match an assignment under the form:
908 "a = b + c". */
909
910 /* We want only assignments of form "name + name" contribute to
911 LIMIT, as the other cases do not necessarily contribute to
912 the complexity of the expression. */
913 limit++;
914
915 evol = *evolution_of_loop;
916 res = follow_ssa_edge_expr (at_stmt, rhs0, &evol, limit);
917 if (res == t_true)
918 *evolution_of_loop = add_to_evolution
919 (chrec_convert (type, evol, at_stmt), code, rhs1, at_stmt);
920 else if (res == t_false)
921 {
922 res = follow_ssa_edge_expr
923 (at_stmt, rhs1, evolution_of_loop, limit);
924 if (res == t_true)
925 *evolution_of_loop = add_to_evolution
926 (chrec_convert (type, *evolution_of_loop, at_stmt),
927 code, rhs0, at_stmt);
928 }
929 }
930
931 else
932 gcc_unreachable (); /* Handled in caller. */
933 }
934
935 else if (TREE_CODE (rhs1) == SSA_NAME)
936 {
937 /* Match an assignment under the form:
938 "a = ... + c". */
939 res = follow_ssa_edge_expr (at_stmt, rhs1, evolution_of_loop, limit);
940 if (res == t_true)
941 *evolution_of_loop = add_to_evolution
942 (chrec_convert (type, *evolution_of_loop, at_stmt),
943 code, rhs0, at_stmt);
944 }
945
946 else
947 /* Otherwise, match an assignment under the form:
948 "a = ... + ...". */
949 /* And there is nothing to do. */
950 res = t_false;
951 break;
952
953 case MINUS_EXPR:
954 /* This case is under the form "opnd0 = rhs0 - rhs1". */
955 if (TREE_CODE (rhs0) == SSA_NAME)
956 gcc_unreachable (); /* Handled in caller. */
957 else
958 /* Otherwise, match an assignment under the form:
959 "a = ... - ...". */
960 /* And there is nothing to do. */
961 res = t_false;
962 break;
963
964 default:
965 res = t_false;
966 }
967
968 return res;
969 }
970
971 /* Checks whether the I-th argument of a PHI comes from a backedge. */
972
973 static bool
974 backedge_phi_arg_p (gphi *phi, int i)
975 {
976 const_edge e = gimple_phi_arg_edge (phi, i);
977
978 /* We would in fact like to test EDGE_DFS_BACK here, but we do not care
979 about updating it anywhere, and this should work as well most of the
980 time. */
981 if (e->flags & EDGE_IRREDUCIBLE_LOOP)
982 return true;
983
984 return false;
985 }
986
987 /* Helper function for one branch of the condition-phi-node. Return
988 true if the strongly connected component has been found following
989 this path. */
990
991 t_bool
992 scev_dfs::follow_ssa_edge_in_condition_phi_branch (int i,
993 gphi *condition_phi,
994 tree *evolution_of_branch,
995 tree init_cond, int limit)
996 {
997 tree branch = PHI_ARG_DEF (condition_phi, i);
998 *evolution_of_branch = chrec_dont_know;
999
1000 /* Do not follow back edges (they must belong to an irreducible loop, which
1001 we really do not want to worry about). */
1002 if (backedge_phi_arg_p (condition_phi, i))
1003 return t_false;
1004
1005 if (TREE_CODE (branch) == SSA_NAME)
1006 {
1007 *evolution_of_branch = init_cond;
1008 return follow_ssa_edge_expr (condition_phi, branch,
1009 evolution_of_branch, limit);
1010 }
1011
1012 /* This case occurs when one of the condition branches sets
1013 the variable to a constant: i.e. a phi-node like
1014 "a_2 = PHI <a_7(5), 2(6)>;".
1015
1016 FIXME: This case have to be refined correctly:
1017 in some cases it is possible to say something better than
1018 chrec_dont_know, for example using a wrap-around notation. */
1019 return t_false;
1020 }
1021
1022 /* This function merges the branches of a condition-phi-node in a
1023 loop. */
1024
1025 t_bool
1026 scev_dfs::follow_ssa_edge_in_condition_phi (gphi *condition_phi,
1027 tree *evolution_of_loop, int limit)
1028 {
1029 int i, n;
1030 tree init = *evolution_of_loop;
1031 tree evolution_of_branch;
1032 t_bool res = follow_ssa_edge_in_condition_phi_branch (0, condition_phi,
1033 &evolution_of_branch,
1034 init, limit);
1035 if (res == t_false || res == t_dont_know)
1036 return res;
1037
1038 *evolution_of_loop = evolution_of_branch;
1039
1040 n = gimple_phi_num_args (condition_phi);
1041 for (i = 1; i < n; i++)
1042 {
1043 /* Quickly give up when the evolution of one of the branches is
1044 not known. */
1045 if (*evolution_of_loop == chrec_dont_know)
1046 return t_true;
1047
1048 /* Increase the limit by the PHI argument number to avoid exponential
1049 time and memory complexity. */
1050 res = follow_ssa_edge_in_condition_phi_branch (i, condition_phi,
1051 &evolution_of_branch,
1052 init, limit + i);
1053 if (res == t_false || res == t_dont_know)
1054 return res;
1055
1056 *evolution_of_loop = chrec_merge (*evolution_of_loop,
1057 evolution_of_branch);
1058 }
1059
1060 return t_true;
1061 }
1062
1063 /* Follow an SSA edge in an inner loop. It computes the overall
1064 effect of the loop, and following the symbolic initial conditions,
1065 it follows the edges in the parent loop. The inner loop is
1066 considered as a single statement. */
1067
1068 t_bool
1069 scev_dfs::follow_ssa_edge_inner_loop_phi (gphi *loop_phi_node,
1070 tree *evolution_of_loop, int limit)
1071 {
1072 class loop *loop = loop_containing_stmt (loop_phi_node);
1073 tree ev = analyze_scalar_evolution (loop, PHI_RESULT (loop_phi_node));
1074
1075 /* Sometimes, the inner loop is too difficult to analyze, and the
1076 result of the analysis is a symbolic parameter. */
1077 if (ev == PHI_RESULT (loop_phi_node))
1078 {
1079 t_bool res = t_false;
1080 int i, n = gimple_phi_num_args (loop_phi_node);
1081
1082 for (i = 0; i < n; i++)
1083 {
1084 tree arg = PHI_ARG_DEF (loop_phi_node, i);
1085 basic_block bb;
1086
1087 /* Follow the edges that exit the inner loop. */
1088 bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1089 if (!flow_bb_inside_loop_p (loop, bb))
1090 res = follow_ssa_edge_expr (loop_phi_node,
1091 arg, evolution_of_loop, limit);
1092 if (res == t_true)
1093 break;
1094 }
1095
1096 /* If the path crosses this loop-phi, give up. */
1097 if (res == t_true)
1098 *evolution_of_loop = chrec_dont_know;
1099
1100 return res;
1101 }
1102
1103 /* Otherwise, compute the overall effect of the inner loop. */
1104 ev = compute_overall_effect_of_inner_loop (loop, ev);
1105 return follow_ssa_edge_expr (loop_phi_node, ev, evolution_of_loop, limit);
1106 }
1107
1108 /* Follow the ssa edge into the expression EXPR.
1109 Return true if the strongly connected component has been found. */
1110
1111 t_bool
1112 scev_dfs::follow_ssa_edge_expr (gimple *at_stmt, tree expr,
1113 tree *evolution_of_loop, int limit)
1114 {
1115 gphi *halting_phi = loop_phi_node;
1116 enum tree_code code;
1117 tree type, rhs0, rhs1 = NULL_TREE;
1118
1119 /* The EXPR is one of the following cases:
1120 - an SSA_NAME,
1121 - an INTEGER_CST,
1122 - a PLUS_EXPR,
1123 - a POINTER_PLUS_EXPR,
1124 - a MINUS_EXPR,
1125 - other cases are not yet handled. */
1126
1127 /* For SSA_NAME look at the definition statement, handling
1128 PHI nodes and otherwise expand appropriately for the expression
1129 handling below. */
1130 if (TREE_CODE (expr) == SSA_NAME)
1131 {
1132 gimple *def = SSA_NAME_DEF_STMT (expr);
1133
1134 if (gimple_nop_p (def))
1135 return t_false;
1136
1137 /* Give up if the path is longer than the MAX that we allow. */
1138 if (limit > param_scev_max_expr_complexity)
1139 {
1140 *evolution_of_loop = chrec_dont_know;
1141 return t_dont_know;
1142 }
1143
1144 if (gphi *phi = dyn_cast <gphi *>(def))
1145 {
1146 if (!loop_phi_node_p (phi))
1147 /* DEF is a condition-phi-node. Follow the branches, and
1148 record their evolutions. Finally, merge the collected
1149 information and set the approximation to the main
1150 variable. */
1151 return follow_ssa_edge_in_condition_phi (phi, evolution_of_loop,
1152 limit);
1153
1154 /* When the analyzed phi is the halting_phi, the
1155 depth-first search is over: we have found a path from
1156 the halting_phi to itself in the loop. */
1157 if (phi == halting_phi)
1158 {
1159 *evolution_of_loop = expr;
1160 return t_true;
1161 }
1162
1163 /* Otherwise, the evolution of the HALTING_PHI depends
1164 on the evolution of another loop-phi-node, i.e. the
1165 evolution function is a higher degree polynomial. */
1166 class loop *def_loop = loop_containing_stmt (def);
1167 if (def_loop == loop)
1168 return t_false;
1169
1170 /* Inner loop. */
1171 if (flow_loop_nested_p (loop, def_loop))
1172 return follow_ssa_edge_inner_loop_phi (phi, evolution_of_loop,
1173 limit + 1);
1174
1175 /* Outer loop. */
1176 return t_false;
1177 }
1178
1179 /* At this level of abstraction, the program is just a set
1180 of GIMPLE_ASSIGNs and PHI_NODEs. In principle there is no
1181 other def to be handled. */
1182 if (!is_gimple_assign (def))
1183 return t_false;
1184
1185 code = gimple_assign_rhs_code (def);
1186 switch (get_gimple_rhs_class (code))
1187 {
1188 case GIMPLE_BINARY_RHS:
1189 rhs0 = gimple_assign_rhs1 (def);
1190 rhs1 = gimple_assign_rhs2 (def);
1191 break;
1192 case GIMPLE_UNARY_RHS:
1193 case GIMPLE_SINGLE_RHS:
1194 rhs0 = gimple_assign_rhs1 (def);
1195 break;
1196 default:
1197 return t_false;
1198 }
1199 type = TREE_TYPE (gimple_assign_lhs (def));
1200 at_stmt = def;
1201 }
1202 else
1203 {
1204 code = TREE_CODE (expr);
1205 type = TREE_TYPE (expr);
1206 /* Via follow_ssa_edge_inner_loop_phi we arrive here with the
1207 GENERIC scalar evolution of the inner loop. */
1208 switch (code)
1209 {
1210 CASE_CONVERT:
1211 rhs0 = TREE_OPERAND (expr, 0);
1212 break;
1213 case POINTER_PLUS_EXPR:
1214 case PLUS_EXPR:
1215 case MINUS_EXPR:
1216 rhs0 = TREE_OPERAND (expr, 0);
1217 rhs1 = TREE_OPERAND (expr, 1);
1218 STRIP_USELESS_TYPE_CONVERSION (rhs0);
1219 STRIP_USELESS_TYPE_CONVERSION (rhs1);
1220 break;
1221 default:
1222 rhs0 = expr;
1223 }
1224 }
1225
1226 switch (code)
1227 {
1228 CASE_CONVERT:
1229 {
1230 /* This assignment is under the form "a_1 = (cast) rhs. We cannot
1231 validate any precision altering conversion during the SCC
1232 analysis, so don't even try. */
1233 if (!tree_nop_conversion_p (type, TREE_TYPE (rhs0)))
1234 return t_false;
1235 t_bool res = follow_ssa_edge_expr (at_stmt, rhs0,
1236 evolution_of_loop, limit);
1237 if (res == t_true)
1238 *evolution_of_loop = chrec_convert (type, *evolution_of_loop,
1239 at_stmt);
1240 return res;
1241 }
1242
1243 case INTEGER_CST:
1244 /* This assignment is under the form "a_1 = 7". */
1245 return t_false;
1246
1247 case ADDR_EXPR:
1248 {
1249 /* Handle &MEM[ptr + CST] which is equivalent to POINTER_PLUS_EXPR. */
1250 if (TREE_CODE (TREE_OPERAND (rhs0, 0)) != MEM_REF)
1251 return t_false;
1252 tree mem = TREE_OPERAND (rhs0, 0);
1253 rhs0 = TREE_OPERAND (mem, 0);
1254 rhs1 = TREE_OPERAND (mem, 1);
1255 code = POINTER_PLUS_EXPR;
1256 }
1257 /* Fallthru. */
1258 case POINTER_PLUS_EXPR:
1259 case PLUS_EXPR:
1260 case MINUS_EXPR:
1261 /* This case is under the form "rhs0 +- rhs1". */
1262 if (TREE_CODE (rhs0) == SSA_NAME
1263 && (TREE_CODE (rhs1) != SSA_NAME || code == MINUS_EXPR))
1264 {
1265 /* Match an assignment under the form:
1266 "a = b +- ...". */
1267 t_bool res = follow_ssa_edge_expr (at_stmt, rhs0,
1268 evolution_of_loop, limit);
1269 if (res == t_true)
1270 *evolution_of_loop = add_to_evolution
1271 (chrec_convert (type, *evolution_of_loop, at_stmt),
1272 code, rhs1, at_stmt);
1273 return res;
1274 }
1275 /* Else search for the SCC in both rhs0 and rhs1. */
1276 return follow_ssa_edge_binary (at_stmt, type, rhs0, code, rhs1,
1277 evolution_of_loop, limit);
1278
1279 default:
1280 return t_false;
1281 }
1282 }
1283 \f
1284
1285 /* This section selects the loops that will be good candidates for the
1286 scalar evolution analysis. For the moment, greedily select all the
1287 loop nests we could analyze. */
1288
1289 /* For a loop with a single exit edge, return the COND_EXPR that
1290 guards the exit edge. If the expression is too difficult to
1291 analyze, then give up. */
1292
1293 gcond *
1294 get_loop_exit_condition (const class loop *loop)
1295 {
1296 gcond *res = NULL;
1297 edge exit_edge = single_exit (loop);
1298
1299 if (dump_file && (dump_flags & TDF_SCEV))
1300 fprintf (dump_file, "(get_loop_exit_condition \n ");
1301
1302 if (exit_edge)
1303 {
1304 gimple *stmt;
1305
1306 stmt = last_stmt (exit_edge->src);
1307 if (gcond *cond_stmt = safe_dyn_cast <gcond *> (stmt))
1308 res = cond_stmt;
1309 }
1310
1311 if (dump_file && (dump_flags & TDF_SCEV))
1312 {
1313 print_gimple_stmt (dump_file, res, 0);
1314 fprintf (dump_file, ")\n");
1315 }
1316
1317 return res;
1318 }
1319
1320 \f
1321 /* Simplify PEELED_CHREC represented by (init_cond, arg) in LOOP.
1322 Handle below case and return the corresponding POLYNOMIAL_CHREC:
1323
1324 # i_17 = PHI <i_13(5), 0(3)>
1325 # _20 = PHI <_5(5), start_4(D)(3)>
1326 ...
1327 i_13 = i_17 + 1;
1328 _5 = start_4(D) + i_13;
1329
1330 Though variable _20 appears as a PEELED_CHREC in the form of
1331 (start_4, _5)_LOOP, it's a POLYNOMIAL_CHREC like {start_4, 1}_LOOP.
1332
1333 See PR41488. */
1334
1335 static tree
1336 simplify_peeled_chrec (class loop *loop, tree arg, tree init_cond)
1337 {
1338 aff_tree aff1, aff2;
1339 tree ev, left, right, type, step_val;
1340 hash_map<tree, name_expansion *> *peeled_chrec_map = NULL;
1341
1342 ev = instantiate_parameters (loop, analyze_scalar_evolution (loop, arg));
1343 if (ev == NULL_TREE || TREE_CODE (ev) != POLYNOMIAL_CHREC)
1344 return chrec_dont_know;
1345
1346 left = CHREC_LEFT (ev);
1347 right = CHREC_RIGHT (ev);
1348 type = TREE_TYPE (left);
1349 step_val = chrec_fold_plus (type, init_cond, right);
1350
1351 /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1352 if "left" equals to "init + right". */
1353 if (operand_equal_p (left, step_val, 0))
1354 {
1355 if (dump_file && (dump_flags & TDF_SCEV))
1356 fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1357
1358 return build_polynomial_chrec (loop->num, init_cond, right);
1359 }
1360
1361 /* The affine code only deals with pointer and integer types. */
1362 if (!POINTER_TYPE_P (type)
1363 && !INTEGRAL_TYPE_P (type))
1364 return chrec_dont_know;
1365
1366 /* Try harder to check if they are equal. */
1367 tree_to_aff_combination_expand (left, type, &aff1, &peeled_chrec_map);
1368 tree_to_aff_combination_expand (step_val, type, &aff2, &peeled_chrec_map);
1369 free_affine_expand_cache (&peeled_chrec_map);
1370 aff_combination_scale (&aff2, -1);
1371 aff_combination_add (&aff1, &aff2);
1372
1373 /* Transform (init, {left, right}_LOOP)_LOOP to {init, right}_LOOP
1374 if "left" equals to "init + right". */
1375 if (aff_combination_zero_p (&aff1))
1376 {
1377 if (dump_file && (dump_flags & TDF_SCEV))
1378 fprintf (dump_file, "Simplify PEELED_CHREC into POLYNOMIAL_CHREC.\n");
1379
1380 return build_polynomial_chrec (loop->num, init_cond, right);
1381 }
1382 return chrec_dont_know;
1383 }
1384
1385 /* Given a LOOP_PHI_NODE, this function determines the evolution
1386 function from LOOP_PHI_NODE to LOOP_PHI_NODE in the loop. */
1387
1388 static tree
1389 analyze_evolution_in_loop (gphi *loop_phi_node,
1390 tree init_cond)
1391 {
1392 int i, n = gimple_phi_num_args (loop_phi_node);
1393 tree evolution_function = chrec_not_analyzed_yet;
1394 class loop *loop = loop_containing_stmt (loop_phi_node);
1395 basic_block bb;
1396 static bool simplify_peeled_chrec_p = true;
1397
1398 if (dump_file && (dump_flags & TDF_SCEV))
1399 {
1400 fprintf (dump_file, "(analyze_evolution_in_loop \n");
1401 fprintf (dump_file, " (loop_phi_node = ");
1402 print_gimple_stmt (dump_file, loop_phi_node, 0);
1403 fprintf (dump_file, ")\n");
1404 }
1405
1406 for (i = 0; i < n; i++)
1407 {
1408 tree arg = PHI_ARG_DEF (loop_phi_node, i);
1409 tree ev_fn = chrec_dont_know;
1410 t_bool res;
1411
1412 /* Select the edges that enter the loop body. */
1413 bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1414 if (!flow_bb_inside_loop_p (loop, bb))
1415 continue;
1416
1417 if (TREE_CODE (arg) == SSA_NAME)
1418 {
1419 bool val = false;
1420
1421 /* Pass in the initial condition to the follow edge function. */
1422 scev_dfs dfs (loop, loop_phi_node, init_cond);
1423 res = dfs.get_ev (&ev_fn, arg);
1424
1425 /* If ev_fn has no evolution in the inner loop, and the
1426 init_cond is not equal to ev_fn, then we have an
1427 ambiguity between two possible values, as we cannot know
1428 the number of iterations at this point. */
1429 if (TREE_CODE (ev_fn) != POLYNOMIAL_CHREC
1430 && no_evolution_in_loop_p (ev_fn, loop->num, &val) && val
1431 && !operand_equal_p (init_cond, ev_fn, 0))
1432 ev_fn = chrec_dont_know;
1433 }
1434 else
1435 res = t_false;
1436
1437 /* When it is impossible to go back on the same
1438 loop_phi_node by following the ssa edges, the
1439 evolution is represented by a peeled chrec, i.e. the
1440 first iteration, EV_FN has the value INIT_COND, then
1441 all the other iterations it has the value of ARG.
1442 For the moment, PEELED_CHREC nodes are not built. */
1443 if (res != t_true)
1444 {
1445 ev_fn = chrec_dont_know;
1446 /* Try to recognize POLYNOMIAL_CHREC which appears in
1447 the form of PEELED_CHREC, but guard the process with
1448 a bool variable to keep the analyzer from infinite
1449 recurrence for real PEELED_RECs. */
1450 if (simplify_peeled_chrec_p && TREE_CODE (arg) == SSA_NAME)
1451 {
1452 simplify_peeled_chrec_p = false;
1453 ev_fn = simplify_peeled_chrec (loop, arg, init_cond);
1454 simplify_peeled_chrec_p = true;
1455 }
1456 }
1457
1458 /* When there are multiple back edges of the loop (which in fact never
1459 happens currently, but nevertheless), merge their evolutions. */
1460 evolution_function = chrec_merge (evolution_function, ev_fn);
1461
1462 if (evolution_function == chrec_dont_know)
1463 break;
1464 }
1465
1466 if (dump_file && (dump_flags & TDF_SCEV))
1467 {
1468 fprintf (dump_file, " (evolution_function = ");
1469 print_generic_expr (dump_file, evolution_function);
1470 fprintf (dump_file, "))\n");
1471 }
1472
1473 return evolution_function;
1474 }
1475
1476 /* Looks to see if VAR is a copy of a constant (via straightforward assignments
1477 or degenerate phi's). If so, returns the constant; else, returns VAR. */
1478
1479 static tree
1480 follow_copies_to_constant (tree var)
1481 {
1482 tree res = var;
1483 while (TREE_CODE (res) == SSA_NAME
1484 /* We face not updated SSA form in multiple places and this walk
1485 may end up in sibling loops so we have to guard it. */
1486 && !name_registered_for_update_p (res))
1487 {
1488 gimple *def = SSA_NAME_DEF_STMT (res);
1489 if (gphi *phi = dyn_cast <gphi *> (def))
1490 {
1491 if (tree rhs = degenerate_phi_result (phi))
1492 res = rhs;
1493 else
1494 break;
1495 }
1496 else if (gimple_assign_single_p (def))
1497 /* Will exit loop if not an SSA_NAME. */
1498 res = gimple_assign_rhs1 (def);
1499 else
1500 break;
1501 }
1502 if (CONSTANT_CLASS_P (res))
1503 return res;
1504 return var;
1505 }
1506
1507 /* Given a loop-phi-node, return the initial conditions of the
1508 variable on entry of the loop. When the CCP has propagated
1509 constants into the loop-phi-node, the initial condition is
1510 instantiated, otherwise the initial condition is kept symbolic.
1511 This analyzer does not analyze the evolution outside the current
1512 loop, and leaves this task to the on-demand tree reconstructor. */
1513
1514 static tree
1515 analyze_initial_condition (gphi *loop_phi_node)
1516 {
1517 int i, n;
1518 tree init_cond = chrec_not_analyzed_yet;
1519 class loop *loop = loop_containing_stmt (loop_phi_node);
1520
1521 if (dump_file && (dump_flags & TDF_SCEV))
1522 {
1523 fprintf (dump_file, "(analyze_initial_condition \n");
1524 fprintf (dump_file, " (loop_phi_node = \n");
1525 print_gimple_stmt (dump_file, loop_phi_node, 0);
1526 fprintf (dump_file, ")\n");
1527 }
1528
1529 n = gimple_phi_num_args (loop_phi_node);
1530 for (i = 0; i < n; i++)
1531 {
1532 tree branch = PHI_ARG_DEF (loop_phi_node, i);
1533 basic_block bb = gimple_phi_arg_edge (loop_phi_node, i)->src;
1534
1535 /* When the branch is oriented to the loop's body, it does
1536 not contribute to the initial condition. */
1537 if (flow_bb_inside_loop_p (loop, bb))
1538 continue;
1539
1540 if (init_cond == chrec_not_analyzed_yet)
1541 {
1542 init_cond = branch;
1543 continue;
1544 }
1545
1546 if (TREE_CODE (branch) == SSA_NAME)
1547 {
1548 init_cond = chrec_dont_know;
1549 break;
1550 }
1551
1552 init_cond = chrec_merge (init_cond, branch);
1553 }
1554
1555 /* Ooops -- a loop without an entry??? */
1556 if (init_cond == chrec_not_analyzed_yet)
1557 init_cond = chrec_dont_know;
1558
1559 /* We may not have fully constant propagated IL. Handle degenerate PHIs here
1560 to not miss important early loop unrollings. */
1561 init_cond = follow_copies_to_constant (init_cond);
1562
1563 if (dump_file && (dump_flags & TDF_SCEV))
1564 {
1565 fprintf (dump_file, " (init_cond = ");
1566 print_generic_expr (dump_file, init_cond);
1567 fprintf (dump_file, "))\n");
1568 }
1569
1570 return init_cond;
1571 }
1572
1573 /* Analyze the scalar evolution for LOOP_PHI_NODE. */
1574
1575 static tree
1576 interpret_loop_phi (class loop *loop, gphi *loop_phi_node)
1577 {
1578 class loop *phi_loop = loop_containing_stmt (loop_phi_node);
1579 tree init_cond;
1580
1581 gcc_assert (phi_loop == loop);
1582
1583 /* Otherwise really interpret the loop phi. */
1584 init_cond = analyze_initial_condition (loop_phi_node);
1585 return analyze_evolution_in_loop (loop_phi_node, init_cond);
1586 }
1587
1588 /* This function merges the branches of a condition-phi-node,
1589 contained in the outermost loop, and whose arguments are already
1590 analyzed. */
1591
1592 static tree
1593 interpret_condition_phi (class loop *loop, gphi *condition_phi)
1594 {
1595 int i, n = gimple_phi_num_args (condition_phi);
1596 tree res = chrec_not_analyzed_yet;
1597
1598 for (i = 0; i < n; i++)
1599 {
1600 tree branch_chrec;
1601
1602 if (backedge_phi_arg_p (condition_phi, i))
1603 {
1604 res = chrec_dont_know;
1605 break;
1606 }
1607
1608 branch_chrec = analyze_scalar_evolution
1609 (loop, PHI_ARG_DEF (condition_phi, i));
1610
1611 res = chrec_merge (res, branch_chrec);
1612 if (res == chrec_dont_know)
1613 break;
1614 }
1615
1616 return res;
1617 }
1618
1619 /* Interpret the operation RHS1 OP RHS2. If we didn't
1620 analyze this node before, follow the definitions until ending
1621 either on an analyzed GIMPLE_ASSIGN, or on a loop-phi-node. On the
1622 return path, this function propagates evolutions (ala constant copy
1623 propagation). OPND1 is not a GIMPLE expression because we could
1624 analyze the effect of an inner loop: see interpret_loop_phi. */
1625
1626 static tree
1627 interpret_rhs_expr (class loop *loop, gimple *at_stmt,
1628 tree type, tree rhs1, enum tree_code code, tree rhs2)
1629 {
1630 tree res, chrec1, chrec2, ctype;
1631 gimple *def;
1632
1633 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1634 {
1635 if (is_gimple_min_invariant (rhs1))
1636 return chrec_convert (type, rhs1, at_stmt);
1637
1638 if (code == SSA_NAME)
1639 return chrec_convert (type, analyze_scalar_evolution (loop, rhs1),
1640 at_stmt);
1641 }
1642
1643 switch (code)
1644 {
1645 case ADDR_EXPR:
1646 if (TREE_CODE (TREE_OPERAND (rhs1, 0)) == MEM_REF
1647 || handled_component_p (TREE_OPERAND (rhs1, 0)))
1648 {
1649 machine_mode mode;
1650 poly_int64 bitsize, bitpos;
1651 int unsignedp, reversep;
1652 int volatilep = 0;
1653 tree base, offset;
1654 tree chrec3;
1655 tree unitpos;
1656
1657 base = get_inner_reference (TREE_OPERAND (rhs1, 0),
1658 &bitsize, &bitpos, &offset, &mode,
1659 &unsignedp, &reversep, &volatilep);
1660
1661 if (TREE_CODE (base) == MEM_REF)
1662 {
1663 rhs2 = TREE_OPERAND (base, 1);
1664 rhs1 = TREE_OPERAND (base, 0);
1665
1666 chrec1 = analyze_scalar_evolution (loop, rhs1);
1667 chrec2 = analyze_scalar_evolution (loop, rhs2);
1668 chrec1 = chrec_convert (type, chrec1, at_stmt);
1669 chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1670 chrec1 = instantiate_parameters (loop, chrec1);
1671 chrec2 = instantiate_parameters (loop, chrec2);
1672 res = chrec_fold_plus (type, chrec1, chrec2);
1673 }
1674 else
1675 {
1676 chrec1 = analyze_scalar_evolution_for_address_of (loop, base);
1677 chrec1 = chrec_convert (type, chrec1, at_stmt);
1678 res = chrec1;
1679 }
1680
1681 if (offset != NULL_TREE)
1682 {
1683 chrec2 = analyze_scalar_evolution (loop, offset);
1684 chrec2 = chrec_convert (TREE_TYPE (offset), chrec2, at_stmt);
1685 chrec2 = instantiate_parameters (loop, chrec2);
1686 res = chrec_fold_plus (type, res, chrec2);
1687 }
1688
1689 if (maybe_ne (bitpos, 0))
1690 {
1691 unitpos = size_int (exact_div (bitpos, BITS_PER_UNIT));
1692 chrec3 = analyze_scalar_evolution (loop, unitpos);
1693 chrec3 = chrec_convert (TREE_TYPE (unitpos), chrec3, at_stmt);
1694 chrec3 = instantiate_parameters (loop, chrec3);
1695 res = chrec_fold_plus (type, res, chrec3);
1696 }
1697 }
1698 else
1699 res = chrec_dont_know;
1700 break;
1701
1702 case POINTER_PLUS_EXPR:
1703 chrec1 = analyze_scalar_evolution (loop, rhs1);
1704 chrec2 = analyze_scalar_evolution (loop, rhs2);
1705 chrec1 = chrec_convert (type, chrec1, at_stmt);
1706 chrec2 = chrec_convert (TREE_TYPE (rhs2), chrec2, at_stmt);
1707 chrec1 = instantiate_parameters (loop, chrec1);
1708 chrec2 = instantiate_parameters (loop, chrec2);
1709 res = chrec_fold_plus (type, chrec1, chrec2);
1710 break;
1711
1712 case PLUS_EXPR:
1713 chrec1 = analyze_scalar_evolution (loop, rhs1);
1714 chrec2 = analyze_scalar_evolution (loop, rhs2);
1715 ctype = type;
1716 /* When the stmt is conditionally executed re-write the CHREC
1717 into a form that has well-defined behavior on overflow. */
1718 if (at_stmt
1719 && INTEGRAL_TYPE_P (type)
1720 && ! TYPE_OVERFLOW_WRAPS (type)
1721 && ! dominated_by_p (CDI_DOMINATORS, loop->latch,
1722 gimple_bb (at_stmt)))
1723 ctype = unsigned_type_for (type);
1724 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1725 chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1726 chrec1 = instantiate_parameters (loop, chrec1);
1727 chrec2 = instantiate_parameters (loop, chrec2);
1728 res = chrec_fold_plus (ctype, chrec1, chrec2);
1729 if (type != ctype)
1730 res = chrec_convert (type, res, at_stmt);
1731 break;
1732
1733 case MINUS_EXPR:
1734 chrec1 = analyze_scalar_evolution (loop, rhs1);
1735 chrec2 = analyze_scalar_evolution (loop, rhs2);
1736 ctype = type;
1737 /* When the stmt is conditionally executed re-write the CHREC
1738 into a form that has well-defined behavior on overflow. */
1739 if (at_stmt
1740 && INTEGRAL_TYPE_P (type)
1741 && ! TYPE_OVERFLOW_WRAPS (type)
1742 && ! dominated_by_p (CDI_DOMINATORS,
1743 loop->latch, gimple_bb (at_stmt)))
1744 ctype = unsigned_type_for (type);
1745 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1746 chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1747 chrec1 = instantiate_parameters (loop, chrec1);
1748 chrec2 = instantiate_parameters (loop, chrec2);
1749 res = chrec_fold_minus (ctype, chrec1, chrec2);
1750 if (type != ctype)
1751 res = chrec_convert (type, res, at_stmt);
1752 break;
1753
1754 case NEGATE_EXPR:
1755 chrec1 = analyze_scalar_evolution (loop, rhs1);
1756 ctype = type;
1757 /* When the stmt is conditionally executed re-write the CHREC
1758 into a form that has well-defined behavior on overflow. */
1759 if (at_stmt
1760 && INTEGRAL_TYPE_P (type)
1761 && ! TYPE_OVERFLOW_WRAPS (type)
1762 && ! dominated_by_p (CDI_DOMINATORS,
1763 loop->latch, gimple_bb (at_stmt)))
1764 ctype = unsigned_type_for (type);
1765 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1766 /* TYPE may be integer, real or complex, so use fold_convert. */
1767 chrec1 = instantiate_parameters (loop, chrec1);
1768 res = chrec_fold_multiply (ctype, chrec1,
1769 fold_convert (ctype, integer_minus_one_node));
1770 if (type != ctype)
1771 res = chrec_convert (type, res, at_stmt);
1772 break;
1773
1774 case BIT_NOT_EXPR:
1775 /* Handle ~X as -1 - X. */
1776 chrec1 = analyze_scalar_evolution (loop, rhs1);
1777 chrec1 = chrec_convert (type, chrec1, at_stmt);
1778 chrec1 = instantiate_parameters (loop, chrec1);
1779 res = chrec_fold_minus (type,
1780 fold_convert (type, integer_minus_one_node),
1781 chrec1);
1782 break;
1783
1784 case MULT_EXPR:
1785 chrec1 = analyze_scalar_evolution (loop, rhs1);
1786 chrec2 = analyze_scalar_evolution (loop, rhs2);
1787 ctype = type;
1788 /* When the stmt is conditionally executed re-write the CHREC
1789 into a form that has well-defined behavior on overflow. */
1790 if (at_stmt
1791 && INTEGRAL_TYPE_P (type)
1792 && ! TYPE_OVERFLOW_WRAPS (type)
1793 && ! dominated_by_p (CDI_DOMINATORS,
1794 loop->latch, gimple_bb (at_stmt)))
1795 ctype = unsigned_type_for (type);
1796 chrec1 = chrec_convert (ctype, chrec1, at_stmt);
1797 chrec2 = chrec_convert (ctype, chrec2, at_stmt);
1798 chrec1 = instantiate_parameters (loop, chrec1);
1799 chrec2 = instantiate_parameters (loop, chrec2);
1800 res = chrec_fold_multiply (ctype, chrec1, chrec2);
1801 if (type != ctype)
1802 res = chrec_convert (type, res, at_stmt);
1803 break;
1804
1805 case LSHIFT_EXPR:
1806 {
1807 /* Handle A<<B as A * (1<<B). */
1808 tree uns = unsigned_type_for (type);
1809 chrec1 = analyze_scalar_evolution (loop, rhs1);
1810 chrec2 = analyze_scalar_evolution (loop, rhs2);
1811 chrec1 = chrec_convert (uns, chrec1, at_stmt);
1812 chrec1 = instantiate_parameters (loop, chrec1);
1813 chrec2 = instantiate_parameters (loop, chrec2);
1814
1815 tree one = build_int_cst (uns, 1);
1816 chrec2 = fold_build2 (LSHIFT_EXPR, uns, one, chrec2);
1817 res = chrec_fold_multiply (uns, chrec1, chrec2);
1818 res = chrec_convert (type, res, at_stmt);
1819 }
1820 break;
1821
1822 CASE_CONVERT:
1823 /* In case we have a truncation of a widened operation that in
1824 the truncated type has undefined overflow behavior analyze
1825 the operation done in an unsigned type of the same precision
1826 as the final truncation. We cannot derive a scalar evolution
1827 for the widened operation but for the truncated result. */
1828 if (TREE_CODE (type) == INTEGER_TYPE
1829 && TREE_CODE (TREE_TYPE (rhs1)) == INTEGER_TYPE
1830 && TYPE_PRECISION (type) < TYPE_PRECISION (TREE_TYPE (rhs1))
1831 && TYPE_OVERFLOW_UNDEFINED (type)
1832 && TREE_CODE (rhs1) == SSA_NAME
1833 && (def = SSA_NAME_DEF_STMT (rhs1))
1834 && is_gimple_assign (def)
1835 && TREE_CODE_CLASS (gimple_assign_rhs_code (def)) == tcc_binary
1836 && TREE_CODE (gimple_assign_rhs2 (def)) == INTEGER_CST)
1837 {
1838 tree utype = unsigned_type_for (type);
1839 chrec1 = interpret_rhs_expr (loop, at_stmt, utype,
1840 gimple_assign_rhs1 (def),
1841 gimple_assign_rhs_code (def),
1842 gimple_assign_rhs2 (def));
1843 }
1844 else
1845 chrec1 = analyze_scalar_evolution (loop, rhs1);
1846 res = chrec_convert (type, chrec1, at_stmt, true, rhs1);
1847 break;
1848
1849 case BIT_AND_EXPR:
1850 /* Given int variable A, handle A&0xffff as (int)(unsigned short)A.
1851 If A is SCEV and its value is in the range of representable set
1852 of type unsigned short, the result expression is a (no-overflow)
1853 SCEV. */
1854 res = chrec_dont_know;
1855 if (tree_fits_uhwi_p (rhs2))
1856 {
1857 int precision;
1858 unsigned HOST_WIDE_INT val = tree_to_uhwi (rhs2);
1859
1860 val ++;
1861 /* Skip if value of rhs2 wraps in unsigned HOST_WIDE_INT or
1862 it's not the maximum value of a smaller type than rhs1. */
1863 if (val != 0
1864 && (precision = exact_log2 (val)) > 0
1865 && (unsigned) precision < TYPE_PRECISION (TREE_TYPE (rhs1)))
1866 {
1867 tree utype = build_nonstandard_integer_type (precision, 1);
1868
1869 if (TYPE_PRECISION (utype) < TYPE_PRECISION (TREE_TYPE (rhs1)))
1870 {
1871 chrec1 = analyze_scalar_evolution (loop, rhs1);
1872 chrec1 = chrec_convert (utype, chrec1, at_stmt);
1873 res = chrec_convert (TREE_TYPE (rhs1), chrec1, at_stmt);
1874 }
1875 }
1876 }
1877 break;
1878
1879 default:
1880 res = chrec_dont_know;
1881 break;
1882 }
1883
1884 return res;
1885 }
1886
1887 /* Interpret the expression EXPR. */
1888
1889 static tree
1890 interpret_expr (class loop *loop, gimple *at_stmt, tree expr)
1891 {
1892 enum tree_code code;
1893 tree type = TREE_TYPE (expr), op0, op1;
1894
1895 if (automatically_generated_chrec_p (expr))
1896 return expr;
1897
1898 if (TREE_CODE (expr) == POLYNOMIAL_CHREC
1899 || TREE_CODE (expr) == CALL_EXPR
1900 || get_gimple_rhs_class (TREE_CODE (expr)) == GIMPLE_TERNARY_RHS)
1901 return chrec_dont_know;
1902
1903 extract_ops_from_tree (expr, &code, &op0, &op1);
1904
1905 return interpret_rhs_expr (loop, at_stmt, type,
1906 op0, code, op1);
1907 }
1908
1909 /* Interpret the rhs of the assignment STMT. */
1910
1911 static tree
1912 interpret_gimple_assign (class loop *loop, gimple *stmt)
1913 {
1914 tree type = TREE_TYPE (gimple_assign_lhs (stmt));
1915 enum tree_code code = gimple_assign_rhs_code (stmt);
1916
1917 return interpret_rhs_expr (loop, stmt, type,
1918 gimple_assign_rhs1 (stmt), code,
1919 gimple_assign_rhs2 (stmt));
1920 }
1921
1922 \f
1923
1924 /* This section contains all the entry points:
1925 - number_of_iterations_in_loop,
1926 - analyze_scalar_evolution,
1927 - instantiate_parameters.
1928 */
1929
1930 /* Helper recursive function. */
1931
1932 static tree
1933 analyze_scalar_evolution_1 (class loop *loop, tree var)
1934 {
1935 gimple *def;
1936 basic_block bb;
1937 class loop *def_loop;
1938 tree res;
1939
1940 if (TREE_CODE (var) != SSA_NAME)
1941 return interpret_expr (loop, NULL, var);
1942
1943 def = SSA_NAME_DEF_STMT (var);
1944 bb = gimple_bb (def);
1945 def_loop = bb->loop_father;
1946
1947 if (!flow_bb_inside_loop_p (loop, bb))
1948 {
1949 /* Keep symbolic form, but look through obvious copies for constants. */
1950 res = follow_copies_to_constant (var);
1951 goto set_and_end;
1952 }
1953
1954 if (loop != def_loop)
1955 {
1956 res = analyze_scalar_evolution_1 (def_loop, var);
1957 class loop *loop_to_skip = superloop_at_depth (def_loop,
1958 loop_depth (loop) + 1);
1959 res = compute_overall_effect_of_inner_loop (loop_to_skip, res);
1960 if (chrec_contains_symbols_defined_in_loop (res, loop->num))
1961 res = analyze_scalar_evolution_1 (loop, res);
1962 goto set_and_end;
1963 }
1964
1965 switch (gimple_code (def))
1966 {
1967 case GIMPLE_ASSIGN:
1968 res = interpret_gimple_assign (loop, def);
1969 break;
1970
1971 case GIMPLE_PHI:
1972 if (loop_phi_node_p (def))
1973 res = interpret_loop_phi (loop, as_a <gphi *> (def));
1974 else
1975 res = interpret_condition_phi (loop, as_a <gphi *> (def));
1976 break;
1977
1978 default:
1979 res = chrec_dont_know;
1980 break;
1981 }
1982
1983 set_and_end:
1984
1985 /* Keep the symbolic form. */
1986 if (res == chrec_dont_know)
1987 res = var;
1988
1989 if (loop == def_loop)
1990 set_scalar_evolution (block_before_loop (loop), var, res);
1991
1992 return res;
1993 }
1994
1995 /* Analyzes and returns the scalar evolution of the ssa_name VAR in
1996 LOOP. LOOP is the loop in which the variable is used.
1997
1998 Example of use: having a pointer VAR to a SSA_NAME node, STMT a
1999 pointer to the statement that uses this variable, in order to
2000 determine the evolution function of the variable, use the following
2001 calls:
2002
2003 loop_p loop = loop_containing_stmt (stmt);
2004 tree chrec_with_symbols = analyze_scalar_evolution (loop, var);
2005 tree chrec_instantiated = instantiate_parameters (loop, chrec_with_symbols);
2006 */
2007
2008 tree
2009 analyze_scalar_evolution (class loop *loop, tree var)
2010 {
2011 tree res;
2012
2013 /* ??? Fix callers. */
2014 if (! loop)
2015 return var;
2016
2017 if (dump_file && (dump_flags & TDF_SCEV))
2018 {
2019 fprintf (dump_file, "(analyze_scalar_evolution \n");
2020 fprintf (dump_file, " (loop_nb = %d)\n", loop->num);
2021 fprintf (dump_file, " (scalar = ");
2022 print_generic_expr (dump_file, var);
2023 fprintf (dump_file, ")\n");
2024 }
2025
2026 res = get_scalar_evolution (block_before_loop (loop), var);
2027 if (res == chrec_not_analyzed_yet)
2028 {
2029 /* We'll recurse into instantiate_scev, avoid tearing down the
2030 instantiate cache repeatedly and keep it live from here. */
2031 bool destr = false;
2032 if (!global_cache)
2033 {
2034 global_cache = new instantiate_cache_type;
2035 destr = true;
2036 }
2037 res = analyze_scalar_evolution_1 (loop, var);
2038 if (destr)
2039 {
2040 delete global_cache;
2041 global_cache = NULL;
2042 }
2043 }
2044
2045 if (dump_file && (dump_flags & TDF_SCEV))
2046 fprintf (dump_file, ")\n");
2047
2048 return res;
2049 }
2050
2051 /* Analyzes and returns the scalar evolution of VAR address in LOOP. */
2052
2053 static tree
2054 analyze_scalar_evolution_for_address_of (class loop *loop, tree var)
2055 {
2056 return analyze_scalar_evolution (loop, build_fold_addr_expr (var));
2057 }
2058
2059 /* Analyze scalar evolution of use of VERSION in USE_LOOP with respect to
2060 WRTO_LOOP (which should be a superloop of USE_LOOP)
2061
2062 FOLDED_CASTS is set to true if resolve_mixers used
2063 chrec_convert_aggressive (TODO -- not really, we are way too conservative
2064 at the moment in order to keep things simple).
2065
2066 To illustrate the meaning of USE_LOOP and WRTO_LOOP, consider the following
2067 example:
2068
2069 for (i = 0; i < 100; i++) -- loop 1
2070 {
2071 for (j = 0; j < 100; j++) -- loop 2
2072 {
2073 k1 = i;
2074 k2 = j;
2075
2076 use2 (k1, k2);
2077
2078 for (t = 0; t < 100; t++) -- loop 3
2079 use3 (k1, k2);
2080
2081 }
2082 use1 (k1, k2);
2083 }
2084
2085 Both k1 and k2 are invariants in loop3, thus
2086 analyze_scalar_evolution_in_loop (loop3, loop3, k1) = k1
2087 analyze_scalar_evolution_in_loop (loop3, loop3, k2) = k2
2088
2089 As they are invariant, it does not matter whether we consider their
2090 usage in loop 3 or loop 2, hence
2091 analyze_scalar_evolution_in_loop (loop2, loop3, k1) =
2092 analyze_scalar_evolution_in_loop (loop2, loop2, k1) = i
2093 analyze_scalar_evolution_in_loop (loop2, loop3, k2) =
2094 analyze_scalar_evolution_in_loop (loop2, loop2, k2) = [0,+,1]_2
2095
2096 Similarly for their evolutions with respect to loop 1. The values of K2
2097 in the use in loop 2 vary independently on loop 1, thus we cannot express
2098 the evolution with respect to loop 1:
2099 analyze_scalar_evolution_in_loop (loop1, loop3, k1) =
2100 analyze_scalar_evolution_in_loop (loop1, loop2, k1) = [0,+,1]_1
2101 analyze_scalar_evolution_in_loop (loop1, loop3, k2) =
2102 analyze_scalar_evolution_in_loop (loop1, loop2, k2) = dont_know
2103
2104 The value of k2 in the use in loop 1 is known, though:
2105 analyze_scalar_evolution_in_loop (loop1, loop1, k1) = [0,+,1]_1
2106 analyze_scalar_evolution_in_loop (loop1, loop1, k2) = 100
2107 */
2108
2109 static tree
2110 analyze_scalar_evolution_in_loop (class loop *wrto_loop, class loop *use_loop,
2111 tree version, bool *folded_casts)
2112 {
2113 bool val = false;
2114 tree ev = version, tmp;
2115
2116 /* We cannot just do
2117
2118 tmp = analyze_scalar_evolution (use_loop, version);
2119 ev = resolve_mixers (wrto_loop, tmp, folded_casts);
2120
2121 as resolve_mixers would query the scalar evolution with respect to
2122 wrto_loop. For example, in the situation described in the function
2123 comment, suppose that wrto_loop = loop1, use_loop = loop3 and
2124 version = k2. Then
2125
2126 analyze_scalar_evolution (use_loop, version) = k2
2127
2128 and resolve_mixers (loop1, k2, folded_casts) finds that the value of
2129 k2 in loop 1 is 100, which is a wrong result, since we are interested
2130 in the value in loop 3.
2131
2132 Instead, we need to proceed from use_loop to wrto_loop loop by loop,
2133 each time checking that there is no evolution in the inner loop. */
2134
2135 if (folded_casts)
2136 *folded_casts = false;
2137 while (1)
2138 {
2139 tmp = analyze_scalar_evolution (use_loop, ev);
2140 ev = resolve_mixers (use_loop, tmp, folded_casts);
2141
2142 if (use_loop == wrto_loop)
2143 return ev;
2144
2145 /* If the value of the use changes in the inner loop, we cannot express
2146 its value in the outer loop (we might try to return interval chrec,
2147 but we do not have a user for it anyway) */
2148 if (!no_evolution_in_loop_p (ev, use_loop->num, &val)
2149 || !val)
2150 return chrec_dont_know;
2151
2152 use_loop = loop_outer (use_loop);
2153 }
2154 }
2155
2156
2157 /* Computes a hash function for database element ELT. */
2158
2159 static inline hashval_t
2160 hash_idx_scev_info (const void *elt_)
2161 {
2162 unsigned idx = ((size_t) elt_) - 2;
2163 return scev_info_hasher::hash (&global_cache->entries[idx]);
2164 }
2165
2166 /* Compares database elements E1 and E2. */
2167
2168 static inline int
2169 eq_idx_scev_info (const void *e1, const void *e2)
2170 {
2171 unsigned idx1 = ((size_t) e1) - 2;
2172 return scev_info_hasher::equal (&global_cache->entries[idx1],
2173 (const scev_info_str *) e2);
2174 }
2175
2176 /* Returns from CACHE the slot number of the cached chrec for NAME. */
2177
2178 static unsigned
2179 get_instantiated_value_entry (instantiate_cache_type &cache,
2180 tree name, edge instantiate_below)
2181 {
2182 if (!cache.map)
2183 {
2184 cache.map = htab_create (10, hash_idx_scev_info, eq_idx_scev_info, NULL);
2185 cache.entries.create (10);
2186 }
2187
2188 scev_info_str e;
2189 e.name_version = SSA_NAME_VERSION (name);
2190 e.instantiated_below = instantiate_below->dest->index;
2191 void **slot = htab_find_slot_with_hash (cache.map, &e,
2192 scev_info_hasher::hash (&e), INSERT);
2193 if (!*slot)
2194 {
2195 e.chrec = chrec_not_analyzed_yet;
2196 *slot = (void *)(size_t)(cache.entries.length () + 2);
2197 cache.entries.safe_push (e);
2198 }
2199
2200 return ((size_t)*slot) - 2;
2201 }
2202
2203
2204 /* Return the closed_loop_phi node for VAR. If there is none, return
2205 NULL_TREE. */
2206
2207 static tree
2208 loop_closed_phi_def (tree var)
2209 {
2210 class loop *loop;
2211 edge exit;
2212 gphi *phi;
2213 gphi_iterator psi;
2214
2215 if (var == NULL_TREE
2216 || TREE_CODE (var) != SSA_NAME)
2217 return NULL_TREE;
2218
2219 loop = loop_containing_stmt (SSA_NAME_DEF_STMT (var));
2220 exit = single_exit (loop);
2221 if (!exit)
2222 return NULL_TREE;
2223
2224 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2225 {
2226 phi = psi.phi ();
2227 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == var)
2228 return PHI_RESULT (phi);
2229 }
2230
2231 return NULL_TREE;
2232 }
2233
2234 static tree instantiate_scev_r (edge, class loop *, class loop *,
2235 tree, bool *, int);
2236
2237 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2238 and EVOLUTION_LOOP, that were left under a symbolic form.
2239
2240 CHREC is an SSA_NAME to be instantiated.
2241
2242 CACHE is the cache of already instantiated values.
2243
2244 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2245 conversions that may wrap in signed/pointer type are folded, as long
2246 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2247 then we don't do such fold.
2248
2249 SIZE_EXPR is used for computing the size of the expression to be
2250 instantiated, and to stop if it exceeds some limit. */
2251
2252 static tree
2253 instantiate_scev_name (edge instantiate_below,
2254 class loop *evolution_loop, class loop *inner_loop,
2255 tree chrec,
2256 bool *fold_conversions,
2257 int size_expr)
2258 {
2259 tree res;
2260 class loop *def_loop;
2261 basic_block def_bb = gimple_bb (SSA_NAME_DEF_STMT (chrec));
2262
2263 /* A parameter, nothing to do. */
2264 if (!def_bb
2265 || !dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest))
2266 return chrec;
2267
2268 /* We cache the value of instantiated variable to avoid exponential
2269 time complexity due to reevaluations. We also store the convenient
2270 value in the cache in order to prevent infinite recursion -- we do
2271 not want to instantiate the SSA_NAME if it is in a mixer
2272 structure. This is used for avoiding the instantiation of
2273 recursively defined functions, such as:
2274
2275 | a_2 -> {0, +, 1, +, a_2}_1 */
2276
2277 unsigned si = get_instantiated_value_entry (*global_cache,
2278 chrec, instantiate_below);
2279 if (global_cache->get (si) != chrec_not_analyzed_yet)
2280 return global_cache->get (si);
2281
2282 /* On recursion return chrec_dont_know. */
2283 global_cache->set (si, chrec_dont_know);
2284
2285 def_loop = find_common_loop (evolution_loop, def_bb->loop_father);
2286
2287 if (! dominated_by_p (CDI_DOMINATORS,
2288 def_loop->header, instantiate_below->dest))
2289 {
2290 gimple *def = SSA_NAME_DEF_STMT (chrec);
2291 if (gassign *ass = dyn_cast <gassign *> (def))
2292 {
2293 switch (gimple_assign_rhs_class (ass))
2294 {
2295 case GIMPLE_UNARY_RHS:
2296 {
2297 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2298 inner_loop, gimple_assign_rhs1 (ass),
2299 fold_conversions, size_expr);
2300 if (op0 == chrec_dont_know)
2301 return chrec_dont_know;
2302 res = fold_build1 (gimple_assign_rhs_code (ass),
2303 TREE_TYPE (chrec), op0);
2304 break;
2305 }
2306 case GIMPLE_BINARY_RHS:
2307 {
2308 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2309 inner_loop, gimple_assign_rhs1 (ass),
2310 fold_conversions, size_expr);
2311 if (op0 == chrec_dont_know)
2312 return chrec_dont_know;
2313 tree op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2314 inner_loop, gimple_assign_rhs2 (ass),
2315 fold_conversions, size_expr);
2316 if (op1 == chrec_dont_know)
2317 return chrec_dont_know;
2318 res = fold_build2 (gimple_assign_rhs_code (ass),
2319 TREE_TYPE (chrec), op0, op1);
2320 break;
2321 }
2322 default:
2323 res = chrec_dont_know;
2324 }
2325 }
2326 else
2327 res = chrec_dont_know;
2328 global_cache->set (si, res);
2329 return res;
2330 }
2331
2332 /* If the analysis yields a parametric chrec, instantiate the
2333 result again. */
2334 res = analyze_scalar_evolution (def_loop, chrec);
2335
2336 /* Don't instantiate default definitions. */
2337 if (TREE_CODE (res) == SSA_NAME
2338 && SSA_NAME_IS_DEFAULT_DEF (res))
2339 ;
2340
2341 /* Don't instantiate loop-closed-ssa phi nodes. */
2342 else if (TREE_CODE (res) == SSA_NAME
2343 && loop_depth (loop_containing_stmt (SSA_NAME_DEF_STMT (res)))
2344 > loop_depth (def_loop))
2345 {
2346 if (res == chrec)
2347 res = loop_closed_phi_def (chrec);
2348 else
2349 res = chrec;
2350
2351 /* When there is no loop_closed_phi_def, it means that the
2352 variable is not used after the loop: try to still compute the
2353 value of the variable when exiting the loop. */
2354 if (res == NULL_TREE)
2355 {
2356 loop_p loop = loop_containing_stmt (SSA_NAME_DEF_STMT (chrec));
2357 res = analyze_scalar_evolution (loop, chrec);
2358 res = compute_overall_effect_of_inner_loop (loop, res);
2359 res = instantiate_scev_r (instantiate_below, evolution_loop,
2360 inner_loop, res,
2361 fold_conversions, size_expr);
2362 }
2363 else if (dominated_by_p (CDI_DOMINATORS,
2364 gimple_bb (SSA_NAME_DEF_STMT (res)),
2365 instantiate_below->dest))
2366 res = chrec_dont_know;
2367 }
2368
2369 else if (res != chrec_dont_know)
2370 {
2371 if (inner_loop
2372 && def_bb->loop_father != inner_loop
2373 && !flow_loop_nested_p (def_bb->loop_father, inner_loop))
2374 /* ??? We could try to compute the overall effect of the loop here. */
2375 res = chrec_dont_know;
2376 else
2377 res = instantiate_scev_r (instantiate_below, evolution_loop,
2378 inner_loop, res,
2379 fold_conversions, size_expr);
2380 }
2381
2382 /* Store the correct value to the cache. */
2383 global_cache->set (si, res);
2384 return res;
2385 }
2386
2387 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2388 and EVOLUTION_LOOP, that were left under a symbolic form.
2389
2390 CHREC is a polynomial chain of recurrence to be instantiated.
2391
2392 CACHE is the cache of already instantiated values.
2393
2394 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2395 conversions that may wrap in signed/pointer type are folded, as long
2396 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2397 then we don't do such fold.
2398
2399 SIZE_EXPR is used for computing the size of the expression to be
2400 instantiated, and to stop if it exceeds some limit. */
2401
2402 static tree
2403 instantiate_scev_poly (edge instantiate_below,
2404 class loop *evolution_loop, class loop *,
2405 tree chrec, bool *fold_conversions, int size_expr)
2406 {
2407 tree op1;
2408 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2409 get_chrec_loop (chrec),
2410 CHREC_LEFT (chrec), fold_conversions,
2411 size_expr);
2412 if (op0 == chrec_dont_know)
2413 return chrec_dont_know;
2414
2415 op1 = instantiate_scev_r (instantiate_below, evolution_loop,
2416 get_chrec_loop (chrec),
2417 CHREC_RIGHT (chrec), fold_conversions,
2418 size_expr);
2419 if (op1 == chrec_dont_know)
2420 return chrec_dont_know;
2421
2422 if (CHREC_LEFT (chrec) != op0
2423 || CHREC_RIGHT (chrec) != op1)
2424 {
2425 op1 = chrec_convert_rhs (chrec_type (op0), op1, NULL);
2426 chrec = build_polynomial_chrec (CHREC_VARIABLE (chrec), op0, op1);
2427 }
2428
2429 return chrec;
2430 }
2431
2432 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2433 and EVOLUTION_LOOP, that were left under a symbolic form.
2434
2435 "C0 CODE C1" is a binary expression of type TYPE to be instantiated.
2436
2437 CACHE is the cache of already instantiated values.
2438
2439 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2440 conversions that may wrap in signed/pointer type are folded, as long
2441 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2442 then we don't do such fold.
2443
2444 SIZE_EXPR is used for computing the size of the expression to be
2445 instantiated, and to stop if it exceeds some limit. */
2446
2447 static tree
2448 instantiate_scev_binary (edge instantiate_below,
2449 class loop *evolution_loop, class loop *inner_loop,
2450 tree chrec, enum tree_code code,
2451 tree type, tree c0, tree c1,
2452 bool *fold_conversions, int size_expr)
2453 {
2454 tree op1;
2455 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2456 c0, fold_conversions, size_expr);
2457 if (op0 == chrec_dont_know)
2458 return chrec_dont_know;
2459
2460 /* While we eventually compute the same op1 if c0 == c1 the process
2461 of doing this is expensive so the following short-cut prevents
2462 exponential compile-time behavior. */
2463 if (c0 != c1)
2464 {
2465 op1 = instantiate_scev_r (instantiate_below, evolution_loop, inner_loop,
2466 c1, fold_conversions, size_expr);
2467 if (op1 == chrec_dont_know)
2468 return chrec_dont_know;
2469 }
2470 else
2471 op1 = op0;
2472
2473 if (c0 != op0
2474 || c1 != op1)
2475 {
2476 op0 = chrec_convert (type, op0, NULL);
2477 op1 = chrec_convert_rhs (type, op1, NULL);
2478
2479 switch (code)
2480 {
2481 case POINTER_PLUS_EXPR:
2482 case PLUS_EXPR:
2483 return chrec_fold_plus (type, op0, op1);
2484
2485 case MINUS_EXPR:
2486 return chrec_fold_minus (type, op0, op1);
2487
2488 case MULT_EXPR:
2489 return chrec_fold_multiply (type, op0, op1);
2490
2491 default:
2492 gcc_unreachable ();
2493 }
2494 }
2495
2496 return chrec ? chrec : fold_build2 (code, type, c0, c1);
2497 }
2498
2499 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2500 and EVOLUTION_LOOP, that were left under a symbolic form.
2501
2502 "CHREC" that stands for a convert expression "(TYPE) OP" is to be
2503 instantiated.
2504
2505 CACHE is the cache of already instantiated values.
2506
2507 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2508 conversions that may wrap in signed/pointer type are folded, as long
2509 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2510 then we don't do such fold.
2511
2512 SIZE_EXPR is used for computing the size of the expression to be
2513 instantiated, and to stop if it exceeds some limit. */
2514
2515 static tree
2516 instantiate_scev_convert (edge instantiate_below,
2517 class loop *evolution_loop, class loop *inner_loop,
2518 tree chrec, tree type, tree op,
2519 bool *fold_conversions, int size_expr)
2520 {
2521 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2522 inner_loop, op,
2523 fold_conversions, size_expr);
2524
2525 if (op0 == chrec_dont_know)
2526 return chrec_dont_know;
2527
2528 if (fold_conversions)
2529 {
2530 tree tmp = chrec_convert_aggressive (type, op0, fold_conversions);
2531 if (tmp)
2532 return tmp;
2533
2534 /* If we used chrec_convert_aggressive, we can no longer assume that
2535 signed chrecs do not overflow, as chrec_convert does, so avoid
2536 calling it in that case. */
2537 if (*fold_conversions)
2538 {
2539 if (chrec && op0 == op)
2540 return chrec;
2541
2542 return fold_convert (type, op0);
2543 }
2544 }
2545
2546 return chrec_convert (type, op0, NULL);
2547 }
2548
2549 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2550 and EVOLUTION_LOOP, that were left under a symbolic form.
2551
2552 CHREC is a BIT_NOT_EXPR or a NEGATE_EXPR expression to be instantiated.
2553 Handle ~X as -1 - X.
2554 Handle -X as -1 * X.
2555
2556 CACHE is the cache of already instantiated values.
2557
2558 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2559 conversions that may wrap in signed/pointer type are folded, as long
2560 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2561 then we don't do such fold.
2562
2563 SIZE_EXPR is used for computing the size of the expression to be
2564 instantiated, and to stop if it exceeds some limit. */
2565
2566 static tree
2567 instantiate_scev_not (edge instantiate_below,
2568 class loop *evolution_loop, class loop *inner_loop,
2569 tree chrec,
2570 enum tree_code code, tree type, tree op,
2571 bool *fold_conversions, int size_expr)
2572 {
2573 tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
2574 inner_loop, op,
2575 fold_conversions, size_expr);
2576
2577 if (op0 == chrec_dont_know)
2578 return chrec_dont_know;
2579
2580 if (op != op0)
2581 {
2582 op0 = chrec_convert (type, op0, NULL);
2583
2584 switch (code)
2585 {
2586 case BIT_NOT_EXPR:
2587 return chrec_fold_minus
2588 (type, fold_convert (type, integer_minus_one_node), op0);
2589
2590 case NEGATE_EXPR:
2591 return chrec_fold_multiply
2592 (type, fold_convert (type, integer_minus_one_node), op0);
2593
2594 default:
2595 gcc_unreachable ();
2596 }
2597 }
2598
2599 return chrec ? chrec : fold_build1 (code, type, op0);
2600 }
2601
2602 /* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
2603 and EVOLUTION_LOOP, that were left under a symbolic form.
2604
2605 CHREC is the scalar evolution to instantiate.
2606
2607 CACHE is the cache of already instantiated values.
2608
2609 Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
2610 conversions that may wrap in signed/pointer type are folded, as long
2611 as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
2612 then we don't do such fold.
2613
2614 SIZE_EXPR is used for computing the size of the expression to be
2615 instantiated, and to stop if it exceeds some limit. */
2616
2617 static tree
2618 instantiate_scev_r (edge instantiate_below,
2619 class loop *evolution_loop, class loop *inner_loop,
2620 tree chrec,
2621 bool *fold_conversions, int size_expr)
2622 {
2623 /* Give up if the expression is larger than the MAX that we allow. */
2624 if (size_expr++ > param_scev_max_expr_size)
2625 return chrec_dont_know;
2626
2627 if (chrec == NULL_TREE
2628 || automatically_generated_chrec_p (chrec)
2629 || is_gimple_min_invariant (chrec))
2630 return chrec;
2631
2632 switch (TREE_CODE (chrec))
2633 {
2634 case SSA_NAME:
2635 return instantiate_scev_name (instantiate_below, evolution_loop,
2636 inner_loop, chrec,
2637 fold_conversions, size_expr);
2638
2639 case POLYNOMIAL_CHREC:
2640 return instantiate_scev_poly (instantiate_below, evolution_loop,
2641 inner_loop, chrec,
2642 fold_conversions, size_expr);
2643
2644 case POINTER_PLUS_EXPR:
2645 case PLUS_EXPR:
2646 case MINUS_EXPR:
2647 case MULT_EXPR:
2648 return instantiate_scev_binary (instantiate_below, evolution_loop,
2649 inner_loop, chrec,
2650 TREE_CODE (chrec), chrec_type (chrec),
2651 TREE_OPERAND (chrec, 0),
2652 TREE_OPERAND (chrec, 1),
2653 fold_conversions, size_expr);
2654
2655 CASE_CONVERT:
2656 return instantiate_scev_convert (instantiate_below, evolution_loop,
2657 inner_loop, chrec,
2658 TREE_TYPE (chrec), TREE_OPERAND (chrec, 0),
2659 fold_conversions, size_expr);
2660
2661 case NEGATE_EXPR:
2662 case BIT_NOT_EXPR:
2663 return instantiate_scev_not (instantiate_below, evolution_loop,
2664 inner_loop, chrec,
2665 TREE_CODE (chrec), TREE_TYPE (chrec),
2666 TREE_OPERAND (chrec, 0),
2667 fold_conversions, size_expr);
2668
2669 case ADDR_EXPR:
2670 if (is_gimple_min_invariant (chrec))
2671 return chrec;
2672 /* Fallthru. */
2673 case SCEV_NOT_KNOWN:
2674 return chrec_dont_know;
2675
2676 case SCEV_KNOWN:
2677 return chrec_known;
2678
2679 default:
2680 if (CONSTANT_CLASS_P (chrec))
2681 return chrec;
2682 return chrec_dont_know;
2683 }
2684 }
2685
2686 /* Analyze all the parameters of the chrec that were left under a
2687 symbolic form. INSTANTIATE_BELOW is the basic block that stops the
2688 recursive instantiation of parameters: a parameter is a variable
2689 that is defined in a basic block that dominates INSTANTIATE_BELOW or
2690 a function parameter. */
2691
2692 tree
2693 instantiate_scev (edge instantiate_below, class loop *evolution_loop,
2694 tree chrec)
2695 {
2696 tree res;
2697
2698 if (dump_file && (dump_flags & TDF_SCEV))
2699 {
2700 fprintf (dump_file, "(instantiate_scev \n");
2701 fprintf (dump_file, " (instantiate_below = %d -> %d)\n",
2702 instantiate_below->src->index, instantiate_below->dest->index);
2703 if (evolution_loop)
2704 fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num);
2705 fprintf (dump_file, " (chrec = ");
2706 print_generic_expr (dump_file, chrec);
2707 fprintf (dump_file, ")\n");
2708 }
2709
2710 bool destr = false;
2711 if (!global_cache)
2712 {
2713 global_cache = new instantiate_cache_type;
2714 destr = true;
2715 }
2716
2717 res = instantiate_scev_r (instantiate_below, evolution_loop,
2718 NULL, chrec, NULL, 0);
2719
2720 if (destr)
2721 {
2722 delete global_cache;
2723 global_cache = NULL;
2724 }
2725
2726 if (dump_file && (dump_flags & TDF_SCEV))
2727 {
2728 fprintf (dump_file, " (res = ");
2729 print_generic_expr (dump_file, res);
2730 fprintf (dump_file, "))\n");
2731 }
2732
2733 return res;
2734 }
2735
2736 /* Similar to instantiate_parameters, but does not introduce the
2737 evolutions in outer loops for LOOP invariants in CHREC, and does not
2738 care about causing overflows, as long as they do not affect value
2739 of an expression. */
2740
2741 tree
2742 resolve_mixers (class loop *loop, tree chrec, bool *folded_casts)
2743 {
2744 bool destr = false;
2745 bool fold_conversions = false;
2746 if (!global_cache)
2747 {
2748 global_cache = new instantiate_cache_type;
2749 destr = true;
2750 }
2751
2752 tree ret = instantiate_scev_r (loop_preheader_edge (loop), loop, NULL,
2753 chrec, &fold_conversions, 0);
2754
2755 if (folded_casts && !*folded_casts)
2756 *folded_casts = fold_conversions;
2757
2758 if (destr)
2759 {
2760 delete global_cache;
2761 global_cache = NULL;
2762 }
2763
2764 return ret;
2765 }
2766
2767 /* Entry point for the analysis of the number of iterations pass.
2768 This function tries to safely approximate the number of iterations
2769 the loop will run. When this property is not decidable at compile
2770 time, the result is chrec_dont_know. Otherwise the result is a
2771 scalar or a symbolic parameter. When the number of iterations may
2772 be equal to zero and the property cannot be determined at compile
2773 time, the result is a COND_EXPR that represents in a symbolic form
2774 the conditions under which the number of iterations is not zero.
2775
2776 Example of analysis: suppose that the loop has an exit condition:
2777
2778 "if (b > 49) goto end_loop;"
2779
2780 and that in a previous analysis we have determined that the
2781 variable 'b' has an evolution function:
2782
2783 "EF = {23, +, 5}_2".
2784
2785 When we evaluate the function at the point 5, i.e. the value of the
2786 variable 'b' after 5 iterations in the loop, we have EF (5) = 48,
2787 and EF (6) = 53. In this case the value of 'b' on exit is '53' and
2788 the loop body has been executed 6 times. */
2789
2790 tree
2791 number_of_latch_executions (class loop *loop)
2792 {
2793 edge exit;
2794 class tree_niter_desc niter_desc;
2795 tree may_be_zero;
2796 tree res;
2797
2798 /* Determine whether the number of iterations in loop has already
2799 been computed. */
2800 res = loop->nb_iterations;
2801 if (res)
2802 return res;
2803
2804 may_be_zero = NULL_TREE;
2805
2806 if (dump_file && (dump_flags & TDF_SCEV))
2807 fprintf (dump_file, "(number_of_iterations_in_loop = \n");
2808
2809 res = chrec_dont_know;
2810 exit = single_exit (loop);
2811
2812 if (exit && number_of_iterations_exit (loop, exit, &niter_desc, false))
2813 {
2814 may_be_zero = niter_desc.may_be_zero;
2815 res = niter_desc.niter;
2816 }
2817
2818 if (res == chrec_dont_know
2819 || !may_be_zero
2820 || integer_zerop (may_be_zero))
2821 ;
2822 else if (integer_nonzerop (may_be_zero))
2823 res = build_int_cst (TREE_TYPE (res), 0);
2824
2825 else if (COMPARISON_CLASS_P (may_be_zero))
2826 res = fold_build3 (COND_EXPR, TREE_TYPE (res), may_be_zero,
2827 build_int_cst (TREE_TYPE (res), 0), res);
2828 else
2829 res = chrec_dont_know;
2830
2831 if (dump_file && (dump_flags & TDF_SCEV))
2832 {
2833 fprintf (dump_file, " (set_nb_iterations_in_loop = ");
2834 print_generic_expr (dump_file, res);
2835 fprintf (dump_file, "))\n");
2836 }
2837
2838 loop->nb_iterations = res;
2839 return res;
2840 }
2841 \f
2842
2843 /* Counters for the stats. */
2844
2845 struct chrec_stats
2846 {
2847 unsigned nb_chrecs;
2848 unsigned nb_affine;
2849 unsigned nb_affine_multivar;
2850 unsigned nb_higher_poly;
2851 unsigned nb_chrec_dont_know;
2852 unsigned nb_undetermined;
2853 };
2854
2855 /* Reset the counters. */
2856
2857 static inline void
2858 reset_chrecs_counters (struct chrec_stats *stats)
2859 {
2860 stats->nb_chrecs = 0;
2861 stats->nb_affine = 0;
2862 stats->nb_affine_multivar = 0;
2863 stats->nb_higher_poly = 0;
2864 stats->nb_chrec_dont_know = 0;
2865 stats->nb_undetermined = 0;
2866 }
2867
2868 /* Dump the contents of a CHREC_STATS structure. */
2869
2870 static void
2871 dump_chrecs_stats (FILE *file, struct chrec_stats *stats)
2872 {
2873 fprintf (file, "\n(\n");
2874 fprintf (file, "-----------------------------------------\n");
2875 fprintf (file, "%d\taffine univariate chrecs\n", stats->nb_affine);
2876 fprintf (file, "%d\taffine multivariate chrecs\n", stats->nb_affine_multivar);
2877 fprintf (file, "%d\tdegree greater than 2 polynomials\n",
2878 stats->nb_higher_poly);
2879 fprintf (file, "%d\tchrec_dont_know chrecs\n", stats->nb_chrec_dont_know);
2880 fprintf (file, "-----------------------------------------\n");
2881 fprintf (file, "%d\ttotal chrecs\n", stats->nb_chrecs);
2882 fprintf (file, "%d\twith undetermined coefficients\n",
2883 stats->nb_undetermined);
2884 fprintf (file, "-----------------------------------------\n");
2885 fprintf (file, "%d\tchrecs in the scev database\n",
2886 (int) scalar_evolution_info->elements ());
2887 fprintf (file, "%d\tsets in the scev database\n", nb_set_scev);
2888 fprintf (file, "%d\tgets in the scev database\n", nb_get_scev);
2889 fprintf (file, "-----------------------------------------\n");
2890 fprintf (file, ")\n\n");
2891 }
2892
2893 /* Gather statistics about CHREC. */
2894
2895 static void
2896 gather_chrec_stats (tree chrec, struct chrec_stats *stats)
2897 {
2898 if (dump_file && (dump_flags & TDF_STATS))
2899 {
2900 fprintf (dump_file, "(classify_chrec ");
2901 print_generic_expr (dump_file, chrec);
2902 fprintf (dump_file, "\n");
2903 }
2904
2905 stats->nb_chrecs++;
2906
2907 if (chrec == NULL_TREE)
2908 {
2909 stats->nb_undetermined++;
2910 return;
2911 }
2912
2913 switch (TREE_CODE (chrec))
2914 {
2915 case POLYNOMIAL_CHREC:
2916 if (evolution_function_is_affine_p (chrec))
2917 {
2918 if (dump_file && (dump_flags & TDF_STATS))
2919 fprintf (dump_file, " affine_univariate\n");
2920 stats->nb_affine++;
2921 }
2922 else if (evolution_function_is_affine_multivariate_p (chrec, 0))
2923 {
2924 if (dump_file && (dump_flags & TDF_STATS))
2925 fprintf (dump_file, " affine_multivariate\n");
2926 stats->nb_affine_multivar++;
2927 }
2928 else
2929 {
2930 if (dump_file && (dump_flags & TDF_STATS))
2931 fprintf (dump_file, " higher_degree_polynomial\n");
2932 stats->nb_higher_poly++;
2933 }
2934
2935 break;
2936
2937 default:
2938 break;
2939 }
2940
2941 if (chrec_contains_undetermined (chrec))
2942 {
2943 if (dump_file && (dump_flags & TDF_STATS))
2944 fprintf (dump_file, " undetermined\n");
2945 stats->nb_undetermined++;
2946 }
2947
2948 if (dump_file && (dump_flags & TDF_STATS))
2949 fprintf (dump_file, ")\n");
2950 }
2951
2952 /* Classify the chrecs of the whole database. */
2953
2954 void
2955 gather_stats_on_scev_database (void)
2956 {
2957 struct chrec_stats stats;
2958
2959 if (!dump_file)
2960 return;
2961
2962 reset_chrecs_counters (&stats);
2963
2964 hash_table<scev_info_hasher>::iterator iter;
2965 scev_info_str *elt;
2966 FOR_EACH_HASH_TABLE_ELEMENT (*scalar_evolution_info, elt, scev_info_str *,
2967 iter)
2968 gather_chrec_stats (elt->chrec, &stats);
2969
2970 dump_chrecs_stats (dump_file, &stats);
2971 }
2972
2973 \f
2974 /* Initialize the analysis of scalar evolutions for LOOPS. */
2975
2976 void
2977 scev_initialize (void)
2978 {
2979 gcc_assert (! scev_initialized_p ()
2980 && loops_state_satisfies_p (cfun, LOOPS_NORMAL));
2981
2982 scalar_evolution_info = hash_table<scev_info_hasher>::create_ggc (100);
2983
2984 for (auto loop : loops_list (cfun, 0))
2985 loop->nb_iterations = NULL_TREE;
2986 }
2987
2988 /* Return true if SCEV is initialized. */
2989
2990 bool
2991 scev_initialized_p (void)
2992 {
2993 return scalar_evolution_info != NULL;
2994 }
2995
2996 /* Cleans up the information cached by the scalar evolutions analysis
2997 in the hash table. */
2998
2999 void
3000 scev_reset_htab (void)
3001 {
3002 if (!scalar_evolution_info)
3003 return;
3004
3005 scalar_evolution_info->empty ();
3006 }
3007
3008 /* Cleans up the information cached by the scalar evolutions analysis
3009 in the hash table and in the loop->nb_iterations. */
3010
3011 void
3012 scev_reset (void)
3013 {
3014 scev_reset_htab ();
3015
3016 for (auto loop : loops_list (cfun, 0))
3017 loop->nb_iterations = NULL_TREE;
3018 }
3019
3020 /* Return true if the IV calculation in TYPE can overflow based on the knowledge
3021 of the upper bound on the number of iterations of LOOP, the BASE and STEP
3022 of IV.
3023
3024 We do not use information whether TYPE can overflow so it is safe to
3025 use this test even for derived IVs not computed every iteration or
3026 hypotetical IVs to be inserted into code. */
3027
3028 bool
3029 iv_can_overflow_p (class loop *loop, tree type, tree base, tree step)
3030 {
3031 widest_int nit;
3032 wide_int base_min, base_max, step_min, step_max, type_min, type_max;
3033 signop sgn = TYPE_SIGN (type);
3034 value_range r;
3035
3036 if (integer_zerop (step))
3037 return false;
3038
3039 if (!INTEGRAL_TYPE_P (TREE_TYPE (base))
3040 || !get_range_query (cfun)->range_of_expr (r, base)
3041 || r.kind () != VR_RANGE)
3042 return true;
3043
3044 base_min = r.lower_bound ();
3045 base_max = r.upper_bound ();
3046
3047 if (!INTEGRAL_TYPE_P (TREE_TYPE (step))
3048 || !get_range_query (cfun)->range_of_expr (r, step)
3049 || r.kind () != VR_RANGE)
3050 return true;
3051
3052 step_min = r.lower_bound ();
3053 step_max = r.upper_bound ();
3054
3055 if (!get_max_loop_iterations (loop, &nit))
3056 return true;
3057
3058 type_min = wi::min_value (type);
3059 type_max = wi::max_value (type);
3060
3061 /* Just sanity check that we don't see values out of the range of the type.
3062 In this case the arithmetics bellow would overflow. */
3063 gcc_checking_assert (wi::ge_p (base_min, type_min, sgn)
3064 && wi::le_p (base_max, type_max, sgn));
3065
3066 /* Account the possible increment in the last ieration. */
3067 wi::overflow_type overflow = wi::OVF_NONE;
3068 nit = wi::add (nit, 1, SIGNED, &overflow);
3069 if (overflow)
3070 return true;
3071
3072 /* NIT is typeless and can exceed the precision of the type. In this case
3073 overflow is always possible, because we know STEP is non-zero. */
3074 if (wi::min_precision (nit, UNSIGNED) > TYPE_PRECISION (type))
3075 return true;
3076 wide_int nit2 = wide_int::from (nit, TYPE_PRECISION (type), UNSIGNED);
3077
3078 /* If step can be positive, check that nit*step <= type_max-base.
3079 This can be done by unsigned arithmetic and we only need to watch overflow
3080 in the multiplication. The right hand side can always be represented in
3081 the type. */
3082 if (sgn == UNSIGNED || !wi::neg_p (step_max))
3083 {
3084 wi::overflow_type overflow = wi::OVF_NONE;
3085 if (wi::gtu_p (wi::mul (step_max, nit2, UNSIGNED, &overflow),
3086 type_max - base_max)
3087 || overflow)
3088 return true;
3089 }
3090 /* If step can be negative, check that nit*(-step) <= base_min-type_min. */
3091 if (sgn == SIGNED && wi::neg_p (step_min))
3092 {
3093 wi::overflow_type overflow, overflow2;
3094 overflow = overflow2 = wi::OVF_NONE;
3095 if (wi::gtu_p (wi::mul (wi::neg (step_min, &overflow2),
3096 nit2, UNSIGNED, &overflow),
3097 base_min - type_min)
3098 || overflow || overflow2)
3099 return true;
3100 }
3101
3102 return false;
3103 }
3104
3105 /* Given EV with form of "(type) {inner_base, inner_step}_loop", this
3106 function tries to derive condition under which it can be simplified
3107 into "{(type)inner_base, (type)inner_step}_loop". The condition is
3108 the maximum number that inner iv can iterate. */
3109
3110 static tree
3111 derive_simple_iv_with_niters (tree ev, tree *niters)
3112 {
3113 if (!CONVERT_EXPR_P (ev))
3114 return ev;
3115
3116 tree inner_ev = TREE_OPERAND (ev, 0);
3117 if (TREE_CODE (inner_ev) != POLYNOMIAL_CHREC)
3118 return ev;
3119
3120 tree init = CHREC_LEFT (inner_ev);
3121 tree step = CHREC_RIGHT (inner_ev);
3122 if (TREE_CODE (init) != INTEGER_CST
3123 || TREE_CODE (step) != INTEGER_CST || integer_zerop (step))
3124 return ev;
3125
3126 tree type = TREE_TYPE (ev);
3127 tree inner_type = TREE_TYPE (inner_ev);
3128 if (TYPE_PRECISION (inner_type) >= TYPE_PRECISION (type))
3129 return ev;
3130
3131 /* Type conversion in "(type) {inner_base, inner_step}_loop" can be
3132 folded only if inner iv won't overflow. We compute the maximum
3133 number the inner iv can iterate before overflowing and return the
3134 simplified affine iv. */
3135 tree delta;
3136 init = fold_convert (type, init);
3137 step = fold_convert (type, step);
3138 ev = build_polynomial_chrec (CHREC_VARIABLE (inner_ev), init, step);
3139 if (tree_int_cst_sign_bit (step))
3140 {
3141 tree bound = lower_bound_in_type (inner_type, inner_type);
3142 delta = fold_build2 (MINUS_EXPR, type, init, fold_convert (type, bound));
3143 step = fold_build1 (NEGATE_EXPR, type, step);
3144 }
3145 else
3146 {
3147 tree bound = upper_bound_in_type (inner_type, inner_type);
3148 delta = fold_build2 (MINUS_EXPR, type, fold_convert (type, bound), init);
3149 }
3150 *niters = fold_build2 (FLOOR_DIV_EXPR, type, delta, step);
3151 return ev;
3152 }
3153
3154 /* Checks whether use of OP in USE_LOOP behaves as a simple affine iv with
3155 respect to WRTO_LOOP and returns its base and step in IV if possible
3156 (see analyze_scalar_evolution_in_loop for more details on USE_LOOP
3157 and WRTO_LOOP). If ALLOW_NONCONSTANT_STEP is true, we want step to be
3158 invariant in LOOP. Otherwise we require it to be an integer constant.
3159
3160 IV->no_overflow is set to true if we are sure the iv cannot overflow (e.g.
3161 because it is computed in signed arithmetics). Consequently, adding an
3162 induction variable
3163
3164 for (i = IV->base; ; i += IV->step)
3165
3166 is only safe if IV->no_overflow is false, or TYPE_OVERFLOW_UNDEFINED is
3167 false for the type of the induction variable, or you can prove that i does
3168 not wrap by some other argument. Otherwise, this might introduce undefined
3169 behavior, and
3170
3171 i = iv->base;
3172 for (; ; i = (type) ((unsigned type) i + (unsigned type) iv->step))
3173
3174 must be used instead.
3175
3176 When IV_NITERS is not NULL, this function also checks case in which OP
3177 is a conversion of an inner simple iv of below form:
3178
3179 (outer_type){inner_base, inner_step}_loop.
3180
3181 If type of inner iv has smaller precision than outer_type, it can't be
3182 folded into {(outer_type)inner_base, (outer_type)inner_step}_loop because
3183 the inner iv could overflow/wrap. In this case, we derive a condition
3184 under which the inner iv won't overflow/wrap and do the simplification.
3185 The derived condition normally is the maximum number the inner iv can
3186 iterate, and will be stored in IV_NITERS. This is useful in loop niter
3187 analysis, to derive break conditions when a loop must terminate, when is
3188 infinite. */
3189
3190 bool
3191 simple_iv_with_niters (class loop *wrto_loop, class loop *use_loop,
3192 tree op, affine_iv *iv, tree *iv_niters,
3193 bool allow_nonconstant_step)
3194 {
3195 enum tree_code code;
3196 tree type, ev, base, e;
3197 wide_int extreme;
3198 bool folded_casts;
3199
3200 iv->base = NULL_TREE;
3201 iv->step = NULL_TREE;
3202 iv->no_overflow = false;
3203
3204 type = TREE_TYPE (op);
3205 if (!POINTER_TYPE_P (type)
3206 && !INTEGRAL_TYPE_P (type))
3207 return false;
3208
3209 ev = analyze_scalar_evolution_in_loop (wrto_loop, use_loop, op,
3210 &folded_casts);
3211 if (chrec_contains_undetermined (ev)
3212 || chrec_contains_symbols_defined_in_loop (ev, wrto_loop->num))
3213 return false;
3214
3215 if (tree_does_not_contain_chrecs (ev))
3216 {
3217 iv->base = ev;
3218 iv->step = build_int_cst (TREE_TYPE (ev), 0);
3219 iv->no_overflow = true;
3220 return true;
3221 }
3222
3223 /* If we can derive valid scalar evolution with assumptions. */
3224 if (iv_niters && TREE_CODE (ev) != POLYNOMIAL_CHREC)
3225 ev = derive_simple_iv_with_niters (ev, iv_niters);
3226
3227 if (TREE_CODE (ev) != POLYNOMIAL_CHREC)
3228 return false;
3229
3230 if (CHREC_VARIABLE (ev) != (unsigned) wrto_loop->num)
3231 return false;
3232
3233 iv->step = CHREC_RIGHT (ev);
3234 if ((!allow_nonconstant_step && TREE_CODE (iv->step) != INTEGER_CST)
3235 || tree_contains_chrecs (iv->step, NULL))
3236 return false;
3237
3238 iv->base = CHREC_LEFT (ev);
3239 if (tree_contains_chrecs (iv->base, NULL))
3240 return false;
3241
3242 iv->no_overflow = !folded_casts && nowrap_type_p (type);
3243
3244 if (!iv->no_overflow
3245 && !iv_can_overflow_p (wrto_loop, type, iv->base, iv->step))
3246 iv->no_overflow = true;
3247
3248 /* Try to simplify iv base:
3249
3250 (signed T) ((unsigned T)base + step) ;; TREE_TYPE (base) == signed T
3251 == (signed T)(unsigned T)base + step
3252 == base + step
3253
3254 If we can prove operation (base + step) doesn't overflow or underflow.
3255 Specifically, we try to prove below conditions are satisfied:
3256
3257 base <= UPPER_BOUND (type) - step ;;step > 0
3258 base >= LOWER_BOUND (type) - step ;;step < 0
3259
3260 This is done by proving the reverse conditions are false using loop's
3261 initial conditions.
3262
3263 The is necessary to make loop niter, or iv overflow analysis easier
3264 for below example:
3265
3266 int foo (int *a, signed char s, signed char l)
3267 {
3268 signed char i;
3269 for (i = s; i < l; i++)
3270 a[i] = 0;
3271 return 0;
3272 }
3273
3274 Note variable I is firstly converted to type unsigned char, incremented,
3275 then converted back to type signed char. */
3276
3277 if (wrto_loop->num != use_loop->num)
3278 return true;
3279
3280 if (!CONVERT_EXPR_P (iv->base) || TREE_CODE (iv->step) != INTEGER_CST)
3281 return true;
3282
3283 type = TREE_TYPE (iv->base);
3284 e = TREE_OPERAND (iv->base, 0);
3285 if (TREE_CODE (e) != PLUS_EXPR
3286 || TREE_CODE (TREE_OPERAND (e, 1)) != INTEGER_CST
3287 || !tree_int_cst_equal (iv->step,
3288 fold_convert (type, TREE_OPERAND (e, 1))))
3289 return true;
3290 e = TREE_OPERAND (e, 0);
3291 if (!CONVERT_EXPR_P (e))
3292 return true;
3293 base = TREE_OPERAND (e, 0);
3294 if (!useless_type_conversion_p (type, TREE_TYPE (base)))
3295 return true;
3296
3297 if (tree_int_cst_sign_bit (iv->step))
3298 {
3299 code = LT_EXPR;
3300 extreme = wi::min_value (type);
3301 }
3302 else
3303 {
3304 code = GT_EXPR;
3305 extreme = wi::max_value (type);
3306 }
3307 wi::overflow_type overflow = wi::OVF_NONE;
3308 extreme = wi::sub (extreme, wi::to_wide (iv->step),
3309 TYPE_SIGN (type), &overflow);
3310 if (overflow)
3311 return true;
3312 e = fold_build2 (code, boolean_type_node, base,
3313 wide_int_to_tree (type, extreme));
3314 e = simplify_using_initial_conditions (use_loop, e);
3315 if (!integer_zerop (e))
3316 return true;
3317
3318 if (POINTER_TYPE_P (TREE_TYPE (base)))
3319 code = POINTER_PLUS_EXPR;
3320 else
3321 code = PLUS_EXPR;
3322
3323 iv->base = fold_build2 (code, TREE_TYPE (base), base, iv->step);
3324 return true;
3325 }
3326
3327 /* Like simple_iv_with_niters, but return TRUE when OP behaves as a simple
3328 affine iv unconditionally. */
3329
3330 bool
3331 simple_iv (class loop *wrto_loop, class loop *use_loop, tree op,
3332 affine_iv *iv, bool allow_nonconstant_step)
3333 {
3334 return simple_iv_with_niters (wrto_loop, use_loop, op, iv,
3335 NULL, allow_nonconstant_step);
3336 }
3337
3338 /* Finalize the scalar evolution analysis. */
3339
3340 void
3341 scev_finalize (void)
3342 {
3343 if (!scalar_evolution_info)
3344 return;
3345 scalar_evolution_info->empty ();
3346 scalar_evolution_info = NULL;
3347 free_numbers_of_iterations_estimates (cfun);
3348 }
3349
3350 /* Returns true if the expression EXPR is considered to be too expensive
3351 for scev_const_prop. */
3352
3353 static bool
3354 expression_expensive_p (tree expr, hash_map<tree, uint64_t> &cache,
3355 uint64_t &cost)
3356 {
3357 enum tree_code code;
3358
3359 if (is_gimple_val (expr))
3360 return false;
3361
3362 code = TREE_CODE (expr);
3363 if (code == TRUNC_DIV_EXPR
3364 || code == CEIL_DIV_EXPR
3365 || code == FLOOR_DIV_EXPR
3366 || code == ROUND_DIV_EXPR
3367 || code == TRUNC_MOD_EXPR
3368 || code == CEIL_MOD_EXPR
3369 || code == FLOOR_MOD_EXPR
3370 || code == ROUND_MOD_EXPR
3371 || code == EXACT_DIV_EXPR)
3372 {
3373 /* Division by power of two is usually cheap, so we allow it.
3374 Forbid anything else. */
3375 if (!integer_pow2p (TREE_OPERAND (expr, 1)))
3376 return true;
3377 }
3378
3379 bool visited_p;
3380 uint64_t &local_cost = cache.get_or_insert (expr, &visited_p);
3381 if (visited_p)
3382 {
3383 uint64_t tem = cost + local_cost;
3384 if (tem < cost)
3385 return true;
3386 cost = tem;
3387 return false;
3388 }
3389 local_cost = 1;
3390
3391 uint64_t op_cost = 0;
3392 if (code == CALL_EXPR)
3393 {
3394 tree arg;
3395 call_expr_arg_iterator iter;
3396 /* Even though is_inexpensive_builtin might say true, we will get a
3397 library call for popcount when backend does not have an instruction
3398 to do so. We consider this to be expensive and generate
3399 __builtin_popcount only when backend defines it. */
3400 optab optab;
3401 combined_fn cfn = get_call_combined_fn (expr);
3402 switch (cfn)
3403 {
3404 CASE_CFN_POPCOUNT:
3405 optab = popcount_optab;
3406 goto bitcount_call;
3407 CASE_CFN_CLZ:
3408 optab = clz_optab;
3409 goto bitcount_call;
3410 CASE_CFN_CTZ:
3411 optab = ctz_optab;
3412 bitcount_call:
3413 /* Check if opcode for popcount is available in the mode required. */
3414 if (optab_handler (optab,
3415 TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0))))
3416 == CODE_FOR_nothing)
3417 {
3418 machine_mode mode;
3419 mode = TYPE_MODE (TREE_TYPE (CALL_EXPR_ARG (expr, 0)));
3420 scalar_int_mode int_mode;
3421
3422 /* If the mode is of 2 * UNITS_PER_WORD size, we can handle
3423 double-word popcount by emitting two single-word popcount
3424 instructions. */
3425 if (is_a <scalar_int_mode> (mode, &int_mode)
3426 && GET_MODE_SIZE (int_mode) == 2 * UNITS_PER_WORD
3427 && (optab_handler (optab, word_mode)
3428 != CODE_FOR_nothing))
3429 break;
3430 return true;
3431 }
3432 break;
3433
3434 default:
3435 if (cfn == CFN_LAST
3436 || !is_inexpensive_builtin (get_callee_fndecl (expr)))
3437 return true;
3438 break;
3439 }
3440
3441 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
3442 if (expression_expensive_p (arg, cache, op_cost))
3443 return true;
3444 *cache.get (expr) += op_cost;
3445 cost += op_cost + 1;
3446 return false;
3447 }
3448
3449 if (code == COND_EXPR)
3450 {
3451 if (expression_expensive_p (TREE_OPERAND (expr, 0), cache, op_cost)
3452 || (EXPR_P (TREE_OPERAND (expr, 1))
3453 && EXPR_P (TREE_OPERAND (expr, 2)))
3454 /* If either branch has side effects or could trap. */
3455 || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 1))
3456 || generic_expr_could_trap_p (TREE_OPERAND (expr, 1))
3457 || TREE_SIDE_EFFECTS (TREE_OPERAND (expr, 0))
3458 || generic_expr_could_trap_p (TREE_OPERAND (expr, 0))
3459 || expression_expensive_p (TREE_OPERAND (expr, 1),
3460 cache, op_cost)
3461 || expression_expensive_p (TREE_OPERAND (expr, 2),
3462 cache, op_cost))
3463 return true;
3464 *cache.get (expr) += op_cost;
3465 cost += op_cost + 1;
3466 return false;
3467 }
3468
3469 switch (TREE_CODE_CLASS (code))
3470 {
3471 case tcc_binary:
3472 case tcc_comparison:
3473 if (expression_expensive_p (TREE_OPERAND (expr, 1), cache, op_cost))
3474 return true;
3475
3476 /* Fallthru. */
3477 case tcc_unary:
3478 if (expression_expensive_p (TREE_OPERAND (expr, 0), cache, op_cost))
3479 return true;
3480 *cache.get (expr) += op_cost;
3481 cost += op_cost + 1;
3482 return false;
3483
3484 default:
3485 return true;
3486 }
3487 }
3488
3489 bool
3490 expression_expensive_p (tree expr)
3491 {
3492 hash_map<tree, uint64_t> cache;
3493 uint64_t expanded_size = 0;
3494 return (expression_expensive_p (expr, cache, expanded_size)
3495 || expanded_size > cache.elements ());
3496 }
3497
3498 /* Match.pd function to match bitwise inductive expression.
3499 .i.e.
3500 _2 = 1 << _1;
3501 _3 = ~_2;
3502 tmp_9 = _3 & tmp_12; */
3503 extern bool gimple_bitwise_induction_p (tree, tree *, tree (*)(tree));
3504
3505 /* Return the inductive expression of bitwise operation if possible,
3506 otherwise returns DEF. */
3507 static tree
3508 analyze_and_compute_bitwise_induction_effect (class loop* loop,
3509 tree phidef,
3510 unsigned HOST_WIDE_INT niter)
3511 {
3512 tree match_op[3],inv, bitwise_scev;
3513 tree type = TREE_TYPE (phidef);
3514 gphi* header_phi = NULL;
3515
3516 /* Match things like op2(MATCH_OP[2]), op1(MATCH_OP[1]), phidef(PHIDEF)
3517
3518 op2 = PHI <phidef, inv>
3519 _1 = (int) bit_17;
3520 _3 = 1 << _1;
3521 op1 = ~_3;
3522 phidef = op1 & op2; */
3523 if (!gimple_bitwise_induction_p (phidef, &match_op[0], NULL)
3524 || TREE_CODE (match_op[2]) != SSA_NAME
3525 || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[2])))
3526 || gimple_phi_num_args (header_phi) != 2)
3527 return NULL_TREE;
3528
3529 if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
3530 return NULL_TREE;
3531
3532 bitwise_scev = analyze_scalar_evolution (loop, match_op[1]);
3533 bitwise_scev = instantiate_parameters (loop, bitwise_scev);
3534
3535 /* Make sure bits is in range of type precision. */
3536 if (TREE_CODE (bitwise_scev) != POLYNOMIAL_CHREC
3537 || !INTEGRAL_TYPE_P (TREE_TYPE (bitwise_scev))
3538 || !tree_fits_uhwi_p (CHREC_LEFT (bitwise_scev))
3539 || tree_to_uhwi (CHREC_LEFT (bitwise_scev)) >= TYPE_PRECISION (type)
3540 || !tree_fits_shwi_p (CHREC_RIGHT (bitwise_scev)))
3541 return NULL_TREE;
3542
3543 enum bit_op_kind
3544 {
3545 INDUCTION_BIT_CLEAR,
3546 INDUCTION_BIT_IOR,
3547 INDUCTION_BIT_XOR,
3548 INDUCTION_BIT_RESET,
3549 INDUCTION_ZERO,
3550 INDUCTION_ALL
3551 };
3552
3553 enum bit_op_kind induction_kind;
3554 enum tree_code code1
3555 = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (phidef));
3556 enum tree_code code2
3557 = gimple_assign_rhs_code (SSA_NAME_DEF_STMT (match_op[0]));
3558
3559 /* BIT_CLEAR: A &= ~(1 << bit)
3560 BIT_RESET: A ^= (1 << bit).
3561 BIT_IOR: A |= (1 << bit)
3562 BIT_ZERO: A &= (1 << bit)
3563 BIT_ALL: A |= ~(1 << bit)
3564 BIT_XOR: A ^= ~(1 << bit).
3565 bit is induction variable. */
3566 switch (code1)
3567 {
3568 case BIT_AND_EXPR:
3569 induction_kind = code2 == BIT_NOT_EXPR
3570 ? INDUCTION_BIT_CLEAR
3571 : INDUCTION_ZERO;
3572 break;
3573 case BIT_IOR_EXPR:
3574 induction_kind = code2 == BIT_NOT_EXPR
3575 ? INDUCTION_ALL
3576 : INDUCTION_BIT_IOR;
3577 break;
3578 case BIT_XOR_EXPR:
3579 induction_kind = code2 == BIT_NOT_EXPR
3580 ? INDUCTION_BIT_XOR
3581 : INDUCTION_BIT_RESET;
3582 break;
3583 /* A ^ ~(1 << bit) is equal to ~(A ^ (1 << bit)). */
3584 case BIT_NOT_EXPR:
3585 gcc_assert (code2 == BIT_XOR_EXPR);
3586 induction_kind = INDUCTION_BIT_XOR;
3587 break;
3588 default:
3589 gcc_unreachable ();
3590 }
3591
3592 if (induction_kind == INDUCTION_ZERO)
3593 return build_zero_cst (type);
3594 if (induction_kind == INDUCTION_ALL)
3595 return build_all_ones_cst (type);
3596
3597 wide_int bits = wi::zero (TYPE_PRECISION (type));
3598 HOST_WIDE_INT bit_start = tree_to_shwi (CHREC_LEFT (bitwise_scev));
3599 HOST_WIDE_INT step = tree_to_shwi (CHREC_RIGHT (bitwise_scev));
3600 HOST_WIDE_INT bit_final = bit_start + step * niter;
3601
3602 /* bit_start, bit_final in range of [0,TYPE_PRECISION)
3603 implies all bits are set in range. */
3604 if (bit_final >= TYPE_PRECISION (type)
3605 || bit_final < 0)
3606 return NULL_TREE;
3607
3608 /* Loop tripcount should be niter + 1. */
3609 for (unsigned i = 0; i != niter + 1; i++)
3610 {
3611 bits = wi::set_bit (bits, bit_start);
3612 bit_start += step;
3613 }
3614
3615 bool inverted = false;
3616 switch (induction_kind)
3617 {
3618 case INDUCTION_BIT_CLEAR:
3619 code1 = BIT_AND_EXPR;
3620 inverted = true;
3621 break;
3622 case INDUCTION_BIT_IOR:
3623 code1 = BIT_IOR_EXPR;
3624 break;
3625 case INDUCTION_BIT_RESET:
3626 code1 = BIT_XOR_EXPR;
3627 break;
3628 /* A ^= ~(1 << bit) is special, when loop tripcount is even,
3629 it's equal to A ^= bits, else A ^= ~bits. */
3630 case INDUCTION_BIT_XOR:
3631 code1 = BIT_XOR_EXPR;
3632 if (niter % 2 == 0)
3633 inverted = true;
3634 break;
3635 default:
3636 gcc_unreachable ();
3637 }
3638
3639 if (inverted)
3640 bits = wi::bit_not (bits);
3641
3642 inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop));
3643 return fold_build2 (code1, type, inv, wide_int_to_tree (type, bits));
3644 }
3645
3646 /* Match.pd function to match bitop with invariant expression
3647 .i.e.
3648 tmp_7 = _0 & _1; */
3649 extern bool gimple_bitop_with_inv_p (tree, tree *, tree (*)(tree));
3650
3651 /* Return the inductive expression of bitop with invariant if possible,
3652 otherwise returns DEF. */
3653 static tree
3654 analyze_and_compute_bitop_with_inv_effect (class loop* loop, tree phidef,
3655 tree niter)
3656 {
3657 tree match_op[2],inv;
3658 tree type = TREE_TYPE (phidef);
3659 gphi* header_phi = NULL;
3660 enum tree_code code;
3661 /* match thing like op0 (match[0]), op1 (match[1]), phidef (PHIDEF)
3662
3663 op1 = PHI <phidef, inv>
3664 phidef = op0 & op1
3665 if op0 is an invariant, it could change to
3666 phidef = op0 & inv. */
3667 gimple *def;
3668 def = SSA_NAME_DEF_STMT (phidef);
3669 if (!(is_gimple_assign (def)
3670 && ((code = gimple_assign_rhs_code (def)), true)
3671 && (code == BIT_AND_EXPR || code == BIT_IOR_EXPR
3672 || code == BIT_XOR_EXPR)))
3673 return NULL_TREE;
3674
3675 match_op[0] = gimple_assign_rhs1 (def);
3676 match_op[1] = gimple_assign_rhs2 (def);
3677
3678 if (TREE_CODE (match_op[1]) != SSA_NAME
3679 || !expr_invariant_in_loop_p (loop, match_op[0])
3680 || !(header_phi = dyn_cast <gphi *> (SSA_NAME_DEF_STMT (match_op[1])))
3681 || gimple_phi_num_args (header_phi) != 2)
3682 return NULL_TREE;
3683
3684 if (PHI_ARG_DEF_FROM_EDGE (header_phi, loop_latch_edge (loop)) != phidef)
3685 return NULL_TREE;
3686
3687 enum tree_code code1
3688 = gimple_assign_rhs_code (def);
3689
3690 if (code1 == BIT_XOR_EXPR)
3691 {
3692 if (!tree_fits_uhwi_p (niter))
3693 return NULL_TREE;
3694 unsigned HOST_WIDE_INT niter_num;
3695 niter_num = tree_to_uhwi (niter);
3696 if (niter_num % 2 != 0)
3697 match_op[0] = build_zero_cst (type);
3698 }
3699
3700 inv = PHI_ARG_DEF_FROM_EDGE (header_phi, loop_preheader_edge (loop));
3701 return fold_build2 (code1, type, inv, match_op[0]);
3702 }
3703
3704 /* Do final value replacement for LOOP, return true if we did anything. */
3705
3706 bool
3707 final_value_replacement_loop (class loop *loop)
3708 {
3709 /* If we do not know exact number of iterations of the loop, we cannot
3710 replace the final value. */
3711 edge exit = single_exit (loop);
3712 if (!exit)
3713 return false;
3714
3715 tree niter = number_of_latch_executions (loop);
3716 if (niter == chrec_dont_know)
3717 return false;
3718
3719 /* Ensure that it is possible to insert new statements somewhere. */
3720 if (!single_pred_p (exit->dest))
3721 split_loop_exit_edge (exit);
3722
3723 /* Set stmt insertion pointer. All stmts are inserted before this point. */
3724 gimple_stmt_iterator gsi = gsi_after_labels (exit->dest);
3725
3726 class loop *ex_loop
3727 = superloop_at_depth (loop,
3728 loop_depth (exit->dest->loop_father) + 1);
3729
3730 bool any = false;
3731 gphi_iterator psi;
3732 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); )
3733 {
3734 gphi *phi = psi.phi ();
3735 tree rslt = PHI_RESULT (phi);
3736 tree phidef = PHI_ARG_DEF_FROM_EDGE (phi, exit);
3737 tree def = phidef;
3738 if (virtual_operand_p (def))
3739 {
3740 gsi_next (&psi);
3741 continue;
3742 }
3743
3744 if (!POINTER_TYPE_P (TREE_TYPE (def))
3745 && !INTEGRAL_TYPE_P (TREE_TYPE (def)))
3746 {
3747 gsi_next (&psi);
3748 continue;
3749 }
3750
3751 bool folded_casts;
3752 def = analyze_scalar_evolution_in_loop (ex_loop, loop, def,
3753 &folded_casts);
3754
3755 tree bitinv_def, bit_def;
3756 unsigned HOST_WIDE_INT niter_num;
3757
3758 if (def != chrec_dont_know)
3759 def = compute_overall_effect_of_inner_loop (ex_loop, def);
3760
3761 /* Handle bitop with invariant induction expression.
3762
3763 .i.e
3764 for (int i =0 ;i < 32; i++)
3765 tmp &= bit2;
3766 if bit2 is an invariant in loop which could simple to
3767 tmp &= bit2. */
3768 else if ((bitinv_def
3769 = analyze_and_compute_bitop_with_inv_effect (loop,
3770 phidef, niter)))
3771 def = bitinv_def;
3772
3773 /* Handle bitwise induction expression.
3774
3775 .i.e.
3776 for (int i = 0; i != 64; i+=3)
3777 res &= ~(1UL << i);
3778
3779 RES can't be analyzed out by SCEV because it is not polynomially
3780 expressible, but in fact final value of RES can be replaced by
3781 RES & CONSTANT where CONSTANT all ones with bit {0,3,6,9,... ,63}
3782 being cleared, similar for BIT_IOR_EXPR/BIT_XOR_EXPR. */
3783 else if (tree_fits_uhwi_p (niter)
3784 && (niter_num = tree_to_uhwi (niter)) != 0
3785 && niter_num < TYPE_PRECISION (TREE_TYPE (phidef))
3786 && (bit_def
3787 = analyze_and_compute_bitwise_induction_effect (loop,
3788 phidef,
3789 niter_num)))
3790 def = bit_def;
3791
3792 if (!tree_does_not_contain_chrecs (def)
3793 || chrec_contains_symbols_defined_in_loop (def, ex_loop->num)
3794 /* Moving the computation from the loop may prolong life range
3795 of some ssa names, which may cause problems if they appear
3796 on abnormal edges. */
3797 || contains_abnormal_ssa_name_p (def)
3798 /* Do not emit expensive expressions. The rationale is that
3799 when someone writes a code like
3800
3801 while (n > 45) n -= 45;
3802
3803 he probably knows that n is not large, and does not want it
3804 to be turned into n %= 45. */
3805 || expression_expensive_p (def))
3806 {
3807 if (dump_file && (dump_flags & TDF_DETAILS))
3808 {
3809 fprintf (dump_file, "not replacing:\n ");
3810 print_gimple_stmt (dump_file, phi, 0);
3811 fprintf (dump_file, "\n");
3812 }
3813 gsi_next (&psi);
3814 continue;
3815 }
3816
3817 /* Eliminate the PHI node and replace it by a computation outside
3818 the loop. */
3819 if (dump_file)
3820 {
3821 fprintf (dump_file, "\nfinal value replacement:\n ");
3822 print_gimple_stmt (dump_file, phi, 0);
3823 fprintf (dump_file, " with expr: ");
3824 print_generic_expr (dump_file, def);
3825 }
3826 any = true;
3827 def = unshare_expr (def);
3828 remove_phi_node (&psi, false);
3829
3830 /* If def's type has undefined overflow and there were folded
3831 casts, rewrite all stmts added for def into arithmetics
3832 with defined overflow behavior. */
3833 if (folded_casts && ANY_INTEGRAL_TYPE_P (TREE_TYPE (def))
3834 && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (def)))
3835 {
3836 gimple_seq stmts;
3837 gimple_stmt_iterator gsi2;
3838 def = force_gimple_operand (def, &stmts, true, NULL_TREE);
3839 gsi2 = gsi_start (stmts);
3840 while (!gsi_end_p (gsi2))
3841 {
3842 gimple *stmt = gsi_stmt (gsi2);
3843 gimple_stmt_iterator gsi3 = gsi2;
3844 gsi_next (&gsi2);
3845 gsi_remove (&gsi3, false);
3846 if (is_gimple_assign (stmt)
3847 && arith_code_with_undefined_signed_overflow
3848 (gimple_assign_rhs_code (stmt)))
3849 gsi_insert_seq_before (&gsi,
3850 rewrite_to_defined_overflow (stmt),
3851 GSI_SAME_STMT);
3852 else
3853 gsi_insert_before (&gsi, stmt, GSI_SAME_STMT);
3854 }
3855 }
3856 else
3857 def = force_gimple_operand_gsi (&gsi, def, false, NULL_TREE,
3858 true, GSI_SAME_STMT);
3859
3860 gassign *ass = gimple_build_assign (rslt, def);
3861 gimple_set_location (ass,
3862 gimple_phi_arg_location (phi, exit->dest_idx));
3863 gsi_insert_before (&gsi, ass, GSI_SAME_STMT);
3864 if (dump_file)
3865 {
3866 fprintf (dump_file, "\n final stmt:\n ");
3867 print_gimple_stmt (dump_file, ass, 0);
3868 fprintf (dump_file, "\n");
3869 }
3870 }
3871
3872 return any;
3873 }
3874
3875 #include "gt-tree-scalar-evolution.h"