1 /* Induction variable optimizations.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 Note the interesting uses are categorized and handled in groups.
34 Generally, address type uses are grouped together if their iv bases
35 differ only by a constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49 The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64 All of this is done loop by loop. Doing it globally is theoretically
65 possible; it might give better performance and it might enable us
66 to decide costs more precisely, but getting all the interactions right
67 would be complicated. */
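/* As a rough illustration of the above (a sketch only, not taken from this
   file): for a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may decide that the address &a[0] + i * sizeof (a[0]) is better
   maintained as a separate pointer induction variable and that the exit test
   can be expressed in terms of it, giving roughly

     for (p = &a[0]; p < &a[n]; p++)
       *p = 0;

   which combines strength reduction (no multiplication by the element size)
   with induction variable elimination (the counter i is gone).  Whether this
   is actually profitable is decided by the cost model implemented below.  */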
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
115
116 /* The infinite cost. */
117 #define INFTY 10000000
118
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
122
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
128 {
129 niter = likely_max_stmt_executions_int (loop);
130
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133 }
134
135 return niter;
136 }
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144 tree base_object; /* A memory object to which the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147 struct iv_use *nonlin_use; /* The nonlinear-expression use recorded for this iv, if any. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS, /* Use is an address for an explicit memory
170 reference. */
171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE /* Use is a compare. */
175 };
176
177 /* Cost of a computation. */
178 struct comp_cost
179 {
180 comp_cost (): cost (0), complexity (0), scratch (0)
181 {}
182
183 comp_cost (int cost, unsigned complexity, int scratch = 0)
184 : cost (cost), complexity (complexity), scratch (scratch)
185 {}
186
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
189
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192
193 /* Adds COST to the comp_cost. */
194 comp_cost operator+= (comp_cost cost);
195
196 /* Adds constant C to this comp_cost. */
197 comp_cost operator+= (HOST_WIDE_INT c);
198
199 /* Subtracts constant C from this comp_cost. */
200 comp_cost operator-= (HOST_WIDE_INT c);
201
202 /* Divides the comp_cost by constant C. */
203 comp_cost operator/= (HOST_WIDE_INT c);
204
205 /* Multiplies the comp_cost by constant C. */
206 comp_cost operator*= (HOST_WIDE_INT c);
207
208 /* Subtracts cost COST2 from COST1. */
209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210
211 /* Subtracts COST from this comp_cost. */
212 comp_cost operator-= (comp_cost cost);
213
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1, comp_cost cost2);
216
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1, comp_cost cost2);
219
220 /* Returns true if COST1 is smaller than or equal to COST2. */
221 friend bool operator<= (comp_cost cost1, comp_cost cost2);
222
223 int cost; /* The runtime cost. */
224 unsigned complexity; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int scratch; /* Scratch used during cost computation. */
229 };
230
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, INFTY, INFTY);
233
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237 return cost == INFTY;
238 }
239
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244 return infinite_cost;
245
246 cost1.cost += cost2.cost;
247 cost1.complexity += cost2.complexity;
248
249 return cost1;
250 }
251
252 comp_cost
253 operator- (comp_cost cost1, comp_cost cost2)
254 {
255 if (cost1.infinite_cost_p ())
256 return infinite_cost;
257
258 gcc_assert (!cost2.infinite_cost_p ());
259
260 cost1.cost -= cost2.cost;
261 cost1.complexity -= cost2.complexity;
262
263 return cost1;
264 }
265
266 comp_cost
267 comp_cost::operator+= (comp_cost cost)
268 {
269 *this = *this + cost;
270 return *this;
271 }
272
273 comp_cost
274 comp_cost::operator+= (HOST_WIDE_INT c)
275 {
276 if (infinite_cost_p ())
277 return *this;
278
279 this->cost += c;
280
281 return *this;
282 }
283
284 comp_cost
285 comp_cost::operator-= (HOST_WIDE_INT c)
286 {
287 if (infinite_cost_p ())
288 return *this;
289
290 this->cost -= c;
291
292 return *this;
293 }
294
295 comp_cost
296 comp_cost::operator/= (HOST_WIDE_INT c)
297 {
298 if (infinite_cost_p ())
299 return *this;
300
301 this->cost /= c;
302
303 return *this;
304 }
305
306 comp_cost
307 comp_cost::operator*= (HOST_WIDE_INT c)
308 {
309 if (infinite_cost_p ())
310 return *this;
311
312 this->cost *= c;
313
314 return *this;
315 }
316
317 comp_cost
318 comp_cost::operator-= (comp_cost cost)
319 {
320 *this = *this - cost;
321 return *this;
322 }
323
324 bool
325 operator< (comp_cost cost1, comp_cost cost2)
326 {
327 if (cost1.cost == cost2.cost)
328 return cost1.complexity < cost2.complexity;
329
330 return cost1.cost < cost2.cost;
331 }
332
333 bool
334 operator== (comp_cost cost1, comp_cost cost2)
335 {
336 return cost1.cost == cost2.cost
337 && cost1.complexity == cost2.complexity;
338 }
339
340 bool
341 operator<= (comp_cost cost1, comp_cost cost2)
342 {
343 return cost1 < cost2 || cost1 == cost2;
344 }
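/* A small usage sketch with hypothetical values (nothing in this file relies
   on it): comparison of costs is lexicographic on (cost, complexity) and
   addition saturates at infinite_cost, so with

     comp_cost a (4, 1), b (4, 2);

   a < b is true because the equal runtime costs are tie-broken by the lower
   complexity, and (a + infinite_cost).infinite_cost_p () is true.  Candidate
   sets are thus compared first by runtime cost and only then by the
   complexity of the computations they imply.  */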
345
346 struct iv_inv_expr_ent;
347
348 /* The candidate - cost pair. */
349 struct cost_pair
350 {
351 struct iv_cand *cand; /* The candidate. */
352 comp_cost cost; /* The cost. */
353 enum tree_code comp; /* For iv elimination, the comparison. */
354 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
355 preserved when representing iv_use with iv_cand. */
356 bitmap inv_exprs; /* The list of newly created invariant expressions
357 when representing iv_use with iv_cand. */
358 tree value; /* For final value elimination, the expression for
359 the final value of the iv. For iv elimination,
360 the new bound to compare with. */
361 };
362
363 /* Use. */
364 struct iv_use
365 {
366 unsigned id; /* The id of the use. */
367 unsigned group_id; /* The group id the use belongs to. */
368 enum use_type type; /* Type of the use. */
369 tree mem_type; /* The memory type to use when testing whether an
370 address is legitimate, and what the address's
371 cost is. */
372 struct iv *iv; /* The induction variable it is based on. */
373 gimple *stmt; /* Statement in which it occurs. */
374 tree *op_p; /* The place where it occurs. */
375
376 tree addr_base; /* Base address with const offset stripped. */
377 poly_uint64_pod addr_offset;
378 /* Const offset stripped from base address. */
379 };
380
381 /* Group of uses. */
382 struct iv_group
383 {
384 /* The id of the group. */
385 unsigned id;
386 /* Uses of the group are of the same type. */
387 enum use_type type;
388 /* The set of "related" IV candidates, plus the important ones. */
389 bitmap related_cands;
390 /* Number of IV candidates in the cost_map. */
391 unsigned n_map_members;
392 /* The costs w.r.t. the iv candidates. */
393 struct cost_pair *cost_map;
394 /* The selected candidate for the group. */
395 struct iv_cand *selected;
396 /* Uses in the group. */
397 vec<struct iv_use *> vuses;
398 };
399
400 /* The position where the iv is computed. */
401 enum iv_position
402 {
403 IP_NORMAL, /* At the end, just before the exit condition. */
404 IP_END, /* At the end of the latch block. */
405 IP_BEFORE_USE, /* Immediately before a specific use. */
406 IP_AFTER_USE, /* Immediately after a specific use. */
407 IP_ORIGINAL /* The original biv. */
408 };
409
410 /* The induction variable candidate. */
411 struct iv_cand
412 {
413 unsigned id; /* The number of the candidate. */
414 bool important; /* Whether this is an "important" candidate, i.e. such
415 that it should be considered by all uses. */
416 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
417 gimple *incremented_at;/* For original biv, the statement where it is
418 incremented. */
419 tree var_before; /* The variable used for it before increment. */
420 tree var_after; /* The variable used for it after increment. */
421 struct iv *iv; /* The value of the candidate. NULL for
422 "pseudocandidate" used to indicate the possibility
423 to replace the final value of an iv by direct
424 computation of the value. */
425 unsigned cost; /* Cost of the candidate. */
426 unsigned cost_step; /* Cost of the candidate's increment operation. */
427 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 where it is incremented. */
429 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
430 iv_cand. */
431 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
432 handle it as a new invariant expression which will
433 be hoisted out of the loop. */
434 struct iv *orig_iv; /* The original iv if this cand is added from biv with
435 smaller type. */
436 };
437
438 /* Hashtable entry for common candidate derived from iv uses. */
439 struct iv_common_cand
440 {
441 tree base;
442 tree step;
443 /* IV uses from which this common candidate is derived. */
444 auto_vec<struct iv_use *> uses;
445 hashval_t hash;
446 };
447
448 /* Hashtable helpers. */
449
450 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
451 {
452 static inline hashval_t hash (const iv_common_cand *);
453 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
454 };
455
456 /* Hash function for possible common candidates. */
457
458 inline hashval_t
459 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
460 {
461 return ccand->hash;
462 }
463
464 /* Hash table equality function for common candidates. */
465
466 inline bool
467 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
468 const iv_common_cand *ccand2)
469 {
470 return (ccand1->hash == ccand2->hash
471 && operand_equal_p (ccand1->base, ccand2->base, 0)
472 && operand_equal_p (ccand1->step, ccand2->step, 0)
473 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
474 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
475 }
476
477 /* Loop invariant expression hashtable entry. */
478
479 struct iv_inv_expr_ent
480 {
481 /* Tree expression of the entry. */
482 tree expr;
483 /* Unique identifier. */
484 int id;
485 /* Hash value. */
486 hashval_t hash;
487 };
488
489 /* Comparison function for sorting iv_inv_expr_ent entries A and B by their id field. */
490
491 static int
492 sort_iv_inv_expr_ent (const void *a, const void *b)
493 {
494 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
495 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
496
497 unsigned id1 = (*e1)->id;
498 unsigned id2 = (*e2)->id;
499
500 if (id1 < id2)
501 return -1;
502 else if (id1 > id2)
503 return 1;
504 else
505 return 0;
506 }
507
508 /* Hashtable helpers. */
509
510 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
511 {
512 static inline hashval_t hash (const iv_inv_expr_ent *);
513 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
514 };
515
516 /* Return true if uses of type TYPE represent some form of address. */
517
518 inline bool
519 address_p (use_type type)
520 {
521 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
522 }
523
524 /* Hash function for loop invariant expressions. */
525
526 inline hashval_t
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
528 {
529 return expr->hash;
530 }
531
532 /* Hash table equality function for expressions. */
533
534 inline bool
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
536 const iv_inv_expr_ent *expr2)
537 {
538 return expr1->hash == expr2->hash
539 && operand_equal_p (expr1->expr, expr2->expr, 0);
540 }
541
542 struct ivopts_data
543 {
544 /* The currently optimized loop. */
545 struct loop *current_loop;
546 location_t loop_loc;
547
548 /* Numbers of iterations for all exits of the current loop. */
549 hash_map<edge, tree_niter_desc *> *niters;
550
551 /* Number of registers used in the current loop. */
552 unsigned regs_used;
553
554 /* The size of version_info array allocated. */
555 unsigned version_info_size;
556
557 /* The array of information for the ssa names. */
558 struct version_info *version_info;
559
560 /* The hashtable of loop invariant expressions created
561 by ivopt. */
562 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
563
564 /* The bitmap of indices in version_info whose value was changed. */
565 bitmap relevant;
566
567 /* The uses of induction variables. */
568 vec<iv_group *> vgroups;
569
570 /* The candidates. */
571 vec<iv_cand *> vcands;
572
573 /* A bitmap of important candidates. */
574 bitmap important_candidates;
575
576 /* Cache used by tree_to_aff_combination_expand. */
577 hash_map<tree, name_expansion *> *name_expansion_cache;
578
579 /* The hashtable of common candidates derived from iv uses. */
580 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
581
582 /* The common candidates. */
583 vec<iv_common_cand *> iv_common_cands;
584
585 /* The maximum invariant variable id. */
586 unsigned max_inv_var_id;
587
588 /* The maximum invariant expression id. */
589 unsigned max_inv_expr_id;
590
591 /* Number of no_overflow BIVs which are not used in memory address. */
592 unsigned bivs_not_used_in_addr;
593
594 /* Obstack for iv structure. */
595 struct obstack iv_obstack;
596
597 /* Whether to consider all candidates when replacing a use (if false, only
598 related and important candidates are considered). */
599 bool consider_all_candidates;
600
601 /* Are we optimizing for speed? */
602 bool speed;
603
604 /* Whether the loop body includes any function calls. */
605 bool body_includes_call;
606
607 /* Whether the loop body can only be exited via a single exit. */
608 bool loop_single_exit_p;
609 };
610
611 /* An assignment of iv candidates to uses. */
612
613 struct iv_ca
614 {
615 /* The number of uses covered by the assignment. */
616 unsigned upto;
617
618 /* Number of uses that cannot be expressed by the candidates in the set. */
619 unsigned bad_groups;
620
621 /* Candidate assigned to a use, together with the related costs. */
622 struct cost_pair **cand_for_group;
623
624 /* Number of times each candidate is used. */
625 unsigned *n_cand_uses;
626
627 /* The candidates used. */
628 bitmap cands;
629
630 /* The number of candidates in the set. */
631 unsigned n_cands;
632
633 /* The number of invariants needed, including both invariant variants and
634 invariant expressions. */
635 unsigned n_invs;
636
637 /* Total cost of expressing uses. */
638 comp_cost cand_use_cost;
639
640 /* Total cost of candidates. */
641 unsigned cand_cost;
642
643 /* Number of times each invariant variable is used. */
644 unsigned *n_inv_var_uses;
645
646 /* Number of times each invariant expression is used. */
647 unsigned *n_inv_expr_uses;
648
649 /* Total cost of the assignment. */
650 comp_cost cost;
651 };
652
653 /* Difference of two iv candidate assignments. */
654
655 struct iv_ca_delta
656 {
657 /* Changed group. */
658 struct iv_group *group;
659
660 /* An old assignment (for rollback purposes). */
661 struct cost_pair *old_cp;
662
663 /* A new assignment. */
664 struct cost_pair *new_cp;
665
666 /* Next change in the list. */
667 struct iv_ca_delta *next;
668 };
669
670 /* Bound on the number of candidates below which all candidates are considered. */
671
672 #define CONSIDER_ALL_CANDIDATES_BOUND \
673 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
674
675 /* If there are more iv occurrences, we just give up (it is quite unlikely that
676 optimizing such a loop would help, and it would take ages). */
677
678 #define MAX_CONSIDERED_GROUPS \
679 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
680
681 /* If there are at most this number of ivs in the set, always try removing
682 unnecessary ivs from the set. */
683
684 #define ALWAYS_PRUNE_CAND_SET_BOUND \
685 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
686
687 /* The list of trees for which the decl_rtl field must be reset is stored
688 here. */
689
690 static vec<tree> decl_rtl_to_reset;
691
692 static comp_cost force_expr_to_var_cost (tree, bool);
693
694 /* The single loop exit if it dominates the latch, NULL otherwise. */
695
696 edge
697 single_dom_exit (struct loop *loop)
698 {
699 edge exit = single_exit (loop);
700
701 if (!exit)
702 return NULL;
703
704 if (!just_once_each_iteration_p (loop, exit->src))
705 return NULL;
706
707 return exit;
708 }
709
710 /* Dumps information about the induction variable IV to FILE. Don't dump
711 variable's name if DUMP_NAME is FALSE. The information is dumped with
712 preceding spaces indicated by INDENT_LEVEL. */
713
714 void
715 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
716 {
717 const char *p;
718 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
719
720 if (indent_level > 4)
721 indent_level = 4;
722 p = spaces + 8 - (indent_level << 1);
723
724 fprintf (file, "%sIV struct:\n", p);
725 if (iv->ssa_name && dump_name)
726 {
727 fprintf (file, "%s SSA_NAME:\t", p);
728 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
729 fprintf (file, "\n");
730 }
731
732 fprintf (file, "%s Type:\t", p);
733 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
734 fprintf (file, "\n");
735
736 fprintf (file, "%s Base:\t", p);
737 print_generic_expr (file, iv->base, TDF_SLIM);
738 fprintf (file, "\n");
739
740 fprintf (file, "%s Step:\t", p);
741 print_generic_expr (file, iv->step, TDF_SLIM);
742 fprintf (file, "\n");
743
744 if (iv->base_object)
745 {
746 fprintf (file, "%s Object:\t", p);
747 print_generic_expr (file, iv->base_object, TDF_SLIM);
748 fprintf (file, "\n");
749 }
750
751 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
752
753 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
754 p, iv->no_overflow ? "No-overflow" : "Overflow");
755 }
756
757 /* Dumps information about the USE to FILE. */
758
759 void
760 dump_use (FILE *file, struct iv_use *use)
761 {
762 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
763 fprintf (file, " At stmt:\t");
764 print_gimple_stmt (file, use->stmt, 0);
765 fprintf (file, " At pos:\t");
766 if (use->op_p)
767 print_generic_expr (file, *use->op_p, TDF_SLIM);
768 fprintf (file, "\n");
769 dump_iv (file, use->iv, false, 2);
770 }
771
772 /* Dumps information about the uses to FILE. */
773
774 void
775 dump_groups (FILE *file, struct ivopts_data *data)
776 {
777 unsigned i, j;
778 struct iv_group *group;
779
780 for (i = 0; i < data->vgroups.length (); i++)
781 {
782 group = data->vgroups[i];
783 fprintf (file, "Group %d:\n", group->id);
784 if (group->type == USE_NONLINEAR_EXPR)
785 fprintf (file, " Type:\tGENERIC\n");
786 else if (group->type == USE_REF_ADDRESS)
787 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
788 else if (group->type == USE_PTR_ADDRESS)
789 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
790 else
791 {
792 gcc_assert (group->type == USE_COMPARE);
793 fprintf (file, " Type:\tCOMPARE\n");
794 }
795 for (j = 0; j < group->vuses.length (); j++)
796 dump_use (file, group->vuses[j]);
797 }
798 }
799
800 /* Dumps information about induction variable candidate CAND to FILE. */
801
802 void
803 dump_cand (FILE *file, struct iv_cand *cand)
804 {
805 struct iv *iv = cand->iv;
806
807 fprintf (file, "Candidate %d:\n", cand->id);
808 if (cand->inv_vars)
809 {
810 fprintf (file, " Depend on inv.vars: ");
811 dump_bitmap (file, cand->inv_vars);
812 }
813 if (cand->inv_exprs)
814 {
815 fprintf (file, " Depend on inv.exprs: ");
816 dump_bitmap (file, cand->inv_exprs);
817 }
818
819 if (cand->var_before)
820 {
821 fprintf (file, " Var befor: ");
822 print_generic_expr (file, cand->var_before, TDF_SLIM);
823 fprintf (file, "\n");
824 }
825 if (cand->var_after)
826 {
827 fprintf (file, " Var after: ");
828 print_generic_expr (file, cand->var_after, TDF_SLIM);
829 fprintf (file, "\n");
830 }
831
832 switch (cand->pos)
833 {
834 case IP_NORMAL:
835 fprintf (file, " Incr POS: before exit test\n");
836 break;
837
838 case IP_BEFORE_USE:
839 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
840 break;
841
842 case IP_AFTER_USE:
843 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
844 break;
845
846 case IP_END:
847 fprintf (file, " Incr POS: at end\n");
848 break;
849
850 case IP_ORIGINAL:
851 fprintf (file, " Incr POS: orig biv\n");
852 break;
853 }
854
855 dump_iv (file, iv, false, 1);
856 }
857
858 /* Returns the info for ssa version VER. */
859
860 static inline struct version_info *
861 ver_info (struct ivopts_data *data, unsigned ver)
862 {
863 return data->version_info + ver;
864 }
865
866 /* Returns the info for ssa name NAME. */
867
868 static inline struct version_info *
869 name_info (struct ivopts_data *data, tree name)
870 {
871 return ver_info (data, SSA_NAME_VERSION (name));
872 }
873
874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
875 emitted in LOOP. */
876
877 static bool
878 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
879 {
880 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
881
882 gcc_assert (bb);
883
884 if (sbb == loop->latch)
885 return true;
886
887 if (sbb != bb)
888 return false;
889
890 return stmt == last_stmt (bb);
891 }
892
893 /* Returns true if STMT is after the place where the original induction
894 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
895 if the positions are identical. */
896
897 static bool
898 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
899 {
900 basic_block cand_bb = gimple_bb (cand->incremented_at);
901 basic_block stmt_bb = gimple_bb (stmt);
902
903 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
904 return false;
905
906 if (stmt_bb != cand_bb)
907 return true;
908
909 if (true_if_equal
910 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
911 return true;
912 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
913 }
914
915 /* Returns true if STMT is after the place where the induction variable
916 CAND is incremented in LOOP. */
917
918 static bool
919 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
920 {
921 switch (cand->pos)
922 {
923 case IP_END:
924 return false;
925
926 case IP_NORMAL:
927 return stmt_after_ip_normal_pos (loop, stmt);
928
929 case IP_ORIGINAL:
930 case IP_AFTER_USE:
931 return stmt_after_inc_pos (cand, stmt, false);
932
933 case IP_BEFORE_USE:
934 return stmt_after_inc_pos (cand, stmt, true);
935
936 default:
937 gcc_unreachable ();
938 }
939 }
940
941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
942
943 static bool
944 abnormal_ssa_name_p (tree exp)
945 {
946 if (!exp)
947 return false;
948
949 if (TREE_CODE (exp) != SSA_NAME)
950 return false;
951
952 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
953 }
954
955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956 abnormal phi node. Callback for for_each_index. */
957
958 static bool
959 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
960 void *data ATTRIBUTE_UNUSED)
961 {
962 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
963 {
964 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
965 return false;
966 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
967 return false;
968 }
969
970 return !abnormal_ssa_name_p (*index);
971 }
972
973 /* Returns true if EXPR contains a ssa name that occurs in an
974 abnormal phi node. */
975
976 bool
977 contains_abnormal_ssa_name_p (tree expr)
978 {
979 enum tree_code code;
980 enum tree_code_class codeclass;
981
982 if (!expr)
983 return false;
984
985 code = TREE_CODE (expr);
986 codeclass = TREE_CODE_CLASS (code);
987
988 if (code == CALL_EXPR)
989 {
990 tree arg;
991 call_expr_arg_iterator iter;
992 FOR_EACH_CALL_EXPR_ARG (arg, iter, expr)
993 if (contains_abnormal_ssa_name_p (arg))
994 return true;
995 return false;
996 }
997
998 if (code == SSA_NAME)
999 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
1000
1001 if (code == INTEGER_CST
1002 || is_gimple_min_invariant (expr))
1003 return false;
1004
1005 if (code == ADDR_EXPR)
1006 return !for_each_index (&TREE_OPERAND (expr, 0),
1007 idx_contains_abnormal_ssa_name_p,
1008 NULL);
1009
1010 if (code == COND_EXPR)
1011 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
1012 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
1013 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
1014
1015 switch (codeclass)
1016 {
1017 case tcc_binary:
1018 case tcc_comparison:
1019 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
1020 return true;
1021
1022 /* Fallthru. */
1023 case tcc_unary:
1024 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
1025 return true;
1026
1027 break;
1028
1029 default:
1030 gcc_unreachable ();
1031 }
1032
1033 return false;
1034 }
1035
1036 /* Returns the structure describing number of iterations determined from
1037 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1038
1039 static struct tree_niter_desc *
1040 niter_for_exit (struct ivopts_data *data, edge exit)
1041 {
1042 struct tree_niter_desc *desc;
1043 tree_niter_desc **slot;
1044
1045 if (!data->niters)
1046 {
1047 data->niters = new hash_map<edge, tree_niter_desc *>;
1048 slot = NULL;
1049 }
1050 else
1051 slot = data->niters->get (exit);
1052
1053 if (!slot)
1054 {
1055 /* Try to determine number of iterations. We cannot safely work with ssa
1056 names that appear in phi nodes on abnormal edges, so that we do not
1057 create overlapping life ranges for them (PR 27283). */
1058 desc = XNEW (struct tree_niter_desc);
1059 if (!number_of_iterations_exit (data->current_loop,
1060 exit, desc, true)
1061 || contains_abnormal_ssa_name_p (desc->niter))
1062 {
1063 XDELETE (desc);
1064 desc = NULL;
1065 }
1066 data->niters->put (exit, desc);
1067 }
1068 else
1069 desc = *slot;
1070
1071 return desc;
1072 }
1073
1074 /* Returns the structure describing number of iterations determined from
1075 single dominating exit of DATA->current_loop, or NULL if something
1076 goes wrong. */
1077
1078 static struct tree_niter_desc *
1079 niter_for_single_dom_exit (struct ivopts_data *data)
1080 {
1081 edge exit = single_dom_exit (data->current_loop);
1082
1083 if (!exit)
1084 return NULL;
1085
1086 return niter_for_exit (data, exit);
1087 }
1088
1089 /* Initializes data structures used by the iv optimization pass, stored
1090 in DATA. */
1091
1092 static void
1093 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1094 {
1095 data->version_info_size = 2 * num_ssa_names;
1096 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1097 data->relevant = BITMAP_ALLOC (NULL);
1098 data->important_candidates = BITMAP_ALLOC (NULL);
1099 data->max_inv_var_id = 0;
1100 data->max_inv_expr_id = 0;
1101 data->niters = NULL;
1102 data->vgroups.create (20);
1103 data->vcands.create (20);
1104 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1105 data->name_expansion_cache = NULL;
1106 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1107 data->iv_common_cands.create (20);
1108 decl_rtl_to_reset.create (20);
1109 gcc_obstack_init (&data->iv_obstack);
1110 }
1111
1112 /* Returns a memory object to which EXPR points. If we are able to
1113 determine that it does not point to any such object, NULL is returned. */
1114
1115 static tree
1116 determine_base_object (tree expr)
1117 {
1118 enum tree_code code = TREE_CODE (expr);
1119 tree base, obj;
1120
1121 /* If this is a pointer casted to any type, we need to determine
1122 the base object for the pointer; so handle conversions before
1123 throwing away non-pointer expressions. */
1124 if (CONVERT_EXPR_P (expr))
1125 return determine_base_object (TREE_OPERAND (expr, 0));
1126
1127 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1128 return NULL_TREE;
1129
1130 switch (code)
1131 {
1132 case INTEGER_CST:
1133 return NULL_TREE;
1134
1135 case ADDR_EXPR:
1136 obj = TREE_OPERAND (expr, 0);
1137 base = get_base_address (obj);
1138
1139 if (!base)
1140 return expr;
1141
1142 if (TREE_CODE (base) == MEM_REF)
1143 return determine_base_object (TREE_OPERAND (base, 0));
1144
1145 return fold_convert (ptr_type_node,
1146 build_fold_addr_expr (base));
1147
1148 case POINTER_PLUS_EXPR:
1149 return determine_base_object (TREE_OPERAND (expr, 0));
1150
1151 case PLUS_EXPR:
1152 case MINUS_EXPR:
1153 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1154 gcc_unreachable ();
1155
1156 default:
1157 if (POLY_INT_CST_P (expr))
1158 return NULL_TREE;
1159 return fold_convert (ptr_type_node, expr);
1160 }
1161 }
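/* Illustration of determine_base_object on hypothetical trees: for &a[i]
   where a is a declared array, the base object is &a; for p + 4 * i where p
   is a pointer ssa name, the POINTER_PLUS_EXPR is stripped and the base
   object is p; for any expression of non-pointer type, NULL_TREE is returned
   because the value does not point into an object.  */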
1162
1163 /* Return true if an address expression with a non-DECL_P operand appears
1164 in EXPR. */
1165
1166 static bool
1167 contain_complex_addr_expr (tree expr)
1168 {
1169 bool res = false;
1170
1171 STRIP_NOPS (expr);
1172 switch (TREE_CODE (expr))
1173 {
1174 case POINTER_PLUS_EXPR:
1175 case PLUS_EXPR:
1176 case MINUS_EXPR:
1177 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1178 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1179 break;
1180
1181 case ADDR_EXPR:
1182 return (!DECL_P (TREE_OPERAND (expr, 0)));
1183
1184 default:
1185 return false;
1186 }
1187
1188 return res;
1189 }
1190
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1193
1194 static struct iv *
1195 alloc_iv (struct ivopts_data *data, tree base, tree step,
1196 bool no_overflow = false)
1197 {
1198 tree expr = base;
1199 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1200 sizeof (struct iv));
1201 gcc_assert (step != NULL_TREE);
1202
1203 /* Lower address expressions in base, except ones with a DECL_P operand.
1204 By doing this:
1205 1) More accurate cost can be computed for address expressions;
1206 2) Duplicate candidates won't be created for bases in different
1207 forms, like &a[0] and &a. */
1208 STRIP_NOPS (expr);
1209 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1210 || contain_complex_addr_expr (expr))
1211 {
1212 aff_tree comb;
1213 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1214 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1215 }
1216
1217 iv->base = base;
1218 iv->base_object = determine_base_object (base);
1219 iv->step = step;
1220 iv->biv_p = false;
1221 iv->nonlin_use = NULL;
1222 iv->ssa_name = NULL_TREE;
1223 if (!no_overflow
1224 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1225 base, step))
1226 no_overflow = true;
1227 iv->no_overflow = no_overflow;
1228 iv->have_address_use = false;
1229
1230 return iv;
1231 }
1232
1233 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1234 doesn't overflow. */
1235
1236 static void
1237 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1238 bool no_overflow)
1239 {
1240 struct version_info *info = name_info (data, iv);
1241
1242 gcc_assert (!info->iv);
1243
1244 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1245 info->iv = alloc_iv (data, base, step, no_overflow);
1246 info->iv->ssa_name = iv;
1247 }
1248
1249 /* Finds induction variable declaration for VAR. */
1250
1251 static struct iv *
1252 get_iv (struct ivopts_data *data, tree var)
1253 {
1254 basic_block bb;
1255 tree type = TREE_TYPE (var);
1256
1257 if (!POINTER_TYPE_P (type)
1258 && !INTEGRAL_TYPE_P (type))
1259 return NULL;
1260
1261 if (!name_info (data, var)->iv)
1262 {
1263 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1264
1265 if (!bb
1266 || !flow_bb_inside_loop_p (data->current_loop, bb))
1267 set_iv (data, var, var, build_int_cst (type, 0), true);
1268 }
1269
1270 return name_info (data, var)->iv;
1271 }
1272
1273 /* Return the first non-invariant ssa var found in EXPR. */
1274
1275 static tree
1276 extract_single_var_from_expr (tree expr)
1277 {
1278 int i, n;
1279 tree tmp;
1280 enum tree_code code;
1281
1282 if (!expr || is_gimple_min_invariant (expr))
1283 return NULL;
1284
1285 code = TREE_CODE (expr);
1286 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1287 {
1288 n = TREE_OPERAND_LENGTH (expr);
1289 for (i = 0; i < n; i++)
1290 {
1291 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1292
1293 if (tmp)
1294 return tmp;
1295 }
1296 }
1297 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1298 }
1299
1300 /* Finds basic ivs. */
1301
1302 static bool
1303 find_bivs (struct ivopts_data *data)
1304 {
1305 gphi *phi;
1306 affine_iv iv;
1307 tree step, type, base, stop;
1308 bool found = false;
1309 struct loop *loop = data->current_loop;
1310 gphi_iterator psi;
1311
1312 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1313 {
1314 phi = psi.phi ();
1315
1316 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1317 continue;
1318
1319 if (virtual_operand_p (PHI_RESULT (phi)))
1320 continue;
1321
1322 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1323 continue;
1324
1325 if (integer_zerop (iv.step))
1326 continue;
1327
1328 step = iv.step;
1329 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1330 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1331 Ideally we would stop at any ssa var, but since that is expensive and
1332 such cases are unusual, we just do it for the first one.
1333
1334 See PR64705 for the rationale. */
1335 stop = extract_single_var_from_expr (step);
1336 base = expand_simple_operations (base, stop);
1337 if (contains_abnormal_ssa_name_p (base)
1338 || contains_abnormal_ssa_name_p (step))
1339 continue;
1340
1341 type = TREE_TYPE (PHI_RESULT (phi));
1342 base = fold_convert (type, base);
1343 if (step)
1344 {
1345 if (POINTER_TYPE_P (type))
1346 step = convert_to_ptrofftype (step);
1347 else
1348 step = fold_convert (type, step);
1349 }
1350
1351 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1352 found = true;
1353 }
1354
1355 return found;
1356 }
1357
1358 /* Marks basic ivs. */
1359
1360 static void
1361 mark_bivs (struct ivopts_data *data)
1362 {
1363 gphi *phi;
1364 gimple *def;
1365 tree var;
1366 struct iv *iv, *incr_iv;
1367 struct loop *loop = data->current_loop;
1368 basic_block incr_bb;
1369 gphi_iterator psi;
1370
1371 data->bivs_not_used_in_addr = 0;
1372 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1373 {
1374 phi = psi.phi ();
1375
1376 iv = get_iv (data, PHI_RESULT (phi));
1377 if (!iv)
1378 continue;
1379
1380 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1381 def = SSA_NAME_DEF_STMT (var);
1382 /* Don't mark an iv peeled from another one as a biv. */
1383 if (def
1384 && gimple_code (def) == GIMPLE_PHI
1385 && gimple_bb (def) == loop->header)
1386 continue;
1387
1388 incr_iv = get_iv (data, var);
1389 if (!incr_iv)
1390 continue;
1391
1392 /* If the increment is in the subloop, ignore it. */
1393 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1394 if (incr_bb->loop_father != data->current_loop
1395 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1396 continue;
1397
1398 iv->biv_p = true;
1399 incr_iv->biv_p = true;
1400 if (iv->no_overflow)
1401 data->bivs_not_used_in_addr++;
1402 if (incr_iv->no_overflow)
1403 data->bivs_not_used_in_addr++;
1404 }
1405 }
1406
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408 parameters to IV. */
1409
1410 static bool
1411 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1412 {
1413 tree lhs, stop;
1414 struct loop *loop = data->current_loop;
1415
1416 iv->base = NULL_TREE;
1417 iv->step = NULL_TREE;
1418
1419 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1420 return false;
1421
1422 lhs = gimple_assign_lhs (stmt);
1423 if (TREE_CODE (lhs) != SSA_NAME)
1424 return false;
1425
1426 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1427 return false;
1428
1429 /* Stop expanding the iv base at the first ssa var referred to by the iv step.
1430 Ideally we would stop at any ssa var, but since that is expensive and
1431 such cases are unusual, we just do it for the first one.
1432
1433 See PR64705 for the rationale. */
1434 stop = extract_single_var_from_expr (iv->step);
1435 iv->base = expand_simple_operations (iv->base, stop);
1436 if (contains_abnormal_ssa_name_p (iv->base)
1437 || contains_abnormal_ssa_name_p (iv->step))
1438 return false;
1439
1440 /* If STMT could throw, then do not consider STMT as defining a GIV.
1441 While this will suppress optimizations, we cannot safely delete this
1442 GIV and associated statements, even if it appears it is not used. */
1443 if (stmt_could_throw_p (cfun, stmt))
1444 return false;
1445
1446 return true;
1447 }
1448
1449 /* Finds general ivs in statement STMT. */
1450
1451 static void
1452 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1453 {
1454 affine_iv iv;
1455
1456 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1457 return;
1458
1459 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1460 }
1461
1462 /* Finds general ivs in basic block BB. */
1463
1464 static void
1465 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1466 {
1467 gimple_stmt_iterator bsi;
1468
1469 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1470 find_givs_in_stmt (data, gsi_stmt (bsi));
1471 }
1472
1473 /* Finds general ivs. */
1474
1475 static void
1476 find_givs (struct ivopts_data *data)
1477 {
1478 struct loop *loop = data->current_loop;
1479 basic_block *body = get_loop_body_in_dom_order (loop);
1480 unsigned i;
1481
1482 for (i = 0; i < loop->num_nodes; i++)
1483 find_givs_in_bb (data, body[i]);
1484 free (body);
1485 }
1486
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488 variable and if so, its initial value and step. */
1489
1490 static bool
1491 find_induction_variables (struct ivopts_data *data)
1492 {
1493 unsigned i;
1494 bitmap_iterator bi;
1495
1496 if (!find_bivs (data))
1497 return false;
1498
1499 find_givs (data);
1500 mark_bivs (data);
1501
1502 if (dump_file && (dump_flags & TDF_DETAILS))
1503 {
1504 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1505
1506 if (niter)
1507 {
1508 fprintf (dump_file, " number of iterations ");
1509 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1510 if (!integer_zerop (niter->may_be_zero))
1511 {
1512 fprintf (dump_file, "; zero if ");
1513 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1514 }
1515 fprintf (dump_file, "\n");
1516 };
1517
1518 fprintf (dump_file, "\n<Induction Vars>:\n");
1519 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1520 {
1521 struct version_info *info = ver_info (data, i);
1522 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1523 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1524 }
1525 }
1526
1527 return true;
1528 }
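/* A concrete (purely illustrative) picture of the iv discovery above: in

     for (i = 0; i < n; i++)
       {
         p = base + 4 * i;
         sum += *p;
       }

   the PHI-defined counter i is recorded by find_bivs and marked by mark_bivs
   as a biv, while p, an affine function of the counter, is recorded by
   find_givs as a general (derived) iv with base BASE and step 4 (assuming i
   steps by 1).  */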
1529
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532 is the const offset stripped from IV base and MEM_TYPE is the type
1533 of the memory being addressed. For uses of other types, ADDR_BASE
1534 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1535
1536 static struct iv_use *
1537 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1538 gimple *stmt, enum use_type type, tree mem_type,
1539 tree addr_base, poly_uint64 addr_offset)
1540 {
1541 struct iv_use *use = XCNEW (struct iv_use);
1542
1543 use->id = group->vuses.length ();
1544 use->group_id = group->id;
1545 use->type = type;
1546 use->mem_type = mem_type;
1547 use->iv = iv;
1548 use->stmt = stmt;
1549 use->op_p = use_p;
1550 use->addr_base = addr_base;
1551 use->addr_offset = addr_offset;
1552
1553 group->vuses.safe_push (use);
1554 return use;
1555 }
1556
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558 NONLINEAR_USE is true if the invariant is used in a way we do not
1559 handle specially. */
1560
1561 static void
1562 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1563 {
1564 basic_block bb;
1565 struct version_info *info;
1566
1567 if (TREE_CODE (op) != SSA_NAME
1568 || virtual_operand_p (op))
1569 return;
1570
1571 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1572 if (bb
1573 && flow_bb_inside_loop_p (data->current_loop, bb))
1574 return;
1575
1576 info = name_info (data, op);
1577 info->name = op;
1578 info->has_nonlin_use |= nonlinear_use;
1579 if (!info->inv_id)
1580 info->inv_id = ++data->max_inv_var_id;
1581 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1582 }
1583
1584 /* Record a group of TYPE. */
1585
1586 static struct iv_group *
1587 record_group (struct ivopts_data *data, enum use_type type)
1588 {
1589 struct iv_group *group = XCNEW (struct iv_group);
1590
1591 group->id = data->vgroups.length ();
1592 group->type = type;
1593 group->related_cands = BITMAP_ALLOC (NULL);
1594 group->vuses.create (1);
1595
1596 data->vgroups.safe_push (group);
1597 return group;
1598 }
1599
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601 New group will be created if there is no existing group for the use.
1602 MEM_TYPE is the type of memory being addressed, or NULL if this
1603 isn't an address reference. */
1604
1605 static struct iv_use *
1606 record_group_use (struct ivopts_data *data, tree *use_p,
1607 struct iv *iv, gimple *stmt, enum use_type type,
1608 tree mem_type)
1609 {
1610 tree addr_base = NULL;
1611 struct iv_group *group = NULL;
1612 poly_uint64 addr_offset = 0;
1613
1614 /* For address type uses, try to find an existing group with the same stripped base and step; other uses always get a new group. */
1615 if (address_p (type))
1616 {
1617 unsigned int i;
1618
1619 addr_base = strip_offset (iv->base, &addr_offset);
1620 for (i = 0; i < data->vgroups.length (); i++)
1621 {
1622 struct iv_use *use;
1623
1624 group = data->vgroups[i];
1625 use = group->vuses[0];
1626 if (!address_p (use->type))
1627 continue;
1628
1629 /* Check if it has the same stripped base and step. */
1630 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1631 && operand_equal_p (iv->step, use->iv->step, 0)
1632 && operand_equal_p (addr_base, use->addr_base, 0))
1633 break;
1634 }
1635 if (i == data->vgroups.length ())
1636 group = NULL;
1637 }
1638
1639 if (!group)
1640 group = record_group (data, type);
1641
1642 return record_use (group, use_p, iv, stmt, type, mem_type,
1643 addr_base, addr_offset);
1644 }
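/* To illustrate the grouping done above (hypothetical code): the address
   uses in

     a[i] = 0;
     a[i + 1] = 0;

   typically have the same base object, the same step and the same stripped
   base address, differing only in the constant offset, so they end up in a
   single group; an access to b[i] would start a new group because its
   stripped base differs.  */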
1645
1646 /* Checks whether the use OP is interesting and if so, records it. */
1647
1648 static struct iv_use *
1649 find_interesting_uses_op (struct ivopts_data *data, tree op)
1650 {
1651 struct iv *iv;
1652 gimple *stmt;
1653 struct iv_use *use;
1654
1655 if (TREE_CODE (op) != SSA_NAME)
1656 return NULL;
1657
1658 iv = get_iv (data, op);
1659 if (!iv)
1660 return NULL;
1661
1662 if (iv->nonlin_use)
1663 {
1664 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1665 return iv->nonlin_use;
1666 }
1667
1668 if (integer_zerop (iv->step))
1669 {
1670 record_invariant (data, op, true);
1671 return NULL;
1672 }
1673
1674 stmt = SSA_NAME_DEF_STMT (op);
1675 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1676
1677 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1678 iv->nonlin_use = use;
1679 return use;
1680 }
1681
1682 /* Indicate how compare type iv_use can be handled. */
1683 enum comp_iv_rewrite
1684 {
1685 COMP_IV_NA,
1686 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1687 COMP_IV_EXPR,
1688 /* We may rewrite compare type iv_uses on both sides of comparison by
1689 expressing value of each iv_use. */
1690 COMP_IV_EXPR_2,
1691 /* We may rewrite compare type iv_use by expressing value of the iv_use
1692 or by eliminating it with other iv_cand. */
1693 COMP_IV_ELIM
1694 };
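/* To illustrate the cases above (hypothetical conditions; i and j are ivs,
   n is a loop invariant and t is a non-iv value computed inside the loop):

     i < n    ->  COMP_IV_ELIM   (rewrite i, or eliminate the test using
                                  another candidate)
     i < j    ->  COMP_IV_EXPR_2 (both sides may be rewritten)
     i < t    ->  COMP_IV_EXPR   (only the iv side may be rewritten)
     n < t    ->  COMP_IV_NA     (no iv involved)  */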
1695
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697 of an induction variable and an invariant. If this is the case,
1698 CONTROL_VAR is set to the location of the iv, BOUND to the location of
1699 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700 induction variable descriptions, and the kind of rewrite possible is
1701 returned. If this is not the case, CONTROL_VAR and BOUND are set to the
1702 arguments of the condition and COMP_IV_NA is returned. */
1703
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1706 tree **control_var, tree **bound,
1707 struct iv **iv_var, struct iv **iv_bound)
1708 {
1709 /* The objects returned when COND has constant operands. */
1710 static struct iv const_iv;
1711 static tree zero;
1712 tree *op0 = &zero, *op1 = &zero;
1713 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1714 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1715
1716 if (gimple_code (stmt) == GIMPLE_COND)
1717 {
1718 gcond *cond_stmt = as_a <gcond *> (stmt);
1719 op0 = gimple_cond_lhs_ptr (cond_stmt);
1720 op1 = gimple_cond_rhs_ptr (cond_stmt);
1721 }
1722 else
1723 {
1724 op0 = gimple_assign_rhs1_ptr (stmt);
1725 op1 = gimple_assign_rhs2_ptr (stmt);
1726 }
1727
1728 zero = integer_zero_node;
1729 const_iv.step = integer_zero_node;
1730
1731 if (TREE_CODE (*op0) == SSA_NAME)
1732 iv0 = get_iv (data, *op0);
1733 if (TREE_CODE (*op1) == SSA_NAME)
1734 iv1 = get_iv (data, *op1);
1735
1736 /* If both sides of the comparison are IVs, we can express the ivs on both ends. */
1737 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1738 {
1739 rewrite_type = COMP_IV_EXPR_2;
1740 goto end;
1741 }
1742
1743 /* If neither side of the comparison is an IV. */
1744 if ((!iv0 || integer_zerop (iv0->step))
1745 && (!iv1 || integer_zerop (iv1->step)))
1746 goto end;
1747
1748 /* Control variable may be on the other side. */
1749 if (!iv0 || integer_zerop (iv0->step))
1750 {
1751 std::swap (op0, op1);
1752 std::swap (iv0, iv1);
1753 }
1754 /* If one side is an IV and the other side isn't loop invariant. */
1755 if (!iv1)
1756 rewrite_type = COMP_IV_EXPR;
1757 /* If one side is an IV and the other side is loop invariant. */
1758 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1759 rewrite_type = COMP_IV_ELIM;
1760
1761 end:
1762 if (control_var)
1763 *control_var = op0;
1764 if (iv_var)
1765 *iv_var = iv0;
1766 if (bound)
1767 *bound = op1;
1768 if (iv_bound)
1769 *iv_bound = iv1;
1770
1771 return rewrite_type;
1772 }
1773
1774 /* Checks whether the condition in STMT is interesting and if so,
1775 records it. */
1776
1777 static void
1778 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1779 {
1780 tree *var_p, *bound_p;
1781 struct iv *var_iv, *bound_iv;
1782 enum comp_iv_rewrite ret;
1783
1784 ret = extract_cond_operands (data, stmt,
1785 &var_p, &bound_p, &var_iv, &bound_iv);
1786 if (ret == COMP_IV_NA)
1787 {
1788 find_interesting_uses_op (data, *var_p);
1789 find_interesting_uses_op (data, *bound_p);
1790 return;
1791 }
1792
1793 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1794 /* Record compare type iv_use for iv on the other side of comparison. */
1795 if (ret == COMP_IV_EXPR_2)
1796 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1797 }
1798
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800 relative to the loop LOOP, i.e. if all its operands are defined
1801 outside of the returned loop. Returns NULL if EXPR is not
1802 even obviously invariant in LOOP. */
1803
1804 struct loop *
1805 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1806 {
1807 basic_block def_bb;
1808 unsigned i, len;
1809
1810 if (is_gimple_min_invariant (expr))
1811 return current_loops->tree_root;
1812
1813 if (TREE_CODE (expr) == SSA_NAME)
1814 {
1815 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1816 if (def_bb)
1817 {
1818 if (flow_bb_inside_loop_p (loop, def_bb))
1819 return NULL;
1820 return superloop_at_depth (loop,
1821 loop_depth (def_bb->loop_father) + 1);
1822 }
1823
1824 return current_loops->tree_root;
1825 }
1826
1827 if (!EXPR_P (expr))
1828 return NULL;
1829
1830 unsigned maxdepth = 0;
1831 len = TREE_OPERAND_LENGTH (expr);
1832 for (i = 0; i < len; i++)
1833 {
1834 struct loop *ivloop;
1835 if (!TREE_OPERAND (expr, i))
1836 continue;
1837
1838 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1839 if (!ivloop)
1840 return NULL;
1841 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1842 }
1843
1844 return superloop_at_depth (loop, maxdepth);
1845 }
1846
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848 i.e. if all its operands are defined outside of the LOOP. LOOP
1849 should not be the function body. */
1850
1851 bool
1852 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1853 {
1854 basic_block def_bb;
1855 unsigned i, len;
1856
1857 gcc_assert (loop_depth (loop) > 0);
1858
1859 if (is_gimple_min_invariant (expr))
1860 return true;
1861
1862 if (TREE_CODE (expr) == SSA_NAME)
1863 {
1864 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1865 if (def_bb
1866 && flow_bb_inside_loop_p (loop, def_bb))
1867 return false;
1868
1869 return true;
1870 }
1871
1872 if (!EXPR_P (expr))
1873 return false;
1874
1875 len = TREE_OPERAND_LENGTH (expr);
1876 for (i = 0; i < len; i++)
1877 if (TREE_OPERAND (expr, i)
1878 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1879 return false;
1880
1881 return true;
1882 }
1883
1884 /* Given expression EXPR which computes inductive values with respect
1885 to the loop recorded in DATA, this function returns the biv from which
1886 EXPR is derived, by tracing the definition chains of ssa variables in EXPR. */
1887
1888 static struct iv*
1889 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1890 {
1891 struct iv *iv;
1892 unsigned i, n;
1893 tree e2, e1;
1894 enum tree_code code;
1895 gimple *stmt;
1896
1897 if (expr == NULL_TREE)
1898 return NULL;
1899
1900 if (is_gimple_min_invariant (expr))
1901 return NULL;
1902
1903 code = TREE_CODE (expr);
1904 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1905 {
1906 n = TREE_OPERAND_LENGTH (expr);
1907 for (i = 0; i < n; i++)
1908 {
1909 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1910 if (iv)
1911 return iv;
1912 }
1913 }
1914
1915 /* Stop if it's not ssa name. */
1916 if (code != SSA_NAME)
1917 return NULL;
1918
1919 iv = get_iv (data, expr);
1920 if (!iv || integer_zerop (iv->step))
1921 return NULL;
1922 else if (iv->biv_p)
1923 return iv;
1924
1925 stmt = SSA_NAME_DEF_STMT (expr);
1926 if (gphi *phi = dyn_cast <gphi *> (stmt))
1927 {
1928 ssa_op_iter iter;
1929 use_operand_p use_p;
1930 basic_block phi_bb = gimple_bb (phi);
1931
1932 /* Skip loop header PHI that doesn't define biv. */
1933 if (phi_bb->loop_father == data->current_loop)
1934 return NULL;
1935
1936 if (virtual_operand_p (gimple_phi_result (phi)))
1937 return NULL;
1938
1939 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1940 {
1941 tree use = USE_FROM_PTR (use_p);
1942 iv = find_deriving_biv_for_expr (data, use);
1943 if (iv)
1944 return iv;
1945 }
1946 return NULL;
1947 }
1948 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1949 return NULL;
1950
1951 e1 = gimple_assign_rhs1 (stmt);
1952 code = gimple_assign_rhs_code (stmt);
1953 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1954 return find_deriving_biv_for_expr (data, e1);
1955
1956 switch (code)
1957 {
1958 case MULT_EXPR:
1959 case PLUS_EXPR:
1960 case MINUS_EXPR:
1961 case POINTER_PLUS_EXPR:
1962 /* Increments, decrements and multiplications by a constant
1963 are simple. */
1964 e2 = gimple_assign_rhs2 (stmt);
1965 iv = find_deriving_biv_for_expr (data, e2);
1966 if (iv)
1967 return iv;
1968 gcc_fallthrough ();
1969
1970 CASE_CONVERT:
1971 /* Casts are simple. */
1972 return find_deriving_biv_for_expr (data, e1);
1973
1974 default:
1975 break;
1976 }
1977
1978 return NULL;
1979 }
1980
1981 /* Record that BIV, its predecessor and its successor are used in
1982 address type uses. */
1983
1984 static void
1985 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1986 {
1987 unsigned i;
1988 tree type, base_1, base_2;
1989 bitmap_iterator bi;
1990
1991 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1992 || biv->have_address_use || !biv->no_overflow)
1993 return;
1994
1995 type = TREE_TYPE (biv->base);
1996 if (!INTEGRAL_TYPE_P (type))
1997 return;
1998
1999 biv->have_address_use = true;
2000 data->bivs_not_used_in_addr--;
2001 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
2002 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2003 {
2004 struct iv *iv = ver_info (data, i)->iv;
2005
2006 if (!iv || !iv->biv_p || integer_zerop (iv->step)
2007 || iv->have_address_use || !iv->no_overflow)
2008 continue;
2009
2010 if (type != TREE_TYPE (iv->base)
2011 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
2012 continue;
2013
2014 if (!operand_equal_p (biv->step, iv->step, 0))
2015 continue;
2016
2017 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
2018 if (operand_equal_p (base_1, iv->base, 0)
2019 || operand_equal_p (base_2, biv->base, 0))
2020 {
2021 iv->have_address_use = true;
2022 data->bivs_not_used_in_addr--;
2023 }
2024 }
2025 }
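
/* For instance (hypothetical values): with a biv {base = 0, step = 1} used
   in an address and another non-overflowing biv {base = 1, step = 1} of the
   same integral type, the second base equals base + step of the first, so
   both get have_address_use set and bivs_not_used_in_addr is decremented
   for each of them.  */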
2026
2027 /* Accumulates the steps of indices into DATA and replaces their values with the
2028 initial ones. Returns false when the value of the index cannot be determined.
2029 Callback for for_each_index. */
2030
2031 struct ifs_ivopts_data
2032 {
2033 struct ivopts_data *ivopts_data;
2034 gimple *stmt;
2035 tree step;
2036 };
2037
2038 static bool
2039 idx_find_step (tree base, tree *idx, void *data)
2040 {
2041 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
2042 struct iv *iv;
2043 bool use_overflow_semantics = false;
2044 tree step, iv_base, iv_step, lbound, off;
2045 struct loop *loop = dta->ivopts_data->current_loop;
2046
2047 /* If base is a component ref, require that the offset of the reference
2048 be invariant. */
2049 if (TREE_CODE (base) == COMPONENT_REF)
2050 {
2051 off = component_ref_field_offset (base);
2052 return expr_invariant_in_loop_p (loop, off);
2053 }
2054
2055 /* If base is array, first check whether we will be able to move the
2056 reference out of the loop (in order to take its address in strength
2057 reduction). In order for this to work we need both lower bound
2058 and step to be loop invariants. */
2059 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2060 {
2061 /* Moreover, for a range, the size needs to be invariant as well. */
2062 if (TREE_CODE (base) == ARRAY_RANGE_REF
2063 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2064 return false;
2065
2066 step = array_ref_element_size (base);
2067 lbound = array_ref_low_bound (base);
2068
2069 if (!expr_invariant_in_loop_p (loop, step)
2070 || !expr_invariant_in_loop_p (loop, lbound))
2071 return false;
2072 }
2073
2074 if (TREE_CODE (*idx) != SSA_NAME)
2075 return true;
2076
2077 iv = get_iv (dta->ivopts_data, *idx);
2078 if (!iv)
2079 return false;
2080
2081 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2082 *&x[0], which is not folded and does not trigger the
2083 ARRAY_REF path below. */
2084 *idx = iv->base;
2085
2086 if (integer_zerop (iv->step))
2087 return true;
2088
2089 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2090 {
2091 step = array_ref_element_size (base);
2092
2093 /* We only handle addresses whose step is an integer constant. */
2094 if (TREE_CODE (step) != INTEGER_CST)
2095 return false;
2096 }
2097 else
2098 /* The step for pointer arithmetic is already 1 byte. */
2099 step = size_one_node;
2100
2101 iv_base = iv->base;
2102 iv_step = iv->step;
2103 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2104 use_overflow_semantics = true;
2105
2106 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2107 sizetype, &iv_base, &iv_step, dta->stmt,
2108 use_overflow_semantics))
2109 {
2110 /* The index might wrap. */
2111 return false;
2112 }
2113
2114 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2115 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2116
2117 if (dta->ivopts_data->bivs_not_used_in_addr)
2118 {
2119 if (!iv->biv_p)
2120 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2121
2122 record_biv_for_address_use (dta->ivopts_data, iv);
2123 }
2124 return true;
2125 }
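
/* As a small example (hypothetical names): for a reference a[i_1] where a
   is an array of 4-byte elements and i_1 is an iv with base 0 and step 1,
   the index is replaced by 0 and 4 -- the element size times the iv step,
   computed in sizetype -- is added to DTA->step.  */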
2126
2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2128 object is passed to it in DATA. */
2129
2130 static bool
2131 idx_record_use (tree base, tree *idx,
2132 void *vdata)
2133 {
2134 struct ivopts_data *data = (struct ivopts_data *) vdata;
2135 find_interesting_uses_op (data, *idx);
2136 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2137 {
2138 find_interesting_uses_op (data, array_ref_element_size (base));
2139 find_interesting_uses_op (data, array_ref_low_bound (base));
2140 }
2141 return true;
2142 }
2143
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145 store cst to MUL and return true. Otherwise return false.
2146 The returned value is always sign-extended, regardless of the
2147 signedness of TOP and BOT. */
2148
2149 static bool
2150 constant_multiple_of (tree top, tree bot, widest_int *mul)
2151 {
2152 tree mby;
2153 enum tree_code code;
2154 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2155 widest_int res, p0, p1;
2156
2157 STRIP_NOPS (top);
2158 STRIP_NOPS (bot);
2159
2160 if (operand_equal_p (top, bot, 0))
2161 {
2162 *mul = 1;
2163 return true;
2164 }
2165
2166 code = TREE_CODE (top);
2167 switch (code)
2168 {
2169 case MULT_EXPR:
2170 mby = TREE_OPERAND (top, 1);
2171 if (TREE_CODE (mby) != INTEGER_CST)
2172 return false;
2173
2174 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2175 return false;
2176
2177 *mul = wi::sext (res * wi::to_widest (mby), precision);
2178 return true;
2179
2180 case PLUS_EXPR:
2181 case MINUS_EXPR:
2182 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2183 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2184 return false;
2185
2186 if (code == MINUS_EXPR)
2187 p1 = -p1;
2188 *mul = wi::sext (p0 + p1, precision);
2189 return true;
2190
2191 case INTEGER_CST:
2192 if (TREE_CODE (bot) != INTEGER_CST)
2193 return false;
2194
2195 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2196 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2197 if (p1 == 0)
2198 return false;
2199 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2200 return res == 0;
2201
2202 default:
2203 if (POLY_INT_CST_P (top)
2204 && POLY_INT_CST_P (bot)
2205 && constant_multiple_p (wi::to_poly_widest (top),
2206 wi::to_poly_widest (bot), mul))
2207 return true;
2208
2209 return false;
2210 }
2211 }
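
/* A couple of worked examples (a_1 is a hypothetical SSA name):
   constant_multiple_of (a_1 * 4, a_1, &mul) stores 4,
   constant_multiple_of (12, 4, &mul) stores 3, while
   constant_multiple_of (a_1 + 1, a_1, &mul) fails, since 1 is not a
   constant multiple of a_1.  */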
2212
2213 /* Return true if memory reference REF with step STEP may be unaligned. */
2214
2215 static bool
2216 may_be_unaligned_p (tree ref, tree step)
2217 {
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219 thus they are not misaligned. */
2220 if (TREE_CODE (ref) == TARGET_MEM_REF)
2221 return false;
2222
2223 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2225 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2226
2227 unsigned HOST_WIDE_INT bitpos;
2228 unsigned int ref_align;
2229 get_object_alignment_1 (ref, &ref_align, &bitpos);
2230 if (ref_align < align
2231 || (bitpos % align) != 0
2232 || (bitpos % BITS_PER_UNIT) != 0)
2233 return true;
2234
2235 unsigned int trailing_zeros = tree_ctz (step);
2236 if (trailing_zeros < HOST_BITS_PER_INT
2237 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2238 return true;
2239
2240 return false;
2241 }
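
/* For example, assuming a 32-bit int with 32-bit alignment on a
   STRICT_ALIGNMENT target: an int reference whose STEP is 2 has only one
   trailing zero bit, so (1 << 1) * BITS_PER_UNIT = 16 bits is smaller than
   the required alignment and true is returned.  */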
2242
2243 /* Return true if EXPR may be non-addressable. */
2244
2245 bool
2246 may_be_nonaddressable_p (tree expr)
2247 {
2248 switch (TREE_CODE (expr))
2249 {
2250 case VAR_DECL:
2251 /* Check if it's a register variable. */
2252 return DECL_HARD_REGISTER (expr);
2253
2254 case TARGET_MEM_REF:
2255 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2256 target, thus they are always addressable. */
2257 return false;
2258
2259 case MEM_REF:
2260 /* Likewise for MEM_REFs, modulo the storage order. */
2261 return REF_REVERSE_STORAGE_ORDER (expr);
2262
2263 case BIT_FIELD_REF:
2264 if (REF_REVERSE_STORAGE_ORDER (expr))
2265 return true;
2266 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2267
2268 case COMPONENT_REF:
2269 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2270 return true;
2271 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2272 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2273
2274 case ARRAY_REF:
2275 case ARRAY_RANGE_REF:
2276 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2277 return true;
2278 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2279
2280 case VIEW_CONVERT_EXPR:
2281 /* This kind of view-conversions may wrap non-addressable objects
2282 and make them look addressable. After some processing the
2283 non-addressability may be uncovered again, causing ADDR_EXPRs
2284 of inappropriate objects to be built. */
2285 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2286 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2287 return true;
2288 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2289
2290 CASE_CONVERT:
2291 return true;
2292
2293 default:
2294 break;
2295 }
2296
2297 return false;
2298 }
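
/* E.g. a VAR_DECL declared with an explicit hard register
   (DECL_HARD_REGISTER) has no address, so true is returned for it, while a
   plain MEM_REF is addressable unless it uses reverse storage order.  */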
2299
2300 /* Finds addresses in *OP_P inside STMT. */
2301
2302 static void
2303 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2304 tree *op_p)
2305 {
2306 tree base = *op_p, step = size_zero_node;
2307 struct iv *civ;
2308 struct ifs_ivopts_data ifs_ivopts_data;
2309
2310 /* Do not play with volatile memory references. A bit too conservative,
2311 perhaps, but safe. */
2312 if (gimple_has_volatile_ops (stmt))
2313 goto fail;
2314
2315 /* Ignore bitfields for now. Not really something terribly complicated
2316 to handle. TODO. */
2317 if (TREE_CODE (base) == BIT_FIELD_REF)
2318 goto fail;
2319
2320 base = unshare_expr (base);
2321
2322 if (TREE_CODE (base) == TARGET_MEM_REF)
2323 {
2324 tree type = build_pointer_type (TREE_TYPE (base));
2325 tree astep;
2326
2327 if (TMR_BASE (base)
2328 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2329 {
2330 civ = get_iv (data, TMR_BASE (base));
2331 if (!civ)
2332 goto fail;
2333
2334 TMR_BASE (base) = civ->base;
2335 step = civ->step;
2336 }
2337 if (TMR_INDEX2 (base)
2338 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2339 {
2340 civ = get_iv (data, TMR_INDEX2 (base));
2341 if (!civ)
2342 goto fail;
2343
2344 TMR_INDEX2 (base) = civ->base;
2345 step = civ->step;
2346 }
2347 if (TMR_INDEX (base)
2348 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2349 {
2350 civ = get_iv (data, TMR_INDEX (base));
2351 if (!civ)
2352 goto fail;
2353
2354 TMR_INDEX (base) = civ->base;
2355 astep = civ->step;
2356
2357 if (astep)
2358 {
2359 if (TMR_STEP (base))
2360 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2361
2362 step = fold_build2 (PLUS_EXPR, type, step, astep);
2363 }
2364 }
2365
2366 if (integer_zerop (step))
2367 goto fail;
2368 base = tree_mem_ref_addr (type, base);
2369 }
2370 else
2371 {
2372 ifs_ivopts_data.ivopts_data = data;
2373 ifs_ivopts_data.stmt = stmt;
2374 ifs_ivopts_data.step = size_zero_node;
2375 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2376 || integer_zerop (ifs_ivopts_data.step))
2377 goto fail;
2378 step = ifs_ivopts_data.step;
2379
2380 /* Check that the base expression is addressable. This needs
2381 to be done after substituting bases of IVs into it. */
2382 if (may_be_nonaddressable_p (base))
2383 goto fail;
2384
2385 /* Moreover, on strict alignment platforms, check that it is
2386 sufficiently aligned. */
2387 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2388 goto fail;
2389
2390 base = build_fold_addr_expr (base);
2391
2392 /* Substituting bases of IVs into the base expression might
2393 have caused folding opportunities. */
2394 if (TREE_CODE (base) == ADDR_EXPR)
2395 {
2396 tree *ref = &TREE_OPERAND (base, 0);
2397 while (handled_component_p (*ref))
2398 ref = &TREE_OPERAND (*ref, 0);
2399 if (TREE_CODE (*ref) == MEM_REF)
2400 {
2401 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2402 TREE_OPERAND (*ref, 0),
2403 TREE_OPERAND (*ref, 1));
2404 if (tem)
2405 *ref = tem;
2406 }
2407 }
2408 }
2409
2410 civ = alloc_iv (data, base, step);
2411 /* Fail if base object of this memory reference is unknown. */
2412 if (civ->base_object == NULL_TREE)
2413 goto fail;
2414
2415 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2416 return;
2417
2418 fail:
2419 for_each_index (op_p, idx_record_use, data);
2420 }
2421
2422 /* Finds and records invariants used in STMT. */
2423
2424 static void
2425 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2426 {
2427 ssa_op_iter iter;
2428 use_operand_p use_p;
2429 tree op;
2430
2431 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2432 {
2433 op = USE_FROM_PTR (use_p);
2434 record_invariant (data, op, false);
2435 }
2436 }
2437
2438 /* CALL calls an internal function. If operand *OP_P will become an
2439 address when the call is expanded, return the type of the memory
2440 being addressed, otherwise return null. */
2441
2442 static tree
2443 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2444 {
2445 switch (gimple_call_internal_fn (call))
2446 {
2447 case IFN_MASK_LOAD:
2448 if (op_p == gimple_call_arg_ptr (call, 0))
2449 return TREE_TYPE (gimple_call_lhs (call));
2450 return NULL_TREE;
2451
2452 case IFN_MASK_STORE:
2453 if (op_p == gimple_call_arg_ptr (call, 0))
2454 return TREE_TYPE (gimple_call_arg (call, 3));
2455 return NULL_TREE;
2456
2457 default:
2458 return NULL_TREE;
2459 }
2460 }
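
/* For example (hypothetical SSA names): for
   vect_1 = IFN_MASK_LOAD (ptr_2, align, mask_3)
   the first argument becomes an address on expansion, so the type of vect_1
   is returned when *OP_P is that argument; for IFN_MASK_STORE the type of
   the stored value (argument 3) is returned.  */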
2461
2462 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2463 Return true if the operand will become an address when STMT
2464 is expanded and record the associated address use if so. */
2465
2466 static bool
2467 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2468 struct iv *iv)
2469 {
2470 /* Fail if base object of this memory reference is unknown. */
2471 if (iv->base_object == NULL_TREE)
2472 return false;
2473
2474 tree mem_type = NULL_TREE;
2475 if (gcall *call = dyn_cast <gcall *> (stmt))
2476 if (gimple_call_internal_p (call))
2477 mem_type = get_mem_type_for_internal_fn (call, op_p);
2478 if (mem_type)
2479 {
2480 iv = alloc_iv (data, iv->base, iv->step);
2481 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2482 return true;
2483 }
2484 return false;
2485 }
2486
2487 /* Finds interesting uses of induction variables in the statement STMT. */
2488
2489 static void
2490 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2491 {
2492 struct iv *iv;
2493 tree op, *lhs, *rhs;
2494 ssa_op_iter iter;
2495 use_operand_p use_p;
2496 enum tree_code code;
2497
2498 find_invariants_stmt (data, stmt);
2499
2500 if (gimple_code (stmt) == GIMPLE_COND)
2501 {
2502 find_interesting_uses_cond (data, stmt);
2503 return;
2504 }
2505
2506 if (is_gimple_assign (stmt))
2507 {
2508 lhs = gimple_assign_lhs_ptr (stmt);
2509 rhs = gimple_assign_rhs1_ptr (stmt);
2510
2511 if (TREE_CODE (*lhs) == SSA_NAME)
2512 {
2513 /* If the statement defines an induction variable, the uses are not
2514 interesting by themselves. */
2515
2516 iv = get_iv (data, *lhs);
2517
2518 if (iv && !integer_zerop (iv->step))
2519 return;
2520 }
2521
2522 code = gimple_assign_rhs_code (stmt);
2523 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2524 && (REFERENCE_CLASS_P (*rhs)
2525 || is_gimple_val (*rhs)))
2526 {
2527 if (REFERENCE_CLASS_P (*rhs))
2528 find_interesting_uses_address (data, stmt, rhs);
2529 else
2530 find_interesting_uses_op (data, *rhs);
2531
2532 if (REFERENCE_CLASS_P (*lhs))
2533 find_interesting_uses_address (data, stmt, lhs);
2534 return;
2535 }
2536 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2537 {
2538 find_interesting_uses_cond (data, stmt);
2539 return;
2540 }
2541
2542 /* TODO -- we should also handle address uses of type
2543
2544 memory = call (whatever);
2545
2546 and
2547
2548 call (memory). */
2549 }
2550
2551 if (gimple_code (stmt) == GIMPLE_PHI
2552 && gimple_bb (stmt) == data->current_loop->header)
2553 {
2554 iv = get_iv (data, PHI_RESULT (stmt));
2555
2556 if (iv && !integer_zerop (iv->step))
2557 return;
2558 }
2559
2560 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2561 {
2562 op = USE_FROM_PTR (use_p);
2563
2564 if (TREE_CODE (op) != SSA_NAME)
2565 continue;
2566
2567 iv = get_iv (data, op);
2568 if (!iv)
2569 continue;
2570
2571 if (!find_address_like_use (data, stmt, use_p->use, iv))
2572 find_interesting_uses_op (data, op);
2573 }
2574 }
2575
2576 /* Finds interesting uses of induction variables outside of loops
2577 on loop exit edge EXIT. */
2578
2579 static void
2580 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2581 {
2582 gphi *phi;
2583 gphi_iterator psi;
2584 tree def;
2585
2586 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2587 {
2588 phi = psi.phi ();
2589 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2590 if (!virtual_operand_p (def))
2591 find_interesting_uses_op (data, def);
2592 }
2593 }
2594
2595 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2596 mode for memory reference represented by USE. */
2597
2598 static GTY (()) vec<rtx, va_gc> *addr_list;
2599
2600 static bool
2601 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2602 {
2603 rtx reg, addr;
2604 unsigned list_index;
2605 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2606 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2607
2608 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2609 if (list_index >= vec_safe_length (addr_list))
2610 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2611
2612 addr = (*addr_list)[list_index];
2613 if (!addr)
2614 {
2615 addr_mode = targetm.addr_space.address_mode (as);
2616 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2617 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2618 (*addr_list)[list_index] = addr;
2619 }
2620 else
2621 addr_mode = GET_MODE (addr);
2622
2623 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2624 return (memory_address_addr_space_p (mem_mode, addr, as));
2625 }
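
/* The cached RTX is a (plus (reg) (const_int ...)) template per address
   space and memory mode; only the constant operand is rewritten for each
   query.  Which offsets are accepted is entirely target dependent; e.g. a
   target whose displacement field is a signed 12-bit immediate would
   reject an offset of 4096 here.  */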
2626
2627 /* Comparison function to sort a group's uses in ascending order of addr_offset. */
2628
2629 static int
2630 group_compare_offset (const void *a, const void *b)
2631 {
2632 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2633 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2634
2635 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2636 }
2637
2638 /* Check if small groups should be split. Return true if no group
2639 contains more than two uses with distinct addr_offsets. Return
2640 false otherwise. We want to split such groups because:
2641
2642 1) Small groups don't have much benefit and may interfere with
2643 general candidate selection.
2644 2) The problem size with only small groups is usually small and
2645 the general algorithm can handle it well.
2646
2647 TODO -- The above claim may not hold when we want to merge memory
2648 accesses with consecutive addresses. */
2649
2650 static bool
2651 split_small_address_groups_p (struct ivopts_data *data)
2652 {
2653 unsigned int i, j, distinct = 1;
2654 struct iv_use *pre;
2655 struct iv_group *group;
2656
2657 for (i = 0; i < data->vgroups.length (); i++)
2658 {
2659 group = data->vgroups[i];
2660 if (group->vuses.length () == 1)
2661 continue;
2662
2663 gcc_assert (address_p (group->type));
2664 if (group->vuses.length () == 2)
2665 {
2666 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2667 group->vuses[1]->addr_offset) > 0)
2668 std::swap (group->vuses[0], group->vuses[1]);
2669 }
2670 else
2671 group->vuses.qsort (group_compare_offset);
2672
2673 if (distinct > 2)
2674 continue;
2675
2676 distinct = 1;
2677 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2678 {
2679 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2680 {
2681 pre = group->vuses[j];
2682 distinct++;
2683 }
2684
2685 if (distinct > 2)
2686 break;
2687 }
2688 }
2689
2690 return (distinct <= 2);
2691 }
2692
2693 /* For each group of address type uses, this function further groups
2694 these uses according to the maximum offset supported by the target's
2695 [base + offset] addressing mode. */
2696
2697 static void
2698 split_address_groups (struct ivopts_data *data)
2699 {
2700 unsigned int i, j;
2701 /* Whether to split groups at every distinct offset. */
2702 bool split_p = split_small_address_groups_p (data);
2703
2704 for (i = 0; i < data->vgroups.length (); i++)
2705 {
2706 struct iv_group *new_group = NULL;
2707 struct iv_group *group = data->vgroups[i];
2708 struct iv_use *use = group->vuses[0];
2709
2710 use->id = 0;
2711 use->group_id = group->id;
2712 if (group->vuses.length () == 1)
2713 continue;
2714
2715 gcc_assert (address_p (use->type));
2716
2717 for (j = 1; j < group->vuses.length ();)
2718 {
2719 struct iv_use *next = group->vuses[j];
2720 poly_int64 offset = next->addr_offset - use->addr_offset;
2721
2722 /* Split the group if asked to, or if the offset against the first
2723 use can't fit in the offset part of the addressing mode. IV uses
2724 having the same offset are still kept in one group. */
2725 if (maybe_ne (offset, 0)
2726 && (split_p || !addr_offset_valid_p (use, offset)))
2727 {
2728 if (!new_group)
2729 new_group = record_group (data, group->type);
2730 group->vuses.ordered_remove (j);
2731 new_group->vuses.safe_push (next);
2732 continue;
2733 }
2734
2735 next->id = j;
2736 next->group_id = group->id;
2737 j++;
2738 }
2739 }
2740 }
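
/* For example, if a group holds uses at offsets 0, 4 and 4096 relative to
   the first use, and 4096 does not fit the target's [base + offset] form
   (or splitting was requested), the use at 4096 is moved into a newly
   recorded group while the first two stay together.  */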
2741
2742 /* Finds uses of the induction variables that are interesting. */
2743
2744 static void
2745 find_interesting_uses (struct ivopts_data *data)
2746 {
2747 basic_block bb;
2748 gimple_stmt_iterator bsi;
2749 basic_block *body = get_loop_body (data->current_loop);
2750 unsigned i;
2751 edge e;
2752
2753 for (i = 0; i < data->current_loop->num_nodes; i++)
2754 {
2755 edge_iterator ei;
2756 bb = body[i];
2757
2758 FOR_EACH_EDGE (e, ei, bb->succs)
2759 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2760 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2761 find_interesting_uses_outside (data, e);
2762
2763 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2764 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2765 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2766 if (!is_gimple_debug (gsi_stmt (bsi)))
2767 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2768 }
2769 free (body);
2770
2771 split_address_groups (data);
2772
2773 if (dump_file && (dump_flags & TDF_DETAILS))
2774 {
2775 fprintf (dump_file, "\n<IV Groups>:\n");
2776 dump_groups (dump_file, data);
2777 fprintf (dump_file, "\n");
2778 }
2779 }
2780
2781 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2782 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2783 we are at the top-level of the processed address. */
2784
2785 static tree
2786 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2787 poly_int64 *offset)
2788 {
2789 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2790 enum tree_code code;
2791 tree type, orig_type = TREE_TYPE (expr);
2792 poly_int64 off0, off1;
2793 HOST_WIDE_INT st;
2794 tree orig_expr = expr;
2795
2796 STRIP_NOPS (expr);
2797
2798 type = TREE_TYPE (expr);
2799 code = TREE_CODE (expr);
2800 *offset = 0;
2801
2802 switch (code)
2803 {
2804 case POINTER_PLUS_EXPR:
2805 case PLUS_EXPR:
2806 case MINUS_EXPR:
2807 op0 = TREE_OPERAND (expr, 0);
2808 op1 = TREE_OPERAND (expr, 1);
2809
2810 op0 = strip_offset_1 (op0, false, false, &off0);
2811 op1 = strip_offset_1 (op1, false, false, &off1);
2812
2813 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2814 if (op0 == TREE_OPERAND (expr, 0)
2815 && op1 == TREE_OPERAND (expr, 1))
2816 return orig_expr;
2817
2818 if (integer_zerop (op1))
2819 expr = op0;
2820 else if (integer_zerop (op0))
2821 {
2822 if (code == MINUS_EXPR)
2823 expr = fold_build1 (NEGATE_EXPR, type, op1);
2824 else
2825 expr = op1;
2826 }
2827 else
2828 expr = fold_build2 (code, type, op0, op1);
2829
2830 return fold_convert (orig_type, expr);
2831
2832 case MULT_EXPR:
2833 op1 = TREE_OPERAND (expr, 1);
2834 if (!cst_and_fits_in_hwi (op1))
2835 return orig_expr;
2836
2837 op0 = TREE_OPERAND (expr, 0);
2838 op0 = strip_offset_1 (op0, false, false, &off0);
2839 if (op0 == TREE_OPERAND (expr, 0))
2840 return orig_expr;
2841
2842 *offset = off0 * int_cst_value (op1);
2843 if (integer_zerop (op0))
2844 expr = op0;
2845 else
2846 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2847
2848 return fold_convert (orig_type, expr);
2849
2850 case ARRAY_REF:
2851 case ARRAY_RANGE_REF:
2852 if (!inside_addr)
2853 return orig_expr;
2854
2855 step = array_ref_element_size (expr);
2856 if (!cst_and_fits_in_hwi (step))
2857 break;
2858
2859 st = int_cst_value (step);
2860 op1 = TREE_OPERAND (expr, 1);
2861 op1 = strip_offset_1 (op1, false, false, &off1);
2862 *offset = off1 * st;
2863
2864 if (top_compref
2865 && integer_zerop (op1))
2866 {
2867 /* Strip the component reference completely. */
2868 op0 = TREE_OPERAND (expr, 0);
2869 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2870 *offset += off0;
2871 return op0;
2872 }
2873 break;
2874
2875 case COMPONENT_REF:
2876 {
2877 tree field;
2878
2879 if (!inside_addr)
2880 return orig_expr;
2881
2882 tmp = component_ref_field_offset (expr);
2883 field = TREE_OPERAND (expr, 1);
2884 if (top_compref
2885 && cst_and_fits_in_hwi (tmp)
2886 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2887 {
2888 HOST_WIDE_INT boffset, abs_off;
2889
2890 /* Strip the component reference completely. */
2891 op0 = TREE_OPERAND (expr, 0);
2892 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2893 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2894 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2895 if (boffset < 0)
2896 abs_off = -abs_off;
2897
2898 *offset = off0 + int_cst_value (tmp) + abs_off;
2899 return op0;
2900 }
2901 }
2902 break;
2903
2904 case ADDR_EXPR:
2905 op0 = TREE_OPERAND (expr, 0);
2906 op0 = strip_offset_1 (op0, true, true, &off0);
2907 *offset += off0;
2908
2909 if (op0 == TREE_OPERAND (expr, 0))
2910 return orig_expr;
2911
2912 expr = build_fold_addr_expr (op0);
2913 return fold_convert (orig_type, expr);
2914
2915 case MEM_REF:
2916 /* ??? Offset operand? */
2917 inside_addr = false;
2918 break;
2919
2920 default:
2921 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2922 return build_int_cst (orig_type, 0);
2923 return orig_expr;
2924 }
2925
2926 /* Default handling of expressions for which we want to recurse into
2927 the first operand. */
2928 op0 = TREE_OPERAND (expr, 0);
2929 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2930 *offset += off0;
2931
2932 if (op0 == TREE_OPERAND (expr, 0)
2933 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2934 return orig_expr;
2935
2936 expr = copy_node (expr);
2937 TREE_OPERAND (expr, 0) = op0;
2938 if (op1)
2939 TREE_OPERAND (expr, 1) = op1;
2940
2941 /* Inside address, we might strip the top level component references,
2942 thus changing type of the expression. Handling of ADDR_EXPR
2943 will fix that. */
2944 expr = fold_convert (orig_type, expr);
2945
2946 return expr;
2947 }
2948
2949 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2950
2951 tree
2952 strip_offset (tree expr, poly_uint64_pod *offset)
2953 {
2954 poly_int64 off;
2955 tree core = strip_offset_1 (expr, false, false, &off);
2956 *offset = off;
2957 return core;
2958 }
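
/* Two small examples (hypothetical names): for p_1 + 8 the core p_1 is
   returned and *OFFSET is set to 8; for &a[4] with 4-byte array elements
   the array's address is returned and *OFFSET is set to 16.  */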
2959
2960 /* Returns a variant of TYPE that can be used as a base for different uses.
2961 We return an unsigned type with the same precision, which avoids problems
2962 with overflows. */
2963
2964 static tree
2965 generic_type_for (tree type)
2966 {
2967 if (POINTER_TYPE_P (type))
2968 return unsigned_type_for (type);
2969
2970 if (TYPE_UNSIGNED (type))
2971 return type;
2972
2973 return unsigned_type_for (type);
2974 }
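
/* E.g. int becomes unsigned int, unsigned int is returned unchanged, and a
   pointer type becomes an unsigned integer type of the same width.  */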
2975
2976 /* Private data for walk_tree. */
2977
2978 struct walk_tree_data
2979 {
2980 bitmap *inv_vars;
2981 struct ivopts_data *idata;
2982 };
2983
2984 /* Callback function for walk_tree, it records invariants and symbol
2985 reference in *EXPR_P. DATA is the structure storing result info. */
2986
2987 static tree
2988 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2989 {
2990 tree op = *expr_p;
2991 struct version_info *info;
2992 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2993
2994 if (TREE_CODE (op) != SSA_NAME)
2995 return NULL_TREE;
2996
2997 info = name_info (wdata->idata, op);
2998 /* Because we expand simple operations when finding IVs, loop invariant
2999 variable that isn't referred by the original loop could be used now.
3000 Record such invariant variables here. */
3001 if (!info->iv)
3002 {
3003 struct ivopts_data *idata = wdata->idata;
3004 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
3005
3006 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
3007 {
3008 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
3009 record_invariant (idata, op, false);
3010 }
3011 }
3012 if (!info->inv_id || info->has_nonlin_use)
3013 return NULL_TREE;
3014
3015 if (!*wdata->inv_vars)
3016 *wdata->inv_vars = BITMAP_ALLOC (NULL);
3017 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
3018
3019 return NULL_TREE;
3020 }
3021
3022 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
3023 store them. */
3024
3025 static inline void
3026 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
3027 {
3028 struct walk_tree_data wdata;
3029
3030 if (!inv_vars)
3031 return;
3032
3033 wdata.idata = data;
3034 wdata.inv_vars = inv_vars;
3035 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
3036 }
3037
3038 /* Get the entry from the invariant expr hash table for INV_EXPR. A new entry
3039 will be recorded if it doesn't exist yet. Given the two exprs below:
3040 inv_expr + cst1, inv_expr + cst2
3041 It's hard to decide whether the constant part should be stripped
3042 or not. We choose not to strip it based on the following facts:
3043 1) We need to count the ADD cost for the constant part if it's stripped,
3044 which isn't always trivial where this function is called.
3045 2) Stripping the constant away may conflict with the following loop
3046 invariant hoisting pass.
3047 3) Not stripping the constant away results in more invariant exprs,
3048 which usually leads to decisions preferring lower register pressure. */
3049
3050 static iv_inv_expr_ent *
3051 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
3052 {
3053 STRIP_NOPS (inv_expr);
3054
3055 if (poly_int_tree_p (inv_expr)
3056 || TREE_CODE (inv_expr) == SSA_NAME)
3057 return NULL;
3058
3059 /* Don't strip constant part away as we used to. */
3060
3061 /* Store INV_EXPR in DATA->inv_expr_tab and return a pointer to its iv_inv_expr_ent. */
3062 struct iv_inv_expr_ent ent;
3063 ent.expr = inv_expr;
3064 ent.hash = iterative_hash_expr (inv_expr, 0);
3065 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3066
3067 if (!*slot)
3068 {
3069 *slot = XNEW (struct iv_inv_expr_ent);
3070 (*slot)->expr = inv_expr;
3071 (*slot)->hash = ent.hash;
3072 (*slot)->id = ++data->max_inv_expr_id;
3073 }
3074
3075 return *slot;
3076 }
3077
3078 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3079 position to POS. If USE is not NULL, the candidate is set as related to
3080 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3081 replacement of the final value of the iv by a direct computation. */
3082
3083 static struct iv_cand *
3084 add_candidate_1 (struct ivopts_data *data,
3085 tree base, tree step, bool important, enum iv_position pos,
3086 struct iv_use *use, gimple *incremented_at,
3087 struct iv *orig_iv = NULL)
3088 {
3089 unsigned i;
3090 struct iv_cand *cand = NULL;
3091 tree type, orig_type;
3092
3093 gcc_assert (base && step);
3094
3095 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3096 live, but the ivopts code may replace a real pointer with one
3097 pointing before or after the memory block that is then adjusted
3098 into the memory block during the loop. FIXME: It would likely be
3099 better to actually force the pointer live and still use ivopts;
3100 for example, it would be enough to write the pointer into memory
3101 and keep it there until after the loop. */
3102 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3103 return NULL;
3104
3105 /* For non-original variables, make sure their values are computed in a type
3106 that does not invoke undefined behavior on overflows (since in general,
3107 we cannot prove that these induction variables are non-wrapping). */
3108 if (pos != IP_ORIGINAL)
3109 {
3110 orig_type = TREE_TYPE (base);
3111 type = generic_type_for (orig_type);
3112 if (type != orig_type)
3113 {
3114 base = fold_convert (type, base);
3115 step = fold_convert (type, step);
3116 }
3117 }
3118
3119 for (i = 0; i < data->vcands.length (); i++)
3120 {
3121 cand = data->vcands[i];
3122
3123 if (cand->pos != pos)
3124 continue;
3125
3126 if (cand->incremented_at != incremented_at
3127 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3128 && cand->ainc_use != use))
3129 continue;
3130
3131 if (operand_equal_p (base, cand->iv->base, 0)
3132 && operand_equal_p (step, cand->iv->step, 0)
3133 && (TYPE_PRECISION (TREE_TYPE (base))
3134 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3135 break;
3136 }
3137
3138 if (i == data->vcands.length ())
3139 {
3140 cand = XCNEW (struct iv_cand);
3141 cand->id = i;
3142 cand->iv = alloc_iv (data, base, step);
3143 cand->pos = pos;
3144 if (pos != IP_ORIGINAL)
3145 {
3146 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3147 cand->var_after = cand->var_before;
3148 }
3149 cand->important = important;
3150 cand->incremented_at = incremented_at;
3151 data->vcands.safe_push (cand);
3152
3153 if (!poly_int_tree_p (step))
3154 {
3155 find_inv_vars (data, &step, &cand->inv_vars);
3156
3157 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3158 /* Share bitmap between inv_vars and inv_exprs for cand. */
3159 if (inv_expr != NULL)
3160 {
3161 cand->inv_exprs = cand->inv_vars;
3162 cand->inv_vars = NULL;
3163 if (cand->inv_exprs)
3164 bitmap_clear (cand->inv_exprs);
3165 else
3166 cand->inv_exprs = BITMAP_ALLOC (NULL);
3167
3168 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3169 }
3170 }
3171
3172 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3173 cand->ainc_use = use;
3174 else
3175 cand->ainc_use = NULL;
3176
3177 cand->orig_iv = orig_iv;
3178 if (dump_file && (dump_flags & TDF_DETAILS))
3179 dump_cand (dump_file, cand);
3180 }
3181
3182 cand->important |= important;
3183
3184 /* Relate candidate to the group for which it is added. */
3185 if (use)
3186 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3187
3188 return cand;
3189 }
3190
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3192 is allowed.
3193
3194 The purpose is to avoid splitting the latch edge with a biv increment, thus
3195 creating a jump, possibly confusing other optimization passes and leaving
3196 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3197 available (so we do not have a better alternative), or if the latch edge
3198 is already nonempty. */
3199
3200 static bool
3201 allow_ip_end_pos_p (struct loop *loop)
3202 {
3203 if (!ip_normal_pos (loop))
3204 return true;
3205
3206 if (!empty_block_p (ip_end_pos (loop)))
3207 return true;
3208
3209 return false;
3210 }
3211
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213 Important field is set to IMPORTANT. */
3214
3215 static void
3216 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3217 bool important, struct iv_use *use)
3218 {
3219 basic_block use_bb = gimple_bb (use->stmt);
3220 machine_mode mem_mode;
3221 unsigned HOST_WIDE_INT cstepi;
3222
3223 /* If we insert the increment in any position other than the standard
3224 ones, we must ensure that it is incremented once per iteration.
3225 It must not be in an inner nested loop, or one side of an if
3226 statement. */
3227 if (use_bb->loop_father != data->current_loop
3228 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3229 || stmt_can_throw_internal (cfun, use->stmt)
3230 || !cst_and_fits_in_hwi (step))
3231 return;
3232
3233 cstepi = int_cst_value (step);
3234
3235 mem_mode = TYPE_MODE (use->mem_type);
3236 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3237 || USE_STORE_PRE_INCREMENT (mem_mode))
3238 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3239 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3240 || USE_STORE_PRE_DECREMENT (mem_mode))
3241 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3242 {
3243 enum tree_code code = MINUS_EXPR;
3244 tree new_base;
3245 tree new_step = step;
3246
3247 if (POINTER_TYPE_P (TREE_TYPE (base)))
3248 {
3249 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3250 code = POINTER_PLUS_EXPR;
3251 }
3252 else
3253 new_step = fold_convert (TREE_TYPE (base), new_step);
3254 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3255 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3256 use->stmt);
3257 }
3258 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3259 || USE_STORE_POST_INCREMENT (mem_mode))
3260 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3261 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3262 || USE_STORE_POST_DECREMENT (mem_mode))
3263 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3264 {
3265 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3266 use->stmt);
3267 }
3268 }
3269
3270 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3271 position to POS. If USE is not NULL, the candidate is set as related to
3272 it. The candidate computation is scheduled before the exit condition and at
3273 the end of the loop. */
3274
3275 static void
3276 add_candidate (struct ivopts_data *data,
3277 tree base, tree step, bool important, struct iv_use *use,
3278 struct iv *orig_iv = NULL)
3279 {
3280 if (ip_normal_pos (data->current_loop))
3281 add_candidate_1 (data, base, step, important,
3282 IP_NORMAL, use, NULL, orig_iv);
3283 if (ip_end_pos (data->current_loop)
3284 && allow_ip_end_pos_p (data->current_loop))
3285 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3286 }
3287
3288 /* Adds standard iv candidates. */
3289
3290 static void
3291 add_standard_iv_candidates (struct ivopts_data *data)
3292 {
3293 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3294
3295 /* The same for a double-integer type if it is still fast enough. */
3296 if (TYPE_PRECISION
3297 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3298 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3299 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3300 build_int_cst (long_integer_type_node, 1), true, NULL);
3301
3302 /* The same for a double-integer type if it is still fast enough. */
3303 if (TYPE_PRECISION
3304 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3305 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3306 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3307 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3308 }
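
/* On a typical LP64 target this adds the candidates {0, +1} in int and in
   long (64-bit, still no wider than BITS_PER_WORD); the long long variant
   is skipped there because its precision does not exceed that of long.  */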
3309
3310
3311 /* Adds candidates based on the old induction variable IV. */
3312
3313 static void
3314 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3315 {
3316 gimple *phi;
3317 tree def;
3318 struct iv_cand *cand;
3319
3320 /* Check if this biv is used in address type use. */
3321 if (iv->no_overflow && iv->have_address_use
3322 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3323 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3324 {
3325 tree base = fold_convert (sizetype, iv->base);
3326 tree step = fold_convert (sizetype, iv->step);
3327
3328 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3329 add_candidate (data, base, step, true, NULL, iv);
3330 /* Add iv cand of the original type only if it has nonlinear use. */
3331 if (iv->nonlin_use)
3332 add_candidate (data, iv->base, iv->step, true, NULL);
3333 }
3334 else
3335 add_candidate (data, iv->base, iv->step, true, NULL);
3336
3337 /* The same, but with initial value zero. */
3338 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3339 add_candidate (data, size_int (0), iv->step, true, NULL);
3340 else
3341 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3342 iv->step, true, NULL);
3343
3344 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3345 if (gimple_code (phi) == GIMPLE_PHI)
3346 {
3347 /* Additionally record the possibility of leaving the original iv
3348 untouched. */
3349 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3350 /* Don't add candidate if it's from another PHI node because
3351 it's an affine iv appearing in the form of PEELED_CHREC. */
3352 phi = SSA_NAME_DEF_STMT (def);
3353 if (gimple_code (phi) != GIMPLE_PHI)
3354 {
3355 cand = add_candidate_1 (data,
3356 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3357 SSA_NAME_DEF_STMT (def));
3358 if (cand)
3359 {
3360 cand->var_before = iv->ssa_name;
3361 cand->var_after = def;
3362 }
3363 }
3364 else
3365 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3366 }
3367 }
3368
3369 /* Adds candidates based on the old induction variables. */
3370
3371 static void
3372 add_iv_candidate_for_bivs (struct ivopts_data *data)
3373 {
3374 unsigned i;
3375 struct iv *iv;
3376 bitmap_iterator bi;
3377
3378 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3379 {
3380 iv = ver_info (data, i)->iv;
3381 if (iv && iv->biv_p && !integer_zerop (iv->step))
3382 add_iv_candidate_for_biv (data, iv);
3383 }
3384 }
3385
3386 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3387
3388 static void
3389 record_common_cand (struct ivopts_data *data, tree base,
3390 tree step, struct iv_use *use)
3391 {
3392 struct iv_common_cand ent;
3393 struct iv_common_cand **slot;
3394
3395 ent.base = base;
3396 ent.step = step;
3397 ent.hash = iterative_hash_expr (base, 0);
3398 ent.hash = iterative_hash_expr (step, ent.hash);
3399
3400 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3401 if (*slot == NULL)
3402 {
3403 *slot = new iv_common_cand ();
3404 (*slot)->base = base;
3405 (*slot)->step = step;
3406 (*slot)->uses.create (8);
3407 (*slot)->hash = ent.hash;
3408 data->iv_common_cands.safe_push ((*slot));
3409 }
3410
3411 gcc_assert (use != NULL);
3412 (*slot)->uses.safe_push (use);
3413 return;
3414 }
3415
3416 /* Comparison function used to sort common candidates. */
3417
3418 static int
3419 common_cand_cmp (const void *p1, const void *p2)
3420 {
3421 unsigned n1, n2;
3422 const struct iv_common_cand *const *const ccand1
3423 = (const struct iv_common_cand *const *)p1;
3424 const struct iv_common_cand *const *const ccand2
3425 = (const struct iv_common_cand *const *)p2;
3426
3427 n1 = (*ccand1)->uses.length ();
3428 n2 = (*ccand2)->uses.length ();
3429 return n2 - n1;
3430 }
3431
3432 /* Adds IV candidates based on the common candidates recorded. */
3433
3434 static void
3435 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3436 {
3437 unsigned i, j;
3438 struct iv_cand *cand_1, *cand_2;
3439
3440 data->iv_common_cands.qsort (common_cand_cmp);
3441 for (i = 0; i < data->iv_common_cands.length (); i++)
3442 {
3443 struct iv_common_cand *ptr = data->iv_common_cands[i];
3444
3445 /* Only add IV candidate if it's derived from multiple uses. */
3446 if (ptr->uses.length () <= 1)
3447 break;
3448
3449 cand_1 = NULL;
3450 cand_2 = NULL;
3451 if (ip_normal_pos (data->current_loop))
3452 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3453 false, IP_NORMAL, NULL, NULL);
3454
3455 if (ip_end_pos (data->current_loop)
3456 && allow_ip_end_pos_p (data->current_loop))
3457 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3458 false, IP_END, NULL, NULL);
3459
3460 /* Bind deriving uses and the new candidates. */
3461 for (j = 0; j < ptr->uses.length (); j++)
3462 {
3463 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3464 if (cand_1)
3465 bitmap_set_bit (group->related_cands, cand_1->id);
3466 if (cand_2)
3467 bitmap_set_bit (group->related_cands, cand_2->id);
3468 }
3469 }
3470
3471 /* Release data since it is useless from this point. */
3472 data->iv_common_cand_tab->empty ();
3473 data->iv_common_cands.truncate (0);
3474 }
3475
3476 /* Adds candidates based on the value of USE's iv. */
3477
3478 static void
3479 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3480 {
3481 poly_uint64 offset;
3482 tree base;
3483 tree basetype;
3484 struct iv *iv = use->iv;
3485
3486 add_candidate (data, iv->base, iv->step, false, use);
3487
3488 /* Record common candidate for use in case it can be shared by others. */
3489 record_common_cand (data, iv->base, iv->step, use);
3490
3491 /* Record common candidate with initial value zero. */
3492 basetype = TREE_TYPE (iv->base);
3493 if (POINTER_TYPE_P (basetype))
3494 basetype = sizetype;
3495 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3496
3497 /* Record common candidate with the constant offset stripped from the base.
3498 As with the use's own iv, we also add a candidate for it directly. */
3499 base = strip_offset (iv->base, &offset);
3500 if (maybe_ne (offset, 0U) || base != iv->base)
3501 {
3502 record_common_cand (data, base, iv->step, use);
3503 add_candidate (data, base, iv->step, false, use);
3504 }
3505
3506 /* Record common candidate with base_object removed in base. */
3507 base = iv->base;
3508 STRIP_NOPS (base);
3509 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3510 {
3511 tree step = iv->step;
3512
3513 STRIP_NOPS (step);
3514 base = TREE_OPERAND (base, 1);
3515 step = fold_convert (sizetype, step);
3516 record_common_cand (data, base, step, use);
3517 /* Also record common candidate with offset stripped. */
3518 base = strip_offset (base, &offset);
3519 if (maybe_ne (offset, 0U))
3520 record_common_cand (data, base, step, use);
3521 }
3522
3523 /* Finally, add auto-increment candidates. Make such variables
3524 important since other iv uses with the same base object may be based
3525 on them. */
3526 if (use != NULL && address_p (use->type))
3527 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3528 }
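
/* As an illustration (p_1 is a hypothetical pointer SSA name): for a use
   whose iv is {base = p_1 + 16, step = 4} we add a candidate {p_1 + 16, 4},
   record common candidates {p_1 + 16, 4} and {0, 4}, and, with the constant
   offset stripped, both record and add {p_1, 4}.  */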
3529
3530 /* Adds candidates based on the uses. */
3531
3532 static void
3533 add_iv_candidate_for_groups (struct ivopts_data *data)
3534 {
3535 unsigned i;
3536
3537 /* Only add candidate for the first use in group. */
3538 for (i = 0; i < data->vgroups.length (); i++)
3539 {
3540 struct iv_group *group = data->vgroups[i];
3541
3542 gcc_assert (group->vuses[0] != NULL);
3543 add_iv_candidate_for_use (data, group->vuses[0]);
3544 }
3545 add_iv_candidate_derived_from_uses (data);
3546 }
3547
3548 /* Record important candidates and add them to related_cands bitmaps. */
3549
3550 static void
3551 record_important_candidates (struct ivopts_data *data)
3552 {
3553 unsigned i;
3554 struct iv_group *group;
3555
3556 for (i = 0; i < data->vcands.length (); i++)
3557 {
3558 struct iv_cand *cand = data->vcands[i];
3559
3560 if (cand->important)
3561 bitmap_set_bit (data->important_candidates, i);
3562 }
3563
3564 data->consider_all_candidates = (data->vcands.length ()
3565 <= CONSIDER_ALL_CANDIDATES_BOUND);
3566
3567 /* Add important candidates to groups' related_cands bitmaps. */
3568 for (i = 0; i < data->vgroups.length (); i++)
3569 {
3570 group = data->vgroups[i];
3571 bitmap_ior_into (group->related_cands, data->important_candidates);
3572 }
3573 }
3574
3575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3576 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3577 we allocate a simple list for every use. */
3578
3579 static void
3580 alloc_use_cost_map (struct ivopts_data *data)
3581 {
3582 unsigned i, size, s;
3583
3584 for (i = 0; i < data->vgroups.length (); i++)
3585 {
3586 struct iv_group *group = data->vgroups[i];
3587
3588 if (data->consider_all_candidates)
3589 size = data->vcands.length ();
3590 else
3591 {
3592 s = bitmap_count_bits (group->related_cands);
3593
3594 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
3595 size = s ? (1 << ceil_log2 (s)) : 1;
3596 }
3597
3598 group->n_map_members = size;
3599 group->cost_map = XCNEWVEC (struct cost_pair, size);
3600 }
3601 }
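
/* For instance, a group related to 5 candidates gets a cost map of
   1 << ceil_log2 (5) = 8 entries, so set_group_iv_cost and
   get_group_iv_cost below can locate a slot with cand->id & 7 and a short
   linear probe.  */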
3602
3603 /* Sets cost of (GROUP, CAND) pair to COST and records that it depends
3604 on invariants INV_VARS and that the value used in expressing it is
3605 VALUE, and in case of iv elimination the comparison operator is COMP. */
3606
3607 static void
3608 set_group_iv_cost (struct ivopts_data *data,
3609 struct iv_group *group, struct iv_cand *cand,
3610 comp_cost cost, bitmap inv_vars, tree value,
3611 enum tree_code comp, bitmap inv_exprs)
3612 {
3613 unsigned i, s;
3614
3615 if (cost.infinite_cost_p ())
3616 {
3617 BITMAP_FREE (inv_vars);
3618 BITMAP_FREE (inv_exprs);
3619 return;
3620 }
3621
3622 if (data->consider_all_candidates)
3623 {
3624 group->cost_map[cand->id].cand = cand;
3625 group->cost_map[cand->id].cost = cost;
3626 group->cost_map[cand->id].inv_vars = inv_vars;
3627 group->cost_map[cand->id].inv_exprs = inv_exprs;
3628 group->cost_map[cand->id].value = value;
3629 group->cost_map[cand->id].comp = comp;
3630 return;
3631 }
3632
3633 /* n_map_members is a power of two, so this computes modulo. */
3634 s = cand->id & (group->n_map_members - 1);
3635 for (i = s; i < group->n_map_members; i++)
3636 if (!group->cost_map[i].cand)
3637 goto found;
3638 for (i = 0; i < s; i++)
3639 if (!group->cost_map[i].cand)
3640 goto found;
3641
3642 gcc_unreachable ();
3643
3644 found:
3645 group->cost_map[i].cand = cand;
3646 group->cost_map[i].cost = cost;
3647 group->cost_map[i].inv_vars = inv_vars;
3648 group->cost_map[i].inv_exprs = inv_exprs;
3649 group->cost_map[i].value = value;
3650 group->cost_map[i].comp = comp;
3651 }
3652
3653 /* Gets cost of (GROUP, CAND) pair. */
3654
3655 static struct cost_pair *
3656 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3657 struct iv_cand *cand)
3658 {
3659 unsigned i, s;
3660 struct cost_pair *ret;
3661
3662 if (!cand)
3663 return NULL;
3664
3665 if (data->consider_all_candidates)
3666 {
3667 ret = group->cost_map + cand->id;
3668 if (!ret->cand)
3669 return NULL;
3670
3671 return ret;
3672 }
3673
3674 /* n_map_members is a power of two, so this computes modulo. */
3675 s = cand->id & (group->n_map_members - 1);
3676 for (i = s; i < group->n_map_members; i++)
3677 if (group->cost_map[i].cand == cand)
3678 return group->cost_map + i;
3679 else if (group->cost_map[i].cand == NULL)
3680 return NULL;
3681 for (i = 0; i < s; i++)
3682 if (group->cost_map[i].cand == cand)
3683 return group->cost_map + i;
3684 else if (group->cost_map[i].cand == NULL)
3685 return NULL;
3686
3687 return NULL;
3688 }
3689
3690 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3691 static rtx
3692 produce_memory_decl_rtl (tree obj, int *regno)
3693 {
3694 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3695 machine_mode address_mode = targetm.addr_space.address_mode (as);
3696 rtx x;
3697
3698 gcc_assert (obj);
3699 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3700 {
3701 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3702 x = gen_rtx_SYMBOL_REF (address_mode, name);
3703 SET_SYMBOL_REF_DECL (x, obj);
3704 x = gen_rtx_MEM (DECL_MODE (obj), x);
3705 set_mem_addr_space (x, as);
3706 targetm.encode_section_info (obj, x, true);
3707 }
3708 else
3709 {
3710 x = gen_raw_REG (address_mode, (*regno)++);
3711 x = gen_rtx_MEM (DECL_MODE (obj), x);
3712 set_mem_addr_space (x, as);
3713 }
3714
3715 return x;
3716 }
3717
3718 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3719 walk_tree. DATA contains the actual fake register number. */
3720
3721 static tree
3722 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3723 {
3724 tree obj = NULL_TREE;
3725 rtx x = NULL_RTX;
3726 int *regno = (int *) data;
3727
3728 switch (TREE_CODE (*expr_p))
3729 {
3730 case ADDR_EXPR:
3731 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3732 handled_component_p (*expr_p);
3733 expr_p = &TREE_OPERAND (*expr_p, 0))
3734 continue;
3735 obj = *expr_p;
3736 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3737 x = produce_memory_decl_rtl (obj, regno);
3738 break;
3739
3740 case SSA_NAME:
3741 *ws = 0;
3742 obj = SSA_NAME_VAR (*expr_p);
3743 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3744 if (!obj)
3745 return NULL_TREE;
3746 if (!DECL_RTL_SET_P (obj))
3747 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3748 break;
3749
3750 case VAR_DECL:
3751 case PARM_DECL:
3752 case RESULT_DECL:
3753 *ws = 0;
3754 obj = *expr_p;
3755
3756 if (DECL_RTL_SET_P (obj))
3757 break;
3758
3759 if (DECL_MODE (obj) == BLKmode)
3760 x = produce_memory_decl_rtl (obj, regno);
3761 else
3762 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3763
3764 break;
3765
3766 default:
3767 break;
3768 }
3769
3770 if (x)
3771 {
3772 decl_rtl_to_reset.safe_push (obj);
3773 SET_DECL_RTL (obj, x);
3774 }
3775
3776 return NULL_TREE;
3777 }
3778
3779 /* Determines cost of the computation of EXPR. */
3780
3781 static unsigned
3782 computation_cost (tree expr, bool speed)
3783 {
3784 rtx_insn *seq;
3785 rtx rslt;
3786 tree type = TREE_TYPE (expr);
3787 unsigned cost;
3788 /* Avoid using hard regs in ways which may be unsupported. */
3789 int regno = LAST_VIRTUAL_REGISTER + 1;
3790 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3791 enum node_frequency real_frequency = node->frequency;
3792
3793 node->frequency = NODE_FREQUENCY_NORMAL;
3794 crtl->maybe_hot_insn_p = speed;
3795 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3796 start_sequence ();
3797 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3798 seq = get_insns ();
3799 end_sequence ();
3800 default_rtl_profile ();
3801 node->frequency = real_frequency;
3802
3803 cost = seq_cost (seq, speed);
3804 if (MEM_P (rslt))
3805 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3806 TYPE_ADDR_SPACE (type), speed);
3807 else if (!REG_P (rslt))
3808 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3809
3810 return cost;
3811 }
3812
3813 /* Returns variable containing the value of candidate CAND at statement AT. */
3814
3815 static tree
3816 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3817 {
3818 if (stmt_after_increment (loop, cand, stmt))
3819 return cand->var_after;
3820 else
3821 return cand->var_before;
3822 }
3823
3824 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3825 same precision that is at least as wide as the precision of TYPE, stores
3826 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3827 type of A and B. */
3828
3829 static tree
3830 determine_common_wider_type (tree *a, tree *b)
3831 {
3832 tree wider_type = NULL;
3833 tree suba, subb;
3834 tree atype = TREE_TYPE (*a);
3835
3836 if (CONVERT_EXPR_P (*a))
3837 {
3838 suba = TREE_OPERAND (*a, 0);
3839 wider_type = TREE_TYPE (suba);
3840 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3841 return atype;
3842 }
3843 else
3844 return atype;
3845
3846 if (CONVERT_EXPR_P (*b))
3847 {
3848 subb = TREE_OPERAND (*b, 0);
3849 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3850 return atype;
3851 }
3852 else
3853 return atype;
3854
3855 *a = suba;
3856 *b = subb;
3857 return wider_type;
3858 }
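
/* For example (hypothetical names): if *A is (unsigned int) x_1 and *B is
   (unsigned int) y_2 with x_1 and y_2 of a 64-bit unsigned type, *A and *B
   are replaced by x_1 and y_2 and the 64-bit type is returned; otherwise
   the type of *A is returned and nothing is changed.  */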
3859
3860 /* Determines the expression by which USE is expressed from induction variable
3861 CAND at statement AT in LOOP. The expression is stored in two parts in a
3862 decomposed form: the invariant part is stored in AFF_INV, while the variant
3863 part is stored in AFF_VAR. Store the ratio of USE.step over CAND.step in PRAT
3864 if it's non-null. Returns false if USE cannot be expressed using CAND. */
3865
3866 static bool
3867 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3868 struct iv_cand *cand, struct aff_tree *aff_inv,
3869 struct aff_tree *aff_var, widest_int *prat = NULL)
3870 {
3871 tree ubase = use->iv->base, ustep = use->iv->step;
3872 tree cbase = cand->iv->base, cstep = cand->iv->step;
3873 tree common_type, uutype, var, cstep_common;
3874 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3875 aff_tree aff_cbase;
3876 widest_int rat;
3877
3878 /* We must have enough precision to express the values of use. */
3879 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3880 return false;
3881
3882 var = var_at_stmt (loop, cand, at);
3883 uutype = unsigned_type_for (utype);
3884
3885 /* If the conversion is not a no-op, perform it. */
3886 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3887 {
3888 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3889 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3890 {
3891 tree inner_base, inner_step, inner_type;
3892 inner_base = TREE_OPERAND (cbase, 0);
3893 if (CONVERT_EXPR_P (cstep))
3894 inner_step = TREE_OPERAND (cstep, 0);
3895 else
3896 inner_step = cstep;
3897
3898 inner_type = TREE_TYPE (inner_base);
3899 /* If the candidate is added from a biv whose type is smaller than
3900 ctype, we know both the candidate and the biv won't overflow.
3901 In this case, it's safe to skip the conversion in the candidate.
3902 As an example, (unsigned short)((unsigned long)A) equals
3903 (unsigned short)A, if A has a type no larger than short. */
3904 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3905 {
3906 cbase = inner_base;
3907 cstep = inner_step;
3908 }
3909 }
3910 cbase = fold_convert (uutype, cbase);
3911 cstep = fold_convert (uutype, cstep);
3912 var = fold_convert (uutype, var);
3913 }
3914
3915 /* Ratio is 1 when computing the value of biv cand by itself.
3916 We can't rely on constant_multiple_of in this case because the
3917 use is created after the original biv is selected. The call
3918 could fail because of inconsistent fold behavior. See PR68021
3919 for more information. */
3920 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3921 {
3922 gcc_assert (is_gimple_assign (use->stmt));
3923 gcc_assert (use->iv->ssa_name == cand->var_after);
3924 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3925 rat = 1;
3926 }
3927 else if (!constant_multiple_of (ustep, cstep, &rat))
3928 return false;
3929
3930 if (prat)
3931 *prat = rat;
3932
3933 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3934 type, we achieve better folding by computing their difference in this
3935 wider type, and cast the result to UUTYPE. We do not need to worry about
3936 overflows, as all the arithmetic will in the end be performed in UUTYPE
3937 anyway. */
3938 common_type = determine_common_wider_type (&ubase, &cbase);
3939
3940 /* use = ubase - ratio * cbase + ratio * var. */
3941 tree_to_aff_combination (ubase, common_type, aff_inv);
3942 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3943 tree_to_aff_combination (var, uutype, aff_var);
3944
3945 /* We need to shift the value if we are after the increment. */
3946 if (stmt_after_increment (loop, cand, at))
3947 {
3948 aff_tree cstep_aff;
3949
3950 if (common_type != uutype)
3951 cstep_common = fold_convert (common_type, cstep);
3952 else
3953 cstep_common = cstep;
3954
3955 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3956 aff_combination_add (&aff_cbase, &cstep_aff);
3957 }
3958
3959 aff_combination_scale (&aff_cbase, -rat);
3960 aff_combination_add (aff_inv, &aff_cbase);
3961 if (common_type != uutype)
3962 aff_combination_convert (aff_inv, uutype);
3963
3964 aff_combination_scale (aff_var, rat);
3965 return true;
3966 }
3967
3968 /* Determines the expression by which USE is expressed from induction variable
3969 CAND at statement AT in LOOP. The expression is stored in a decomposed
3970 form into AFF. Returns false if USE cannot be expressed using CAND. */
3971
3972 static bool
3973 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3974 struct iv_cand *cand, struct aff_tree *aff)
3975 {
3976 aff_tree aff_var;
3977
3978 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3979 return false;
3980
3981 aff_combination_add (aff, &aff_var);
3982 return true;
3983 }
3984
3985 /* Return the type of USE. */
3986
3987 static tree
3988 get_use_type (struct iv_use *use)
3989 {
3990 tree base_type = TREE_TYPE (use->iv->base);
3991 tree type;
3992
3993 if (use->type == USE_REF_ADDRESS)
3994 {
3995 /* The base_type may be a void pointer. Create a pointer type based on
3996 the mem_ref instead. */
3997 type = build_pointer_type (TREE_TYPE (*use->op_p));
3998 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3999 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4000 }
4001 else
4002 type = base_type;
4003
4004 return type;
4005 }
4006
4007 /* Determines the expression by which USE is expressed from induction variable
4008 CAND at statement AT in LOOP. The computation is unshared. */
4009
4010 static tree
4011 get_computation_at (struct loop *loop, gimple *at,
4012 struct iv_use *use, struct iv_cand *cand)
4013 {
4014 aff_tree aff;
4015 tree type = get_use_type (use);
4016
4017 if (!get_computation_aff (loop, at, use, cand, &aff))
4018 return NULL_TREE;
4019 unshare_aff_combination (&aff);
4020 return fold_convert (type, aff_combination_to_tree (&aff));
4021 }
4022
4023 /* Adjust the cost COST for being in the loop setup rather than the loop body.
4024 If we're optimizing for space, the loop setup overhead is constant;
4025 if we're optimizing for speed, amortize it over the per-iteration cost.
4026 If ROUND_UP_P is true, the result is rounded up rather than truncated
4027 toward zero when optimizing for speed. */
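/* For instance (illustrative numbers only): with COST = 10 and an average
   of 4 iterations, optimizing for speed yields 10 / 4 = 2, or
   (10 + 3) / 4 = 3 when ROUND_UP_P; optimizing for size keeps the full
   cost of 10.  */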
4028 static unsigned
4029 adjust_setup_cost (struct ivopts_data *data, unsigned cost,
4030 bool round_up_p = false)
4031 {
4032 if (cost == INFTY)
4033 return cost;
4034 else if (optimize_loop_for_speed_p (data->current_loop))
4035 {
4036 HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
4037 return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
4038 }
4039 else
4040 return cost;
4041 }
4042
4043 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4044 EXPR operand holding the shift. COST0 and COST1 are the costs for
4045 calculating the operands of EXPR. Returns true if successful, and returns
4046 the cost in COST. */
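/* An illustrative sketch (hypothetical expression): for EXPR = a + b * 8
   the constant 8 gives m = 3, so the routine compares the cost of a
   shift-by-3 followed by an add against the target's shift-and-add cost,
   takes the cheaper one, and adds the cost of computing the operand that
   is not multiplied (here a).  */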
4047
4048 static bool
4049 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4050 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4051 {
4052 comp_cost res;
4053 tree op1 = TREE_OPERAND (expr, 1);
4054 tree cst = TREE_OPERAND (mult, 1);
4055 tree multop = TREE_OPERAND (mult, 0);
4056 int m = exact_log2 (int_cst_value (cst));
4057 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4058 int as_cost, sa_cost;
4059 bool mult_in_op1;
4060
4061 if (!(m >= 0 && m < maxm))
4062 return false;
4063
4064 STRIP_NOPS (op1);
4065 mult_in_op1 = operand_equal_p (op1, mult, 0);
4066
4067 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4068
4069 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4070 use that in preference to a shift insn followed by an add insn. */
4071 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4072 ? shiftadd_cost (speed, mode, m)
4073 : (mult_in_op1
4074 ? shiftsub1_cost (speed, mode, m)
4075 : shiftsub0_cost (speed, mode, m)));
4076
4077 res = comp_cost (MIN (as_cost, sa_cost), 0);
4078 res += (mult_in_op1 ? cost0 : cost1);
4079
4080 STRIP_NOPS (multop);
4081 if (!is_gimple_val (multop))
4082 res += force_expr_to_var_cost (multop, speed);
4083
4084 *cost = res;
4085 return true;
4086 }
4087
4088 /* Estimates cost of forcing expression EXPR into a variable. */
4089
4090 static comp_cost
4091 force_expr_to_var_cost (tree expr, bool speed)
4092 {
4093 static bool costs_initialized = false;
4094 static unsigned integer_cost [2];
4095 static unsigned symbol_cost [2];
4096 static unsigned address_cost [2];
4097 tree op0, op1;
4098 comp_cost cost0, cost1, cost;
4099 machine_mode mode;
4100 scalar_int_mode int_mode;
4101
4102 if (!costs_initialized)
4103 {
4104 tree type = build_pointer_type (integer_type_node);
4105 tree var, addr;
4106 rtx x;
4107 int i;
4108
4109 var = create_tmp_var_raw (integer_type_node, "test_var");
4110 TREE_STATIC (var) = 1;
4111 x = produce_memory_decl_rtl (var, NULL);
4112 SET_DECL_RTL (var, x);
4113
4114 addr = build1 (ADDR_EXPR, type, var);
4115
4116
4117 for (i = 0; i < 2; i++)
4118 {
4119 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4120 2000), i);
4121
4122 symbol_cost[i] = computation_cost (addr, i) + 1;
4123
4124 address_cost[i]
4125 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4126 if (dump_file && (dump_flags & TDF_DETAILS))
4127 {
4128 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4129 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4130 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4131 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4132 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4133 fprintf (dump_file, "\n");
4134 }
4135 }
4136
4137 costs_initialized = true;
4138 }
4139
4140 STRIP_NOPS (expr);
4141
4142 if (SSA_VAR_P (expr))
4143 return no_cost;
4144
4145 if (is_gimple_min_invariant (expr))
4146 {
4147 if (poly_int_tree_p (expr))
4148 return comp_cost (integer_cost [speed], 0);
4149
4150 if (TREE_CODE (expr) == ADDR_EXPR)
4151 {
4152 tree obj = TREE_OPERAND (expr, 0);
4153
4154 if (VAR_P (obj)
4155 || TREE_CODE (obj) == PARM_DECL
4156 || TREE_CODE (obj) == RESULT_DECL)
4157 return comp_cost (symbol_cost [speed], 0);
4158 }
4159
4160 return comp_cost (address_cost [speed], 0);
4161 }
4162
4163 switch (TREE_CODE (expr))
4164 {
4165 case POINTER_PLUS_EXPR:
4166 case PLUS_EXPR:
4167 case MINUS_EXPR:
4168 case MULT_EXPR:
4169 case TRUNC_DIV_EXPR:
4170 case BIT_AND_EXPR:
4171 case BIT_IOR_EXPR:
4172 case LSHIFT_EXPR:
4173 case RSHIFT_EXPR:
4174 op0 = TREE_OPERAND (expr, 0);
4175 op1 = TREE_OPERAND (expr, 1);
4176 STRIP_NOPS (op0);
4177 STRIP_NOPS (op1);
4178 break;
4179
4180 CASE_CONVERT:
4181 case NEGATE_EXPR:
4182 case BIT_NOT_EXPR:
4183 op0 = TREE_OPERAND (expr, 0);
4184 STRIP_NOPS (op0);
4185 op1 = NULL_TREE;
4186 break;
4187
4188 default:
4189 /* Just an arbitrary value, FIXME. */
4190 return comp_cost (target_spill_cost[speed], 0);
4191 }
4192
4193 if (op0 == NULL_TREE
4194 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4195 cost0 = no_cost;
4196 else
4197 cost0 = force_expr_to_var_cost (op0, speed);
4198
4199 if (op1 == NULL_TREE
4200 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4201 cost1 = no_cost;
4202 else
4203 cost1 = force_expr_to_var_cost (op1, speed);
4204
4205 mode = TYPE_MODE (TREE_TYPE (expr));
4206 switch (TREE_CODE (expr))
4207 {
4208 case POINTER_PLUS_EXPR:
4209 case PLUS_EXPR:
4210 case MINUS_EXPR:
4211 case NEGATE_EXPR:
4212 cost = comp_cost (add_cost (speed, mode), 0);
4213 if (TREE_CODE (expr) != NEGATE_EXPR)
4214 {
4215 tree mult = NULL_TREE;
4216 comp_cost sa_cost;
4217 if (TREE_CODE (op1) == MULT_EXPR)
4218 mult = op1;
4219 else if (TREE_CODE (op0) == MULT_EXPR)
4220 mult = op0;
4221
4222 if (mult != NULL_TREE
4223 && is_a <scalar_int_mode> (mode, &int_mode)
4224 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4225 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4226 speed, &sa_cost))
4227 return sa_cost;
4228 }
4229 break;
4230
4231 CASE_CONVERT:
4232 {
4233 tree inner_mode, outer_mode;
4234 outer_mode = TREE_TYPE (expr);
4235 inner_mode = TREE_TYPE (op0);
4236 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4237 TYPE_MODE (inner_mode), speed), 0);
4238 }
4239 break;
4240
4241 case MULT_EXPR:
4242 if (cst_and_fits_in_hwi (op0))
4243 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4244 mode, speed), 0);
4245 else if (cst_and_fits_in_hwi (op1))
4246 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4247 mode, speed), 0);
4248 else
4249 return comp_cost (target_spill_cost [speed], 0);
4250 break;
4251
4252 case TRUNC_DIV_EXPR:
4253 /* Division by power of two is usually cheap, so we allow it. Forbid
4254 anything else. */
4255 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4256 cost = comp_cost (add_cost (speed, mode), 0);
4257 else
4258 cost = comp_cost (target_spill_cost[speed], 0);
4259 break;
4260
4261 case BIT_AND_EXPR:
4262 case BIT_IOR_EXPR:
4263 case BIT_NOT_EXPR:
4264 case LSHIFT_EXPR:
4265 case RSHIFT_EXPR:
4266 cost = comp_cost (add_cost (speed, mode), 0);
4267 break;
4268
4269 default:
4270 gcc_unreachable ();
4271 }
4272
4273 cost += cost0;
4274 cost += cost1;
4275 return cost;
4276 }
4277
4278 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4279 invariants the computation depends on. */
4280
4281 static comp_cost
4282 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4283 {
4284 if (!expr)
4285 return no_cost;
4286
4287 find_inv_vars (data, &expr, inv_vars);
4288 return force_expr_to_var_cost (expr, data->speed);
4289 }
4290
4291 /* Returns the cost of an auto-modifying address expression of the shape
4292 base + offset. AINC_STEP is the step size of the address IV. AINC_OFFSET
4293 is the offset of the address expression. The address expression has
4294 ADDR_MODE in addr space AS. The memory access has MEM_MODE. SPEED is
4295 true if we are optimizing for speed rather than size. */
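/* As an illustration (assuming a 4-byte MEM_MODE on a target that supports
   these addressing modes): AINC_STEP = 4 with AINC_OFFSET = 0 matches a
   post-increment, AINC_STEP = 4 with AINC_OFFSET = 4 a pre-increment,
   AINC_STEP = -4 with AINC_OFFSET = 0 a post-decrement, and AINC_STEP = -4
   with AINC_OFFSET = -4 a pre-decrement; any other combination yields
   infinite cost.  */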
4296
4297 enum ainc_type
4298 {
4299 AINC_PRE_INC, /* Pre increment. */
4300 AINC_PRE_DEC, /* Pre decrement. */
4301 AINC_POST_INC, /* Post increment. */
4302 AINC_POST_DEC, /* Post decrement. */
4303 AINC_NONE /* Also the number of auto increment types. */
4304 };
4305
4306 struct ainc_cost_data
4307 {
4308 unsigned costs[AINC_NONE];
4309 };
4310
4311 static comp_cost
4312 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4313 machine_mode addr_mode, machine_mode mem_mode,
4314 addr_space_t as, bool speed)
4315 {
4316 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4317 && !USE_STORE_PRE_DECREMENT (mem_mode)
4318 && !USE_LOAD_POST_DECREMENT (mem_mode)
4319 && !USE_STORE_POST_DECREMENT (mem_mode)
4320 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4321 && !USE_STORE_PRE_INCREMENT (mem_mode)
4322 && !USE_LOAD_POST_INCREMENT (mem_mode)
4323 && !USE_STORE_POST_INCREMENT (mem_mode))
4324 return infinite_cost;
4325
4326 static vec<ainc_cost_data *> ainc_cost_data_list;
4327 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4328 if (idx >= ainc_cost_data_list.length ())
4329 {
4330 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4331
4332 gcc_assert (nsize > idx);
4333 ainc_cost_data_list.safe_grow_cleared (nsize);
4334 }
4335
4336 ainc_cost_data *data = ainc_cost_data_list[idx];
4337 if (data == NULL)
4338 {
4339 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4340
4341 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4342 data->costs[AINC_PRE_DEC] = INFTY;
4343 data->costs[AINC_POST_DEC] = INFTY;
4344 data->costs[AINC_PRE_INC] = INFTY;
4345 data->costs[AINC_POST_INC] = INFTY;
4346 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4347 || USE_STORE_PRE_DECREMENT (mem_mode))
4348 {
4349 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4350
4351 if (memory_address_addr_space_p (mem_mode, addr, as))
4352 data->costs[AINC_PRE_DEC]
4353 = address_cost (addr, mem_mode, as, speed);
4354 }
4355 if (USE_LOAD_POST_DECREMENT (mem_mode)
4356 || USE_STORE_POST_DECREMENT (mem_mode))
4357 {
4358 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4359
4360 if (memory_address_addr_space_p (mem_mode, addr, as))
4361 data->costs[AINC_POST_DEC]
4362 = address_cost (addr, mem_mode, as, speed);
4363 }
4364 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4365 || USE_STORE_PRE_INCREMENT (mem_mode))
4366 {
4367 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4368
4369 if (memory_address_addr_space_p (mem_mode, addr, as))
4370 data->costs[AINC_PRE_INC]
4371 = address_cost (addr, mem_mode, as, speed);
4372 }
4373 if (USE_LOAD_POST_INCREMENT (mem_mode)
4374 || USE_STORE_POST_INCREMENT (mem_mode))
4375 {
4376 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4377
4378 if (memory_address_addr_space_p (mem_mode, addr, as))
4379 data->costs[AINC_POST_INC]
4380 = address_cost (addr, mem_mode, as, speed);
4381 }
4382 ainc_cost_data_list[idx] = data;
4383 }
4384
4385 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4386 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4387 return comp_cost (data->costs[AINC_POST_INC], 0);
4388 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4389 return comp_cost (data->costs[AINC_POST_DEC], 0);
4390 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4391 return comp_cost (data->costs[AINC_PRE_INC], 0);
4392 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4393 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4394
4395 return infinite_cost;
4396 }
4397
4398 /* Return the cost of computing USE's address expression by using CAND.
4399 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4400 address expression, respectively. If AFF_INV is simple, store
4401 the loop invariant variables on which it depends in INV_VARS;
4402 if AFF_INV is complicated, handle it as a new invariant expression
4403 and record it in INV_EXPR. RATIO is the ratio between the steps
4404 of USE and CAND. If CAN_AUTOINC is non-NULL, store a boolean
4405 value in it indicating whether this is an auto-increment address. */
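/* An illustrative sketch (made-up target and access): for an access like
   a[4 * i + 3] with candidate i, a target supporting the form
   [symbol + index * scale + offset] can keep the whole invariant part in
   the address (symbol &a, index i, scale and offset scaled by the element
   size), whereas a target with only [base + offset] addressing forces the
   invariant part to be computed into a register first; the costs below
   account for that difference.  */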
4406
4407 static comp_cost
4408 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4409 struct iv_cand *cand, aff_tree *aff_inv,
4410 aff_tree *aff_var, HOST_WIDE_INT ratio,
4411 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4412 bool *can_autoinc, bool speed)
4413 {
4414 rtx addr;
4415 bool simple_inv = true;
4416 tree comp_inv = NULL_TREE, type = aff_var->type;
4417 comp_cost var_cost = no_cost, cost = no_cost;
4418 struct mem_address parts = {NULL_TREE, integer_one_node,
4419 NULL_TREE, NULL_TREE, NULL_TREE};
4420 machine_mode addr_mode = TYPE_MODE (type);
4421 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4422 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4423 /* Only true if ratio != 1. */
4424 bool ok_with_ratio_p = false;
4425 bool ok_without_ratio_p = false;
4426
4427 if (!aff_combination_const_p (aff_inv))
4428 {
4429 parts.index = integer_one_node;
4430 /* Addressing mode "base + index". */
4431 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4432 if (ratio != 1)
4433 {
4434 parts.step = wide_int_to_tree (type, ratio);
4435 /* Addressing mode "base + index << scale". */
4436 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4437 if (!ok_with_ratio_p)
4438 parts.step = NULL_TREE;
4439 }
4440 if (ok_with_ratio_p || ok_without_ratio_p)
4441 {
4442 if (maybe_ne (aff_inv->offset, 0))
4443 {
4444 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4445 /* Addressing mode "base + index [<< scale] + offset". */
4446 if (!valid_mem_ref_p (mem_mode, as, &parts))
4447 parts.offset = NULL_TREE;
4448 else
4449 aff_inv->offset = 0;
4450 }
4451
4452 move_fixed_address_to_symbol (&parts, aff_inv);
4453 /* Base is fixed address and is moved to symbol part. */
4454 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4455 parts.base = NULL_TREE;
4456
4457 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4458 if (parts.symbol != NULL_TREE
4459 && !valid_mem_ref_p (mem_mode, as, &parts))
4460 {
4461 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4462 parts.symbol = NULL_TREE;
4463 /* Reset SIMPLE_INV since the symbol address needs to be computed
4464 outside of the address expression in this case. */
4465 simple_inv = false;
4466 /* The symbol part is moved back to the base part; it can't be NULL. */
4467 parts.base = integer_one_node;
4468 }
4469 }
4470 else
4471 parts.index = NULL_TREE;
4472 }
4473 else
4474 {
4475 poly_int64 ainc_step;
4476 if (can_autoinc
4477 && ratio == 1
4478 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4479 {
4480 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4481
4482 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4483 ainc_offset += ainc_step;
4484 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4485 addr_mode, mem_mode, as, speed);
4486 if (!cost.infinite_cost_p ())
4487 {
4488 *can_autoinc = true;
4489 return cost;
4490 }
4491 cost = no_cost;
4492 }
4493 if (!aff_combination_zero_p (aff_inv))
4494 {
4495 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4496 /* Addressing mode "base + offset". */
4497 if (!valid_mem_ref_p (mem_mode, as, &parts))
4498 parts.offset = NULL_TREE;
4499 else
4500 aff_inv->offset = 0;
4501 }
4502 }
4503
4504 if (simple_inv)
4505 simple_inv = (aff_inv == NULL
4506 || aff_combination_const_p (aff_inv)
4507 || aff_combination_singleton_var_p (aff_inv));
4508 if (!aff_combination_zero_p (aff_inv))
4509 comp_inv = aff_combination_to_tree (aff_inv);
4510 if (comp_inv != NULL_TREE)
4511 cost = force_var_cost (data, comp_inv, inv_vars);
4512 if (ratio != 1 && parts.step == NULL_TREE)
4513 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4514 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4515 var_cost += add_cost (speed, addr_mode);
4516
4517 if (comp_inv && inv_expr && !simple_inv)
4518 {
4519 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4520 /* Clear depends on. */
4521 if (*inv_expr != NULL && inv_vars && *inv_vars)
4522 bitmap_clear (*inv_vars);
4523
4524 /* The cost of a small invariant expression adjusted against the loop
4525 niters is usually zero, which makes it hard to distinguish from a
4526 candidate based on loop invariant variables. Moreover, the
4527 generated invariant expression may not be hoisted out of the loop
4528 by a following pass. We penalize the cost by rounding up in order
4529 to neutralize such effects. */
4530 cost.cost = adjust_setup_cost (data, cost.cost, true);
4531 cost.scratch = cost.cost;
4532 }
4533
4534 cost += var_cost;
4535 addr = addr_for_mem_ref (&parts, as, false);
4536 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4537 cost += address_cost (addr, mem_mode, as, speed);
4538
4539 if (parts.symbol != NULL_TREE)
4540 cost.complexity += 1;
4541 /* Don't increase the complexity of adding a scaled index if it's
4542 the only kind of index that the target allows. */
4543 if (parts.step != NULL_TREE && ok_without_ratio_p)
4544 cost.complexity += 1;
4545 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4546 cost.complexity += 1;
4547 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4548 cost.complexity += 1;
4549
4550 return cost;
4551 }
4552
4553 /* Scale (multiply) the computed COST (except the scratch part, which should
4554 be hoisted out of the loop) by header->frequency / AT->frequency, which
4555 makes the expected cost more accurate. */
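/* For example (made-up numbers): a cost of 10 with a scratch part of 4 in
   a block whose precomputed scale factor is 3 becomes 4 + (10 - 4) * 3
   = 22; the scratch (setup) part is not scaled because it is executed
   outside the loop body.  */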
4556
4557 static comp_cost
4558 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4559 {
4560 if (data->speed
4561 && data->current_loop->header->count.to_frequency (cfun) > 0)
4562 {
4563 basic_block bb = gimple_bb (at);
4564 gcc_assert (cost.scratch <= cost.cost);
4565 int scale_factor = (int)(intptr_t) bb->aux;
4566 if (scale_factor == 1)
4567 return cost;
4568
4569 int scaled_cost
4570 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4571
4572 if (dump_file && (dump_flags & TDF_DETAILS))
4573 fprintf (dump_file, "Scaling cost based on bb prob "
4574 "by %2.2f: %d (scratch: %d) -> %d\n",
4575 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4576
4577 cost.cost = scaled_cost;
4578 }
4579
4580 return cost;
4581 }
4582
4583 /* Determines the cost of the computation by which USE is expressed
4584 from induction variable CAND. If ADDRESS_P is true, we just need
4585 to create an address from it, otherwise we want to get it into a
4586 register. A set of invariants we depend on is stored in INV_VARS.
4587 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4588 addressing is likely. If INV_EXPR is nonnull, record the invariant
4589 expr entry in it. */
4590
4591 static comp_cost
4592 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4593 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4594 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4595 {
4596 gimple *at = use->stmt;
4597 tree ubase = use->iv->base, cbase = cand->iv->base;
4598 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4599 tree comp_inv = NULL_TREE;
4600 HOST_WIDE_INT ratio, aratio;
4601 comp_cost cost;
4602 widest_int rat;
4603 aff_tree aff_inv, aff_var;
4604 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4605
4606 if (inv_vars)
4607 *inv_vars = NULL;
4608 if (can_autoinc)
4609 *can_autoinc = false;
4610 if (inv_expr)
4611 *inv_expr = NULL;
4612
4613 /* Check if we have enough precision to express the values of use. */
4614 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4615 return infinite_cost;
4616
4617 if (address_p
4618 || (use->iv->base_object
4619 && cand->iv->base_object
4620 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4621 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4622 {
4623 /* Do not try to express address of an object with computation based
4624 on address of a different object. This may cause problems in rtl
4625 level alias analysis (that does not expect this to be happening,
4626 as this is illegal in C), and would be unlikely to be useful
4627 anyway. */
4628 if (use->iv->base_object
4629 && cand->iv->base_object
4630 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4631 return infinite_cost;
4632 }
4633
4634 if (!get_computation_aff_1 (data->current_loop, at, use,
4635 cand, &aff_inv, &aff_var, &rat)
4636 || !wi::fits_shwi_p (rat))
4637 return infinite_cost;
4638
4639 ratio = rat.to_shwi ();
4640 if (address_p)
4641 {
4642 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4643 inv_vars, inv_expr, can_autoinc, speed);
4644 return get_scaled_computation_cost_at (data, at, cost);
4645 }
4646
4647 bool simple_inv = (aff_combination_const_p (&aff_inv)
4648 || aff_combination_singleton_var_p (&aff_inv));
4649 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4650 aff_combination_convert (&aff_inv, signed_type);
4651 if (!aff_combination_zero_p (&aff_inv))
4652 comp_inv = aff_combination_to_tree (&aff_inv);
4653
4654 cost = force_var_cost (data, comp_inv, inv_vars);
4655 if (comp_inv && inv_expr && !simple_inv)
4656 {
4657 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4658 /* Clear depends on. */
4659 if (*inv_expr != NULL && inv_vars && *inv_vars)
4660 bitmap_clear (*inv_vars);
4661
4662 cost.cost = adjust_setup_cost (data, cost.cost);
4663 /* Record setup cost in scratch field. */
4664 cost.scratch = cost.cost;
4665 }
4666 /* The cost of a constant integer can be covered when adding the invariant
4667 part to the variant part. */
4668 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4669 cost = no_cost;
4670
4671 /* Need type narrowing to represent use with cand. */
4672 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4673 {
4674 machine_mode outer_mode = TYPE_MODE (utype);
4675 machine_mode inner_mode = TYPE_MODE (ctype);
4676 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4677 }
4678
4679 /* Turn a + i * (-c) into a - i * c. */
4680 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4681 aratio = -ratio;
4682 else
4683 aratio = ratio;
4684
4685 if (ratio != 1)
4686 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4687
4688 /* TODO: We may also need to check if we can compute a + i * 4 in one
4689 instruction. */
4690 /* Need to add up the invariant and variant parts. */
4691 if (comp_inv && !integer_zerop (comp_inv))
4692 cost += add_cost (speed, TYPE_MODE (utype));
4693
4694 return get_scaled_computation_cost_at (data, at, cost);
4695 }
4696
4697 /* Determines cost of computing the use in GROUP with CAND in a generic
4698 expression. */
4699
4700 static bool
4701 determine_group_iv_cost_generic (struct ivopts_data *data,
4702 struct iv_group *group, struct iv_cand *cand)
4703 {
4704 comp_cost cost;
4705 iv_inv_expr_ent *inv_expr = NULL;
4706 bitmap inv_vars = NULL, inv_exprs = NULL;
4707 struct iv_use *use = group->vuses[0];
4708
4709 /* The simple case first -- if we need to express value of the preserved
4710 original biv, the cost is 0. This also prevents us from counting the
4711 cost of increment twice -- once at this use and once in the cost of
4712 the candidate. */
4713 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4714 cost = no_cost;
4715 else
4716 cost = get_computation_cost (data, use, cand, false,
4717 &inv_vars, NULL, &inv_expr);
4718
4719 if (inv_expr)
4720 {
4721 inv_exprs = BITMAP_ALLOC (NULL);
4722 bitmap_set_bit (inv_exprs, inv_expr->id);
4723 }
4724 set_group_iv_cost (data, group, cand, cost, inv_vars,
4725 NULL_TREE, ERROR_MARK, inv_exprs);
4726 return !cost.infinite_cost_p ();
4727 }
4728
4729 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4730
4731 static bool
4732 determine_group_iv_cost_address (struct ivopts_data *data,
4733 struct iv_group *group, struct iv_cand *cand)
4734 {
4735 unsigned i;
4736 bitmap inv_vars = NULL, inv_exprs = NULL;
4737 bool can_autoinc;
4738 iv_inv_expr_ent *inv_expr = NULL;
4739 struct iv_use *use = group->vuses[0];
4740 comp_cost sum_cost = no_cost, cost;
4741
4742 cost = get_computation_cost (data, use, cand, true,
4743 &inv_vars, &can_autoinc, &inv_expr);
4744
4745 if (inv_expr)
4746 {
4747 inv_exprs = BITMAP_ALLOC (NULL);
4748 bitmap_set_bit (inv_exprs, inv_expr->id);
4749 }
4750 sum_cost = cost;
4751 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4752 {
4753 if (can_autoinc)
4754 sum_cost -= cand->cost_step;
4755 /* If we generated the candidate solely for exploiting autoincrement
4756 opportunities, and it turns out it can't be used, set the cost to
4757 infinity to make sure we ignore it. */
4758 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4759 sum_cost = infinite_cost;
4760 }
4761
4762 /* Uses in a group can share setup code, so only add setup cost once. */
4763 cost -= cost.scratch;
4764 /* Compute and add costs for the rest of the uses in this group. */
4765 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4766 {
4767 struct iv_use *next = group->vuses[i];
4768
4769 /* TODO: We could skip computing cost for sub iv_use when it has the
4770 same cost as the first iv_use, but the cost really depends on the
4771 offset and where the iv_use is. */
4772 cost = get_computation_cost (data, next, cand, true,
4773 NULL, &can_autoinc, &inv_expr);
4774 if (inv_expr)
4775 {
4776 if (!inv_exprs)
4777 inv_exprs = BITMAP_ALLOC (NULL);
4778
4779 bitmap_set_bit (inv_exprs, inv_expr->id);
4780 }
4781 sum_cost += cost;
4782 }
4783 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4784 NULL_TREE, ERROR_MARK, inv_exprs);
4785
4786 return !sum_cost.infinite_cost_p ();
4787 }
4788
4789 /* Computes value of candidate CAND at position AT in iteration NITER, and
4790 stores it to VAL. */
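/* For instance (hypothetical candidate): with base 0 and step 4, the value
   after NITER iterations is 4 * NITER, or 4 * (NITER + 1) if AT is after
   the increment of the candidate.  */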
4791
4792 static void
4793 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4794 aff_tree *val)
4795 {
4796 aff_tree step, delta, nit;
4797 struct iv *iv = cand->iv;
4798 tree type = TREE_TYPE (iv->base);
4799 tree steptype;
4800 if (POINTER_TYPE_P (type))
4801 steptype = sizetype;
4802 else
4803 steptype = unsigned_type_for (type);
4804
4805 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4806 aff_combination_convert (&step, steptype);
4807 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4808 aff_combination_convert (&nit, steptype);
4809 aff_combination_mult (&nit, &step, &delta);
4810 if (stmt_after_increment (loop, cand, at))
4811 aff_combination_add (&delta, &step);
4812
4813 tree_to_aff_combination (iv->base, type, val);
4814 if (!POINTER_TYPE_P (type))
4815 aff_combination_convert (val, steptype);
4816 aff_combination_add (val, &delta);
4817 }
4818
4819 /* Returns period of induction variable iv. */
4820
4821 static tree
4822 iv_period (struct iv *iv)
4823 {
4824 tree step = iv->step, period, type;
4825 tree pow2div;
4826
4827 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4828
4829 type = unsigned_type_for (TREE_TYPE (step));
4830 /* The period of the iv is lcm (step, type_range) / step - 1,
4831 i.e., N * type_range / step - 1. Since the type range is a power
4832 of two, N == (step >> num_of_ending_zeros_binary (step)),
4833 so the final result is
4834
4835 (type_range >> num_of_ending_zeros_binary (step)) - 1
4836
4837 */
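/* E.g., for a 32-bit unsigned iv with the illustrative step 12 (binary
   ...1100, two trailing zeros) the period is (1 << (32 - 2)) - 1,
   i.e. 2^30 - 1.  */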
4838 pow2div = num_ending_zeros (step);
4839
4840 period = build_low_bits_mask (type,
4841 (TYPE_PRECISION (type)
4842 - tree_to_uhwi (pow2div)));
4843
4844 return period;
4845 }
4846
4847 /* Returns the comparison operator used when eliminating the iv USE. */
4848
4849 static enum tree_code
4850 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4851 {
4852 struct loop *loop = data->current_loop;
4853 basic_block ex_bb;
4854 edge exit;
4855
4856 ex_bb = gimple_bb (use->stmt);
4857 exit = EDGE_SUCC (ex_bb, 0);
4858 if (flow_bb_inside_loop_p (loop, exit->dest))
4859 exit = EDGE_SUCC (ex_bb, 1);
4860
4861 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4862 }
4863
4864 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4865 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4866 calculation is performed in non-wrapping type.
4867
4868 TODO: More generally, we could test for the situation that
4869 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4870 This would require knowing the sign of OFFSET. */
4871
4872 static bool
4873 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4874 {
4875 enum tree_code code;
4876 tree e1, e2;
4877 aff_tree aff_e1, aff_e2, aff_offset;
4878
4879 if (!nowrap_type_p (TREE_TYPE (base)))
4880 return false;
4881
4882 base = expand_simple_operations (base);
4883
4884 if (TREE_CODE (base) == SSA_NAME)
4885 {
4886 gimple *stmt = SSA_NAME_DEF_STMT (base);
4887
4888 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4889 return false;
4890
4891 code = gimple_assign_rhs_code (stmt);
4892 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4893 return false;
4894
4895 e1 = gimple_assign_rhs1 (stmt);
4896 e2 = gimple_assign_rhs2 (stmt);
4897 }
4898 else
4899 {
4900 code = TREE_CODE (base);
4901 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4902 return false;
4903 e1 = TREE_OPERAND (base, 0);
4904 e2 = TREE_OPERAND (base, 1);
4905 }
4906
4907 /* Use affine expansion as deeper inspection to prove the equality. */
4908 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4909 &aff_e2, &data->name_expansion_cache);
4910 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4911 &aff_offset, &data->name_expansion_cache);
4912 aff_combination_scale (&aff_offset, -1);
4913 switch (code)
4914 {
4915 case PLUS_EXPR:
4916 aff_combination_add (&aff_e2, &aff_offset);
4917 if (aff_combination_zero_p (&aff_e2))
4918 return true;
4919
4920 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4921 &aff_e1, &data->name_expansion_cache);
4922 aff_combination_add (&aff_e1, &aff_offset);
4923 return aff_combination_zero_p (&aff_e1);
4924
4925 case POINTER_PLUS_EXPR:
4926 aff_combination_add (&aff_e2, &aff_offset);
4927 return aff_combination_zero_p (&aff_e2);
4928
4929 default:
4930 return false;
4931 }
4932 }
4933
4934 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4935 comparison with CAND. NITER describes the number of iterations of
4936 the loops. If successful, the comparison in COMP_P is altered accordingly.
4937
4938 We aim to handle the following situation:
4939
4940 sometype *base, *p;
4941 int a, b, i;
4942
4943 i = a;
4944 p = p_0 = base + a;
4945
4946 do
4947 {
4948 bla (*p);
4949 p++;
4950 i++;
4951 }
4952 while (i < b);
4953
4954 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4955 We aim to optimize this to
4956
4957 p = p_0 = base + a;
4958 do
4959 {
4960 bla (*p);
4961 p++;
4962 }
4963 while (p < p_0 - a + b);
4964
4965 This preserves correctness, since the pointer arithmetic does not
4966 overflow. More precisely:
4967
4968 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4969 overflow in computing it or the values of p.
4970 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4971 overflow. To prove this, we use the fact that p_0 = base + a. */
4972
4973 static bool
4974 iv_elimination_compare_lt (struct ivopts_data *data,
4975 struct iv_cand *cand, enum tree_code *comp_p,
4976 struct tree_niter_desc *niter)
4977 {
4978 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4979 struct aff_tree nit, tmpa, tmpb;
4980 enum tree_code comp;
4981 HOST_WIDE_INT step;
4982
4983 /* We need to know that the candidate induction variable does not overflow.
4984 While more complex analysis may be used to prove this, for now just
4985 check that the variable appears in the original program and that it
4986 is computed in a type that guarantees no overflows. */
4987 cand_type = TREE_TYPE (cand->iv->base);
4988 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4989 return false;
4990
4991 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4992 the calculation of the BOUND could overflow, making the comparison
4993 invalid. */
4994 if (!data->loop_single_exit_p)
4995 return false;
4996
4997 /* We need to be able to decide whether candidate is increasing or decreasing
4998 in order to choose the right comparison operator. */
4999 if (!cst_and_fits_in_hwi (cand->iv->step))
5000 return false;
5001 step = int_cst_value (cand->iv->step);
5002
5003 /* Check that the number of iterations matches the expected pattern:
5004 a + 1 > b ? 0 : b - a - 1. */
5005 mbz = niter->may_be_zero;
5006 if (TREE_CODE (mbz) == GT_EXPR)
5007 {
5008 /* Handle a + 1 > b. */
5009 tree op0 = TREE_OPERAND (mbz, 0);
5010 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5011 {
5012 a = TREE_OPERAND (op0, 0);
5013 b = TREE_OPERAND (mbz, 1);
5014 }
5015 else
5016 return false;
5017 }
5018 else if (TREE_CODE (mbz) == LT_EXPR)
5019 {
5020 tree op1 = TREE_OPERAND (mbz, 1);
5021
5022 /* Handle b < a + 1. */
5023 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5024 {
5025 a = TREE_OPERAND (op1, 0);
5026 b = TREE_OPERAND (mbz, 0);
5027 }
5028 else
5029 return false;
5030 }
5031 else
5032 return false;
5033
5034 /* Expected number of iterations is B - A - 1. Check that it matches
5035 the actual number, i.e., that B - A - NITER = 1. */
5036 tree_to_aff_combination (niter->niter, nit_type, &nit);
5037 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5038 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5039 aff_combination_scale (&nit, -1);
5040 aff_combination_scale (&tmpa, -1);
5041 aff_combination_add (&tmpb, &tmpa);
5042 aff_combination_add (&tmpb, &nit);
5043 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5044 return false;
5045
5046 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5047 overflow. */
5048 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5049 cand->iv->step,
5050 fold_convert (TREE_TYPE (cand->iv->step), a));
5051 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5052 return false;
5053
5054 /* Determine the new comparison operator. */
5055 comp = step < 0 ? GT_EXPR : LT_EXPR;
5056 if (*comp_p == NE_EXPR)
5057 *comp_p = comp;
5058 else if (*comp_p == EQ_EXPR)
5059 *comp_p = invert_tree_comparison (comp, false);
5060 else
5061 gcc_unreachable ();
5062
5063 return true;
5064 }
5065
5066 /* Check whether it is possible to express the condition in USE by comparison
5067 of candidate CAND. If so, store the value compared with to BOUND, and the
5068 comparison operator to COMP. */
5069
5070 static bool
5071 may_eliminate_iv (struct ivopts_data *data,
5072 struct iv_use *use, struct iv_cand *cand, tree *bound,
5073 enum tree_code *comp)
5074 {
5075 basic_block ex_bb;
5076 edge exit;
5077 tree period;
5078 struct loop *loop = data->current_loop;
5079 aff_tree bnd;
5080 struct tree_niter_desc *desc = NULL;
5081
5082 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5083 return false;
5084
5085 /* For now works only for exits that dominate the loop latch.
5086 TODO: extend to other conditions inside loop body. */
5087 ex_bb = gimple_bb (use->stmt);
5088 if (use->stmt != last_stmt (ex_bb)
5089 || gimple_code (use->stmt) != GIMPLE_COND
5090 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5091 return false;
5092
5093 exit = EDGE_SUCC (ex_bb, 0);
5094 if (flow_bb_inside_loop_p (loop, exit->dest))
5095 exit = EDGE_SUCC (ex_bb, 1);
5096 if (flow_bb_inside_loop_p (loop, exit->dest))
5097 return false;
5098
5099 desc = niter_for_exit (data, exit);
5100 if (!desc)
5101 return false;
5102
5103 /* Determine whether we can use the variable to test the exit condition.
5104 This is the case iff the period of the induction variable is greater
5105 than the number of iterations for which the exit condition is true. */
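/* For illustration (made-up numbers): an 8-bit unsigned candidate with
   step 1 has period 255, so it can be used for an exit that is reached
   after at most 255 iterations (254 if the compare is after the
   increment), but not for one that may run longer.  */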
5106 period = iv_period (cand->iv);
5107
5108 /* If the number of iterations is constant, compare against it directly. */
5109 if (TREE_CODE (desc->niter) == INTEGER_CST)
5110 {
5111 /* See cand_value_at. */
5112 if (stmt_after_increment (loop, cand, use->stmt))
5113 {
5114 if (!tree_int_cst_lt (desc->niter, period))
5115 return false;
5116 }
5117 else
5118 {
5119 if (tree_int_cst_lt (period, desc->niter))
5120 return false;
5121 }
5122 }
5123
5124 /* If not, and if this is the only possible exit of the loop, see whether
5125 we can get a conservative estimate on the number of iterations of the
5126 entire loop and compare against that instead. */
5127 else
5128 {
5129 widest_int period_value, max_niter;
5130
5131 max_niter = desc->max;
5132 if (stmt_after_increment (loop, cand, use->stmt))
5133 max_niter += 1;
5134 period_value = wi::to_widest (period);
5135 if (wi::gtu_p (max_niter, period_value))
5136 {
5137 /* See if we can take advantage of inferred loop bound
5138 information. */
5139 if (data->loop_single_exit_p)
5140 {
5141 if (!max_loop_iterations (loop, &max_niter))
5142 return false;
5143 /* The loop bound is already adjusted by adding 1. */
5144 if (wi::gtu_p (max_niter, period_value))
5145 return false;
5146 }
5147 else
5148 return false;
5149 }
5150 }
5151
5152 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5153
5154 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5155 aff_combination_to_tree (&bnd));
5156 *comp = iv_elimination_compare (data, use);
5157
5158 /* It is unlikely that computing the number of iterations using division
5159 would be more profitable than keeping the original induction variable. */
5160 if (expression_expensive_p (*bound))
5161 return false;
5162
5163 /* Sometimes, it is possible to handle the situation that the number of
5164 iterations may be zero unless additional assumptions hold, by using <
5165 instead of != in the exit condition.
5166
5167 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5168 base the exit condition on it. However, that is often too
5169 expensive. */
5170 if (!integer_zerop (desc->may_be_zero))
5171 return iv_elimination_compare_lt (data, cand, comp, desc);
5172
5173 return true;
5174 }
5175
5176 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5177 be copied, if it is used in the loop body and DATA->body_includes_call. */
5178
5179 static int
5180 parm_decl_cost (struct ivopts_data *data, tree bound)
5181 {
5182 tree sbound = bound;
5183 STRIP_NOPS (sbound);
5184
5185 if (TREE_CODE (sbound) == SSA_NAME
5186 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5187 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5188 && data->body_includes_call)
5189 return COSTS_N_INSNS (1);
5190
5191 return 0;
5192 }
5193
5194 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5195
5196 static bool
5197 determine_group_iv_cost_cond (struct ivopts_data *data,
5198 struct iv_group *group, struct iv_cand *cand)
5199 {
5200 tree bound = NULL_TREE;
5201 struct iv *cmp_iv;
5202 bitmap inv_exprs = NULL;
5203 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5204 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5205 enum comp_iv_rewrite rewrite_type;
5206 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5207 tree *control_var, *bound_cst;
5208 enum tree_code comp = ERROR_MARK;
5209 struct iv_use *use = group->vuses[0];
5210
5211 /* Extract condition operands. */
5212 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5213 &bound_cst, NULL, &cmp_iv);
5214 gcc_assert (rewrite_type != COMP_IV_NA);
5215
5216 /* Try iv elimination. */
5217 if (rewrite_type == COMP_IV_ELIM
5218 && may_eliminate_iv (data, use, cand, &bound, &comp))
5219 {
5220 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5221 if (elim_cost.cost == 0)
5222 elim_cost.cost = parm_decl_cost (data, bound);
5223 else if (TREE_CODE (bound) == INTEGER_CST)
5224 elim_cost.cost = 0;
5225 /* If we replace a loop condition 'i < n' with 'p < base + n',
5226 inv_vars_elim will have 'base' and 'n' set, which implies that both
5227 'base' and 'n' will be live during the loop. More likely,
5228 'base + n' will be loop invariant, resulting in only one live value
5229 during the loop. So in that case we clear inv_vars_elim and set
5230 inv_expr_elim instead. */
5231 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5232 {
5233 inv_expr_elim = get_loop_invariant_expr (data, bound);
5234 bitmap_clear (inv_vars_elim);
5235 }
5236 /* The bound is a loop invariant, so it will be only computed
5237 once. */
5238 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5239 }
5240
5241 /* When the condition is a comparison of the candidate IV against
5242 zero, prefer this IV.
5243
5244 TODO: The constant that we're subtracting from the cost should
5245 be target-dependent. This information should be added to the
5246 target costs for each backend. */
5247 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5248 && integer_zerop (*bound_cst)
5249 && (operand_equal_p (*control_var, cand->var_after, 0)
5250 || operand_equal_p (*control_var, cand->var_before, 0)))
5251 elim_cost -= 1;
5252
5253 express_cost = get_computation_cost (data, use, cand, false,
5254 &inv_vars_express, NULL,
5255 &inv_expr_express);
5256 if (cmp_iv != NULL)
5257 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5258
5259 /* Count the cost of the original bound as well. */
5260 bound_cost = force_var_cost (data, *bound_cst, NULL);
5261 if (bound_cost.cost == 0)
5262 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5263 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5264 bound_cost.cost = 0;
5265 express_cost += bound_cost;
5266
5267 /* Choose the better approach, preferring the eliminated IV. */
5268 if (elim_cost <= express_cost)
5269 {
5270 cost = elim_cost;
5271 inv_vars = inv_vars_elim;
5272 inv_vars_elim = NULL;
5273 inv_expr = inv_expr_elim;
5274 }
5275 else
5276 {
5277 cost = express_cost;
5278 inv_vars = inv_vars_express;
5279 inv_vars_express = NULL;
5280 bound = NULL_TREE;
5281 comp = ERROR_MARK;
5282 inv_expr = inv_expr_express;
5283 }
5284
5285 if (inv_expr)
5286 {
5287 inv_exprs = BITMAP_ALLOC (NULL);
5288 bitmap_set_bit (inv_exprs, inv_expr->id);
5289 }
5290 set_group_iv_cost (data, group, cand, cost,
5291 inv_vars, bound, comp, inv_exprs);
5292
5293 if (inv_vars_elim)
5294 BITMAP_FREE (inv_vars_elim);
5295 if (inv_vars_express)
5296 BITMAP_FREE (inv_vars_express);
5297
5298 return !cost.infinite_cost_p ();
5299 }
5300
5301 /* Determines cost of computing uses in GROUP with CAND. Returns false
5302 if USE cannot be represented with CAND. */
5303
5304 static bool
5305 determine_group_iv_cost (struct ivopts_data *data,
5306 struct iv_group *group, struct iv_cand *cand)
5307 {
5308 switch (group->type)
5309 {
5310 case USE_NONLINEAR_EXPR:
5311 return determine_group_iv_cost_generic (data, group, cand);
5312
5313 case USE_REF_ADDRESS:
5314 case USE_PTR_ADDRESS:
5315 return determine_group_iv_cost_address (data, group, cand);
5316
5317 case USE_COMPARE:
5318 return determine_group_iv_cost_cond (data, group, cand);
5319
5320 default:
5321 gcc_unreachable ();
5322 }
5323 }
5324
5325 /* Return true if get_computation_cost indicates that autoincrement is
5326 a possibility for the pair of USE and CAND, false otherwise. */
5327
5328 static bool
5329 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5330 struct iv_cand *cand)
5331 {
5332 if (!address_p (use->type))
5333 return false;
5334
5335 bool can_autoinc = false;
5336 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5337 return can_autoinc;
5338 }
5339
5340 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5341 use that allows autoincrement, and set their AINC_USE if possible. */
5342
5343 static void
5344 set_autoinc_for_original_candidates (struct ivopts_data *data)
5345 {
5346 unsigned i, j;
5347
5348 for (i = 0; i < data->vcands.length (); i++)
5349 {
5350 struct iv_cand *cand = data->vcands[i];
5351 struct iv_use *closest_before = NULL;
5352 struct iv_use *closest_after = NULL;
5353 if (cand->pos != IP_ORIGINAL)
5354 continue;
5355
5356 for (j = 0; j < data->vgroups.length (); j++)
5357 {
5358 struct iv_group *group = data->vgroups[j];
5359 struct iv_use *use = group->vuses[0];
5360 unsigned uid = gimple_uid (use->stmt);
5361
5362 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5363 continue;
5364
5365 if (uid < gimple_uid (cand->incremented_at)
5366 && (closest_before == NULL
5367 || uid > gimple_uid (closest_before->stmt)))
5368 closest_before = use;
5369
5370 if (uid > gimple_uid (cand->incremented_at)
5371 && (closest_after == NULL
5372 || uid < gimple_uid (closest_after->stmt)))
5373 closest_after = use;
5374 }
5375
5376 if (closest_before != NULL
5377 && autoinc_possible_for_pair (data, closest_before, cand))
5378 cand->ainc_use = closest_before;
5379 else if (closest_after != NULL
5380 && autoinc_possible_for_pair (data, closest_after, cand))
5381 cand->ainc_use = closest_after;
5382 }
5383 }
5384
5385 /* Relate compare use with all candidates. */
5386
5387 static void
5388 relate_compare_use_with_all_cands (struct ivopts_data *data)
5389 {
5390 unsigned i, count = data->vcands.length ();
5391 for (i = 0; i < data->vgroups.length (); i++)
5392 {
5393 struct iv_group *group = data->vgroups[i];
5394
5395 if (group->type == USE_COMPARE)
5396 bitmap_set_range (group->related_cands, 0, count);
5397 }
5398 }
5399
5400 /* Finds the candidates for the induction variables. */
5401
5402 static void
5403 find_iv_candidates (struct ivopts_data *data)
5404 {
5405 /* Add commonly used ivs. */
5406 add_standard_iv_candidates (data);
5407
5408 /* Add old induction variables. */
5409 add_iv_candidate_for_bivs (data);
5410
5411 /* Add induction variables derived from uses. */
5412 add_iv_candidate_for_groups (data);
5413
5414 set_autoinc_for_original_candidates (data);
5415
5416 /* Record the important candidates. */
5417 record_important_candidates (data);
5418
5419 /* Relate compare iv_use with all candidates. */
5420 if (!data->consider_all_candidates)
5421 relate_compare_use_with_all_cands (data);
5422
5423 if (dump_file && (dump_flags & TDF_DETAILS))
5424 {
5425 unsigned i;
5426
5427 fprintf (dump_file, "\n<Important Candidates>:\t");
5428 for (i = 0; i < data->vcands.length (); i++)
5429 if (data->vcands[i]->important)
5430 fprintf (dump_file, " %d,", data->vcands[i]->id);
5431 fprintf (dump_file, "\n");
5432
5433 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5434 for (i = 0; i < data->vgroups.length (); i++)
5435 {
5436 struct iv_group *group = data->vgroups[i];
5437
5438 if (group->related_cands)
5439 {
5440 fprintf (dump_file, " Group %d:\t", group->id);
5441 dump_bitmap (dump_file, group->related_cands);
5442 }
5443 }
5444 fprintf (dump_file, "\n");
5445 }
5446 }
5447
5448 /* Determines costs of computing use of iv with an iv candidate. */
5449
5450 static void
5451 determine_group_iv_costs (struct ivopts_data *data)
5452 {
5453 unsigned i, j;
5454 struct iv_cand *cand;
5455 struct iv_group *group;
5456 bitmap to_clear = BITMAP_ALLOC (NULL);
5457
5458 alloc_use_cost_map (data);
5459
5460 for (i = 0; i < data->vgroups.length (); i++)
5461 {
5462 group = data->vgroups[i];
5463
5464 if (data->consider_all_candidates)
5465 {
5466 for (j = 0; j < data->vcands.length (); j++)
5467 {
5468 cand = data->vcands[j];
5469 determine_group_iv_cost (data, group, cand);
5470 }
5471 }
5472 else
5473 {
5474 bitmap_iterator bi;
5475
5476 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5477 {
5478 cand = data->vcands[j];
5479 if (!determine_group_iv_cost (data, group, cand))
5480 bitmap_set_bit (to_clear, j);
5481 }
5482
5483 /* Remove the candidates for which the cost is infinite from
5484 the list of related candidates. */
5485 bitmap_and_compl_into (group->related_cands, to_clear);
5486 bitmap_clear (to_clear);
5487 }
5488 }
5489
5490 BITMAP_FREE (to_clear);
5491
5492 if (dump_file && (dump_flags & TDF_DETAILS))
5493 {
5494 bitmap_iterator bi;
5495
5496 /* Dump invariant variables. */
5497 fprintf (dump_file, "\n<Invariant Vars>:\n");
5498 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5499 {
5500 struct version_info *info = ver_info (data, i);
5501 if (info->inv_id)
5502 {
5503 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5504 print_generic_expr (dump_file, info->name, TDF_SLIM);
5505 fprintf (dump_file, "%s\n",
5506 info->has_nonlin_use ? "" : "\t(eliminable)");
5507 }
5508 }
5509
5510 /* Dump invariant expressions. */
5511 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5512 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5513
5514 for (hash_table<iv_inv_expr_hasher>::iterator it
5515 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5516 ++it)
5517 list.safe_push (*it);
5518
5519 list.qsort (sort_iv_inv_expr_ent);
5520
5521 for (i = 0; i < list.length (); ++i)
5522 {
5523 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5524 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5525 fprintf (dump_file, "\n");
5526 }
5527
5528 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5529
5530 for (i = 0; i < data->vgroups.length (); i++)
5531 {
5532 group = data->vgroups[i];
5533
5534 fprintf (dump_file, "Group %d:\n", i);
5535 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5536 for (j = 0; j < group->n_map_members; j++)
5537 {
5538 if (!group->cost_map[j].cand
5539 || group->cost_map[j].cost.infinite_cost_p ())
5540 continue;
5541
5542 fprintf (dump_file, " %d\t%d\t%d\t",
5543 group->cost_map[j].cand->id,
5544 group->cost_map[j].cost.cost,
5545 group->cost_map[j].cost.complexity);
5546 if (!group->cost_map[j].inv_exprs
5547 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5548 fprintf (dump_file, "NIL;\t");
5549 else
5550 bitmap_print (dump_file,
5551 group->cost_map[j].inv_exprs, "", ";\t");
5552 if (!group->cost_map[j].inv_vars
5553 || bitmap_empty_p (group->cost_map[j].inv_vars))
5554 fprintf (dump_file, "NIL;\n");
5555 else
5556 bitmap_print (dump_file,
5557 group->cost_map[j].inv_vars, "", "\n");
5558 }
5559
5560 fprintf (dump_file, "\n");
5561 }
5562 fprintf (dump_file, "\n");
5563 }
5564 }
5565
5566 /* Determines cost of the candidate CAND. */
5567
5568 static void
5569 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5570 {
5571 comp_cost cost_base;
5572 unsigned cost, cost_step;
5573 tree base;
5574
5575 gcc_assert (cand->iv != NULL);
5576
5577 /* There are two costs associated with the candidate -- its increment
5578 and its initialization. The second is almost negligible for any loop
5579 that rolls enough, so we take it into account only to a small degree. */
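/* A made-up example: if forcing the base into a register costs 4 and one
   increment costs 1, then when optimizing for speed with an average of 10
   iterations the candidate cost is 1 + 4 / 10 = 1, possibly plus the small
   preference penalties added below.  */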
5580
5581 base = cand->iv->base;
5582 cost_base = force_var_cost (data, base, NULL);
5583 /* It will be exceptional that the iv register happens to be initialized with
5584 the proper value at no cost. In general, there will at least be a regcopy
5585 or a const set. */
5586 if (cost_base.cost == 0)
5587 cost_base.cost = COSTS_N_INSNS (1);
5588 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5589
5590 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5591
5592 /* Prefer the original ivs unless we may gain something by replacing them.
5593 The reason is to make debugging simpler; this is not relevant for
5594 artificial ivs created by other optimization passes. */
5595 if (cand->pos != IP_ORIGINAL
5596 || !SSA_NAME_VAR (cand->var_before)
5597 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5598 cost++;
5599
5600 /* Prefer not to insert statements into latch unless there are some
5601 already (so that we do not create unnecessary jumps). */
5602 if (cand->pos == IP_END
5603 && empty_block_p (ip_end_pos (data->current_loop)))
5604 cost++;
5605
5606 cand->cost = cost;
5607 cand->cost_step = cost_step;
5608 }
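/* A small worked example of the cost computed above, using purely
   hypothetical numbers rather than any particular target: if forcing the
   base into a register costs COSTS_N_INSNS (1), the step addition costs
   add_cost = 4 and the loop rolls often enough that adjust_setup_cost
   scales the setup cost down to 1, the candidate cost starts at 4 + 1 = 5;
   a candidate that is not an original user iv gets +1, and another +1 is
   added if it would be incremented in an otherwise empty latch block.  */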
5609
5610 /* Determines costs of computation of the candidates. */
5611
5612 static void
5613 determine_iv_costs (struct ivopts_data *data)
5614 {
5615 unsigned i;
5616
5617 if (dump_file && (dump_flags & TDF_DETAILS))
5618 {
5619 fprintf (dump_file, "<Candidate Costs>:\n");
5620 fprintf (dump_file, " cand\tcost\n");
5621 }
5622
5623 for (i = 0; i < data->vcands.length (); i++)
5624 {
5625 struct iv_cand *cand = data->vcands[i];
5626
5627 determine_iv_cost (data, cand);
5628
5629 if (dump_file && (dump_flags & TDF_DETAILS))
5630 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5631 }
5632
5633 if (dump_file && (dump_flags & TDF_DETAILS))
5634 fprintf (dump_file, "\n");
5635 }
5636
5637 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5638 induction variables. Note N_INVS includes both invariant variables and
5639 invariant expressions. */
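/* As a worked example of the estimate below, with purely hypothetical
   numbers rather than any real target: with available_regs = 16,
   target_res_regs = 3, regs_used = 10, n_invs = 4 and n_cands = 4 we get
   n_new = 8 and regs_needed = 18.  Since 18 + 3 >= 16 and 18 > 16 but
   n_cands = 4 <= 16, the third case applies and the estimate is
   target_reg_cost * 16 + target_spill_cost * (18 - 16), plus the final
   n_cands = 4 that biases the search towards fewer induction variables.  */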
5640
5641 static unsigned
5642 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5643 unsigned n_cands)
5644 {
5645 unsigned cost;
5646 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5647 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5648 bool speed = data->speed;
5649
5650 /* If there is a call in the loop body, the call-clobbered registers
5651 are not available for loop invariants. */
5652 if (data->body_includes_call)
5653 available_regs = available_regs - target_clobbered_regs;
5654
5655 /* If we have enough registers, the cost is simply the number of new ones. */
5656 if (regs_needed + target_res_regs < available_regs)
5657 cost = n_new;
5658 /* If close to running out of registers, try to preserve them. */
5659 else if (regs_needed <= available_regs)
5660 cost = target_reg_cost [speed] * regs_needed;
5661 /* If the registers needed exceed the ones available but the number of
5662 candidates does not, we penalize the extra registers using target_spill_cost. */
5663 else if (n_cands <= available_regs)
5664 cost = target_reg_cost [speed] * available_regs
5665 + target_spill_cost [speed] * (regs_needed - available_regs);
5666 /* If even the number of candidates exceeds the available registers, we
5667 penalize the extra candidate registers using target_spill_cost * 2, since it
5668 is more expensive to spill an induction variable than an invariant. */
5669 else
5670 cost = target_reg_cost [speed] * available_regs
5671 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5672 + target_spill_cost [speed] * (regs_needed - n_cands);
5673
5674 /* Finally, add the number of candidates, so that we prefer eliminating
5675 induction variables if possible. */
5676 return cost + n_cands;
5677 }
5678
5679 /* For each size of the induction variable set determine the penalty. */
5680
5681 static void
5682 determine_set_costs (struct ivopts_data *data)
5683 {
5684 unsigned j, n;
5685 gphi *phi;
5686 gphi_iterator psi;
5687 tree op;
5688 struct loop *loop = data->current_loop;
5689 bitmap_iterator bi;
5690
5691 if (dump_file && (dump_flags & TDF_DETAILS))
5692 {
5693 fprintf (dump_file, "<Global Costs>:\n");
5694 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5695 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5696 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5697 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5698 }
5699
5700 n = 0;
5701 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5702 {
5703 phi = psi.phi ();
5704 op = PHI_RESULT (phi);
5705
5706 if (virtual_operand_p (op))
5707 continue;
5708
5709 if (get_iv (data, op))
5710 continue;
5711
5712 if (!POINTER_TYPE_P (TREE_TYPE (op))
5713 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5714 continue;
5715
5716 n++;
5717 }
5718
5719 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5720 {
5721 struct version_info *info = ver_info (data, j);
5722
5723 if (info->inv_id && info->has_nonlin_use)
5724 n++;
5725 }
5726
5727 data->regs_used = n;
5728 if (dump_file && (dump_flags & TDF_DETAILS))
5729 fprintf (dump_file, " regs_used %d\n", n);
5730
5731 if (dump_file && (dump_flags & TDF_DETAILS))
5732 {
5733 fprintf (dump_file, " cost for size:\n");
5734 fprintf (dump_file, " ivs\tcost\n");
5735 for (j = 0; j <= 2 * target_avail_regs; j++)
5736 fprintf (dump_file, " %d\t%d\n", j,
5737 ivopts_estimate_reg_pressure (data, 0, j));
5738 fprintf (dump_file, "\n");
5739 }
5740 }
5741
5742 /* Returns true if A is a cheaper cost pair than B. */
5743
5744 static bool
5745 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5746 {
5747 if (!a)
5748 return false;
5749
5750 if (!b)
5751 return true;
5752
5753 if (a->cost < b->cost)
5754 return true;
5755
5756 if (b->cost < a->cost)
5757 return false;
5758
5759 /* In case the costs are the same, prefer the cheaper candidate. */
5760 if (a->cand->cost < b->cand->cost)
5761 return true;
5762
5763 return false;
5764 }
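/* Note that the comparison above is deliberately strict: when both the group
   costs and the candidate costs are equal, neither pair is considered cheaper,
   so an already chosen cost pair is kept rather than churned.  */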
5765
5766 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5767 for more expensive, equal and cheaper respectively. */
5768
5769 static int
5770 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5771 {
5772 if (cheaper_cost_pair (a, b))
5773 return -1;
5774 if (cheaper_cost_pair (b, a))
5775 return 1;
5776
5777 return 0;
5778 }
5779
5780 /* Returns the cost pair by which GROUP is expressed in IVS. */
5781
5782 static struct cost_pair *
5783 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5784 {
5785 return ivs->cand_for_group[group->id];
5786 }
5787
5788 /* Computes the cost field of IVS structure. */
5789
5790 static void
5791 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5792 {
5793 comp_cost cost = ivs->cand_use_cost;
5794
5795 cost += ivs->cand_cost;
5796 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5797 ivs->cost = cost;
5798 }
5799
5800 /* Remove uses of the invariants in set INVS by decreasing the counters in
5801 N_INV_USES and updating IVS. */
5802
5803 static void
5804 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5805 {
5806 bitmap_iterator bi;
5807 unsigned iid;
5808
5809 if (!invs)
5810 return;
5811
5812 gcc_assert (n_inv_uses != NULL);
5813 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5814 {
5815 n_inv_uses[iid]--;
5816 if (n_inv_uses[iid] == 0)
5817 ivs->n_invs--;
5818 }
5819 }
5820
5821 /* Set GROUP not to be expressed by any candidate in IVS. */
5822
5823 static void
5824 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5825 struct iv_group *group)
5826 {
5827 unsigned gid = group->id, cid;
5828 struct cost_pair *cp;
5829
5830 cp = ivs->cand_for_group[gid];
5831 if (!cp)
5832 return;
5833 cid = cp->cand->id;
5834
5835 ivs->bad_groups++;
5836 ivs->cand_for_group[gid] = NULL;
5837 ivs->n_cand_uses[cid]--;
5838
5839 if (ivs->n_cand_uses[cid] == 0)
5840 {
5841 bitmap_clear_bit (ivs->cands, cid);
5842 ivs->n_cands--;
5843 ivs->cand_cost -= cp->cand->cost;
5844 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5845 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5846 }
5847
5848 ivs->cand_use_cost -= cp->cost;
5849 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5850 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5851 iv_ca_recount_cost (data, ivs);
5852 }
5853
5854 /* Add uses of the invariants in set INVS by increasing the counters in
5855 N_INV_USES and updating IVS. */
5856
5857 static void
5858 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5859 {
5860 bitmap_iterator bi;
5861 unsigned iid;
5862
5863 if (!invs)
5864 return;
5865
5866 gcc_assert (n_inv_uses != NULL);
5867 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5868 {
5869 n_inv_uses[iid]++;
5870 if (n_inv_uses[iid] == 1)
5871 ivs->n_invs++;
5872 }
5873 }
5874
5875 /* Set cost pair for GROUP in set IVS to CP. */
5876
5877 static void
5878 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5879 struct iv_group *group, struct cost_pair *cp)
5880 {
5881 unsigned gid = group->id, cid;
5882
5883 if (ivs->cand_for_group[gid] == cp)
5884 return;
5885
5886 if (ivs->cand_for_group[gid])
5887 iv_ca_set_no_cp (data, ivs, group);
5888
5889 if (cp)
5890 {
5891 cid = cp->cand->id;
5892
5893 ivs->bad_groups--;
5894 ivs->cand_for_group[gid] = cp;
5895 ivs->n_cand_uses[cid]++;
5896 if (ivs->n_cand_uses[cid] == 1)
5897 {
5898 bitmap_set_bit (ivs->cands, cid);
5899 ivs->n_cands++;
5900 ivs->cand_cost += cp->cand->cost;
5901 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5902 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5903 }
5904
5905 ivs->cand_use_cost += cp->cost;
5906 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5907 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5908 iv_ca_recount_cost (data, ivs);
5909 }
5910 }
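/* The bookkeeping in iv_ca_set_cp and iv_ca_set_no_cp maintains a simple
   invariant: n_cand_uses[] counts how many groups are currently expressed by
   each candidate, n_inv_var_uses[] and n_inv_expr_uses[] count references to
   each invariant, and n_cands / n_invs count only the entries whose counters
   are nonzero, so the register-pressure estimate sees each register once no
   matter how many groups share it.  */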
5911
5912 /* Extend set IVS by expressing GROUP by some of the candidates in it
5913 if possible. Consider all important candidates if the candidates in
5914 set IVS don't give any result. */
5915
5916 static void
5917 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5918 struct iv_group *group)
5919 {
5920 struct cost_pair *best_cp = NULL, *cp;
5921 bitmap_iterator bi;
5922 unsigned i;
5923 struct iv_cand *cand;
5924
5925 gcc_assert (ivs->upto >= group->id);
5926 ivs->upto++;
5927 ivs->bad_groups++;
5928
5929 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5930 {
5931 cand = data->vcands[i];
5932 cp = get_group_iv_cost (data, group, cand);
5933 if (cheaper_cost_pair (cp, best_cp))
5934 best_cp = cp;
5935 }
5936
5937 if (best_cp == NULL)
5938 {
5939 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5940 {
5941 cand = data->vcands[i];
5942 cp = get_group_iv_cost (data, group, cand);
5943 if (cheaper_cost_pair (cp, best_cp))
5944 best_cp = cp;
5945 }
5946 }
5947
5948 iv_ca_set_cp (data, ivs, group, best_cp);
5949 }
5950
5951 /* Get cost for assignment IVS. */
5952
5953 static comp_cost
5954 iv_ca_cost (struct iv_ca *ivs)
5955 {
5956 /* This was a conditional expression but it triggered a bug in
5957 Sun C 5.5. */
5958 if (ivs->bad_groups)
5959 return infinite_cost;
5960 else
5961 return ivs->cost;
5962 }
5963
5964 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5965 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5966 respectively. */
5967
5968 static int
5969 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5970 struct iv_group *group, struct cost_pair *old_cp,
5971 struct cost_pair *new_cp)
5972 {
5973 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5974 unsigned old_n_invs = ivs->n_invs;
5975 iv_ca_set_cp (data, ivs, group, new_cp);
5976 unsigned new_n_invs = ivs->n_invs;
5977 iv_ca_set_cp (data, ivs, group, old_cp);
5978
5979 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5980 }
5981
5982 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5983 it before NEXT. */
5984
5985 static struct iv_ca_delta *
5986 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5987 struct cost_pair *new_cp, struct iv_ca_delta *next)
5988 {
5989 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5990
5991 change->group = group;
5992 change->old_cp = old_cp;
5993 change->new_cp = new_cp;
5994 change->next = next;
5995
5996 return change;
5997 }
5998
5999 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6000 are rewritten. */
6001
6002 static struct iv_ca_delta *
6003 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6004 {
6005 struct iv_ca_delta *last;
6006
6007 if (!l2)
6008 return l1;
6009
6010 if (!l1)
6011 return l2;
6012
6013 for (last = l1; last->next; last = last->next)
6014 continue;
6015 last->next = l2;
6016
6017 return l1;
6018 }
6019
6020 /* Reverse the list of changes DELTA, forming the inverse to it. */
6021
6022 static struct iv_ca_delta *
6023 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6024 {
6025 struct iv_ca_delta *act, *next, *prev = NULL;
6026
6027 for (act = delta; act; act = next)
6028 {
6029 next = act->next;
6030 act->next = prev;
6031 prev = act;
6032
6033 std::swap (act->old_cp, act->new_cp);
6034 }
6035
6036 return prev;
6037 }
6038
6039 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6040 reverted instead. */
6041
6042 static void
6043 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6044 struct iv_ca_delta *delta, bool forward)
6045 {
6046 struct cost_pair *from, *to;
6047 struct iv_ca_delta *act;
6048
6049 if (!forward)
6050 delta = iv_ca_delta_reverse (delta);
6051
6052 for (act = delta; act; act = act->next)
6053 {
6054 from = act->old_cp;
6055 to = act->new_cp;
6056 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6057 iv_ca_set_cp (data, ivs, act->group, to);
6058 }
6059
6060 if (!forward)
6061 iv_ca_delta_reverse (delta);
6062 }
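/* A sketch of the usage pattern the functions below rely on: a caller builds
   a DELTA with iv_ca_delta_add, applies it tentatively with
   iv_ca_delta_commit (data, ivs, delta, true), inspects iv_ca_cost (ivs),
   undoes it with iv_ca_delta_commit (data, ivs, delta, false), and finally
   either commits the delta for good or releases it with iv_ca_delta_free.  */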
6063
6064 /* Returns true if CAND is used in IVS. */
6065
6066 static bool
6067 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6068 {
6069 return ivs->n_cand_uses[cand->id] > 0;
6070 }
6071
6072 /* Returns number of induction variable candidates in the set IVS. */
6073
6074 static unsigned
6075 iv_ca_n_cands (struct iv_ca *ivs)
6076 {
6077 return ivs->n_cands;
6078 }
6079
6080 /* Free the list of changes DELTA. */
6081
6082 static void
6083 iv_ca_delta_free (struct iv_ca_delta **delta)
6084 {
6085 struct iv_ca_delta *act, *next;
6086
6087 for (act = *delta; act; act = next)
6088 {
6089 next = act->next;
6090 free (act);
6091 }
6092
6093 *delta = NULL;
6094 }
6095
6096 /* Allocates a new iv candidate assignment. */
6097
6098 static struct iv_ca *
6099 iv_ca_new (struct ivopts_data *data)
6100 {
6101 struct iv_ca *nw = XNEW (struct iv_ca);
6102
6103 nw->upto = 0;
6104 nw->bad_groups = 0;
6105 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6106 data->vgroups.length ());
6107 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6108 nw->cands = BITMAP_ALLOC (NULL);
6109 nw->n_cands = 0;
6110 nw->n_invs = 0;
6111 nw->cand_use_cost = no_cost;
6112 nw->cand_cost = 0;
6113 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6114 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6115 nw->cost = no_cost;
6116
6117 return nw;
6118 }
6119
6120 /* Free memory occupied by the set IVS. */
6121
6122 static void
6123 iv_ca_free (struct iv_ca **ivs)
6124 {
6125 free ((*ivs)->cand_for_group);
6126 free ((*ivs)->n_cand_uses);
6127 BITMAP_FREE ((*ivs)->cands);
6128 free ((*ivs)->n_inv_var_uses);
6129 free ((*ivs)->n_inv_expr_uses);
6130 free (*ivs);
6131 *ivs = NULL;
6132 }
6133
6134 /* Dumps IVS to FILE. */
6135
6136 static void
6137 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6138 {
6139 unsigned i;
6140 comp_cost cost = iv_ca_cost (ivs);
6141
6142 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6143 cost.complexity);
6144 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6145 ivs->cand_cost, ivs->cand_use_cost.cost,
6146 ivs->cand_use_cost.complexity);
6147 bitmap_print (file, ivs->cands, " candidates: ","\n");
6148
6149 for (i = 0; i < ivs->upto; i++)
6150 {
6151 struct iv_group *group = data->vgroups[i];
6152 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6153 if (cp)
6154 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6155 group->id, cp->cand->id, cp->cost.cost,
6156 cp->cost.complexity);
6157 else
6158 fprintf (file, " group:%d --> ??\n", group->id);
6159 }
6160
6161 const char *pref = "";
6162 fprintf (file, " invariant variables: ");
6163 for (i = 1; i <= data->max_inv_var_id; i++)
6164 if (ivs->n_inv_var_uses[i])
6165 {
6166 fprintf (file, "%s%d", pref, i);
6167 pref = ", ";
6168 }
6169
6170 pref = "";
6171 fprintf (file, "\n invariant expressions: ");
6172 for (i = 1; i <= data->max_inv_expr_id; i++)
6173 if (ivs->n_inv_expr_uses[i])
6174 {
6175 fprintf (file, "%s%d", pref, i);
6176 pref = ", ";
6177 }
6178
6179 fprintf (file, "\n\n");
6180 }
6181
6182 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6183 new set, and store differences in DELTA. Number of induction variables
6184 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6185 the function will try to find a solution with minimal iv candidates. */
6186
6187 static comp_cost
6188 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6189 struct iv_cand *cand, struct iv_ca_delta **delta,
6190 unsigned *n_ivs, bool min_ncand)
6191 {
6192 unsigned i;
6193 comp_cost cost;
6194 struct iv_group *group;
6195 struct cost_pair *old_cp, *new_cp;
6196
6197 *delta = NULL;
6198 for (i = 0; i < ivs->upto; i++)
6199 {
6200 group = data->vgroups[i];
6201 old_cp = iv_ca_cand_for_group (ivs, group);
6202
6203 if (old_cp
6204 && old_cp->cand == cand)
6205 continue;
6206
6207 new_cp = get_group_iv_cost (data, group, cand);
6208 if (!new_cp)
6209 continue;
6210
6211 if (!min_ncand)
6212 {
6213 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6214 /* Skip if new_cp depends on more invariants. */
6215 if (cmp_invs > 0)
6216 continue;
6217
6218 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6219 /* Skip if new_cp is not cheaper. */
6220 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6221 continue;
6222 }
6223
6224 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6225 }
6226
6227 iv_ca_delta_commit (data, ivs, *delta, true);
6228 cost = iv_ca_cost (ivs);
6229 if (n_ivs)
6230 *n_ivs = iv_ca_n_cands (ivs);
6231 iv_ca_delta_commit (data, ivs, *delta, false);
6232
6233 return cost;
6234 }
6235
6236 /* Try narrowing set IVS by removing CAND. Return the cost of
6237 the new set and store the differences in DELTA. START is
6238 the candidate with which we start narrowing. */
6239
6240 static comp_cost
6241 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6242 struct iv_cand *cand, struct iv_cand *start,
6243 struct iv_ca_delta **delta)
6244 {
6245 unsigned i, ci;
6246 struct iv_group *group;
6247 struct cost_pair *old_cp, *new_cp, *cp;
6248 bitmap_iterator bi;
6249 struct iv_cand *cnd;
6250 comp_cost cost, best_cost, acost;
6251
6252 *delta = NULL;
6253 for (i = 0; i < data->vgroups.length (); i++)
6254 {
6255 group = data->vgroups[i];
6256
6257 old_cp = iv_ca_cand_for_group (ivs, group);
6258 if (old_cp->cand != cand)
6259 continue;
6260
6261 best_cost = iv_ca_cost (ivs);
6262 /* Start narrowing with START. */
6263 new_cp = get_group_iv_cost (data, group, start);
6264
6265 if (data->consider_all_candidates)
6266 {
6267 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6268 {
6269 if (ci == cand->id || (start && ci == start->id))
6270 continue;
6271
6272 cnd = data->vcands[ci];
6273
6274 cp = get_group_iv_cost (data, group, cnd);
6275 if (!cp)
6276 continue;
6277
6278 iv_ca_set_cp (data, ivs, group, cp);
6279 acost = iv_ca_cost (ivs);
6280
6281 if (acost < best_cost)
6282 {
6283 best_cost = acost;
6284 new_cp = cp;
6285 }
6286 }
6287 }
6288 else
6289 {
6290 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6291 {
6292 if (ci == cand->id || (start && ci == start->id))
6293 continue;
6294
6295 cnd = data->vcands[ci];
6296
6297 cp = get_group_iv_cost (data, group, cnd);
6298 if (!cp)
6299 continue;
6300
6301 iv_ca_set_cp (data, ivs, group, cp);
6302 acost = iv_ca_cost (ivs);
6303
6304 if (acost < best_cost)
6305 {
6306 best_cost = acost;
6307 new_cp = cp;
6308 }
6309 }
6310 }
6311 /* Restore the old cp for this group. */
6312 iv_ca_set_cp (data, ivs, group, old_cp);
6313
6314 if (!new_cp)
6315 {
6316 iv_ca_delta_free (delta);
6317 return infinite_cost;
6318 }
6319
6320 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6321 }
6322
6323 iv_ca_delta_commit (data, ivs, *delta, true);
6324 cost = iv_ca_cost (ivs);
6325 iv_ca_delta_commit (data, ivs, *delta, false);
6326
6327 return cost;
6328 }
6329
6330 /* Try optimizing the set of candidates IVS by removing candidates different
6331 from EXCEPT_CAND from it. Return the cost of the new set, and store
6332 differences in DELTA. */
6333
6334 static comp_cost
6335 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6336 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6337 {
6338 bitmap_iterator bi;
6339 struct iv_ca_delta *act_delta, *best_delta;
6340 unsigned i;
6341 comp_cost best_cost, acost;
6342 struct iv_cand *cand;
6343
6344 best_delta = NULL;
6345 best_cost = iv_ca_cost (ivs);
6346
6347 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6348 {
6349 cand = data->vcands[i];
6350
6351 if (cand == except_cand)
6352 continue;
6353
6354 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6355
6356 if (acost < best_cost)
6357 {
6358 best_cost = acost;
6359 iv_ca_delta_free (&best_delta);
6360 best_delta = act_delta;
6361 }
6362 else
6363 iv_ca_delta_free (&act_delta);
6364 }
6365
6366 if (!best_delta)
6367 {
6368 *delta = NULL;
6369 return best_cost;
6370 }
6371
6372 /* Recurse to possibly remove other unnecessary ivs. */
6373 iv_ca_delta_commit (data, ivs, best_delta, true);
6374 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6375 iv_ca_delta_commit (data, ivs, best_delta, false);
6376 *delta = iv_ca_delta_join (best_delta, *delta);
6377 return best_cost;
6378 }
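/* For example, if removing candidate C1 makes another candidate C2 redundant
   as well, the recursive call above (performed with C1's removal committed)
   will also propose removing C2, and iv_ca_delta_join chains both removals
   into the single delta returned to the caller.  */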
6379
6380 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6381 cheaper local cost for GROUP than BEST_CP. Return pointer to
6382 the corresponding cost_pair, otherwise just return BEST_CP. */
6383
6384 static struct cost_pair*
6385 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6386 unsigned int cand_idx, struct iv_cand *old_cand,
6387 struct cost_pair *best_cp)
6388 {
6389 struct iv_cand *cand;
6390 struct cost_pair *cp;
6391
6392 gcc_assert (old_cand != NULL && best_cp != NULL);
6393 if (cand_idx == old_cand->id)
6394 return best_cp;
6395
6396 cand = data->vcands[cand_idx];
6397 cp = get_group_iv_cost (data, group, cand);
6398 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6399 return cp;
6400
6401 return best_cp;
6402 }
6403
6404 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6405 which are used by more than one iv use. For each of those candidates,
6406 this function tries to represent the iv uses under that candidate using
6407 other ones with lower local cost, then tries to prune the new set.
6408 If the new set has lower cost, it returns the new cost after recording
6409 the candidate replacement in list DELTA. */
6410
6411 static comp_cost
6412 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6413 struct iv_ca_delta **delta)
6414 {
6415 bitmap_iterator bi, bj;
6416 unsigned int i, j, k;
6417 struct iv_cand *cand;
6418 comp_cost orig_cost, acost;
6419 struct iv_ca_delta *act_delta, *tmp_delta;
6420 struct cost_pair *old_cp, *best_cp = NULL;
6421
6422 *delta = NULL;
6423 orig_cost = iv_ca_cost (ivs);
6424
6425 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6426 {
6427 if (ivs->n_cand_uses[i] == 1
6428 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6429 continue;
6430
6431 cand = data->vcands[i];
6432
6433 act_delta = NULL;
6434 /* Represent uses under current candidate using other ones with
6435 lower local cost. */
6436 for (j = 0; j < ivs->upto; j++)
6437 {
6438 struct iv_group *group = data->vgroups[j];
6439 old_cp = iv_ca_cand_for_group (ivs, group);
6440
6441 if (old_cp->cand != cand)
6442 continue;
6443
6444 best_cp = old_cp;
6445 if (data->consider_all_candidates)
6446 for (k = 0; k < data->vcands.length (); k++)
6447 best_cp = cheaper_cost_with_cand (data, group, k,
6448 old_cp->cand, best_cp);
6449 else
6450 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6451 best_cp = cheaper_cost_with_cand (data, group, k,
6452 old_cp->cand, best_cp);
6453
6454 if (best_cp == old_cp)
6455 continue;
6456
6457 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6458 }
6459 /* No need for further pruning. */
6460 if (!act_delta)
6461 continue;
6462
6463 /* Prune the new candidate set. */
6464 iv_ca_delta_commit (data, ivs, act_delta, true);
6465 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6466 iv_ca_delta_commit (data, ivs, act_delta, false);
6467 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6468
6469 if (acost < orig_cost)
6470 {
6471 *delta = act_delta;
6472 return acost;
6473 }
6474 else
6475 iv_ca_delta_free (&act_delta);
6476 }
6477
6478 return orig_cost;
6479 }
6480
6481 /* Tries to extend the set IVS in the best possible way in order to
6482 express the GROUP. If ORIGINALP is true, prefer candidates from
6483 the original set of IVs, otherwise favor important candidates not
6484 based on any memory object. */
6485
6486 static bool
6487 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6488 struct iv_group *group, bool originalp)
6489 {
6490 comp_cost best_cost, act_cost;
6491 unsigned i;
6492 bitmap_iterator bi;
6493 struct iv_cand *cand;
6494 struct iv_ca_delta *best_delta = NULL, *act_delta;
6495 struct cost_pair *cp;
6496
6497 iv_ca_add_group (data, ivs, group);
6498 best_cost = iv_ca_cost (ivs);
6499 cp = iv_ca_cand_for_group (ivs, group);
6500 if (cp)
6501 {
6502 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6503 iv_ca_set_no_cp (data, ivs, group);
6504 }
6505
6506 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6507 first try important candidates not based on any memory object. Only if
6508 this fails, try the specific ones. Rationale -- in loops with many
6509 variables the best choice often is to use just one generic biv. If we
6510 added here many ivs specific to the uses, the optimization algorithm later
6511 would be likely to get stuck in a local minimum, thus causing us to create
6512 too many ivs. The approach from few ivs to more seems more likely to be
6513 successful -- starting from few ivs, replacing an expensive use by a
6514 specific iv should always be a win. */
6515 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6516 {
6517 cand = data->vcands[i];
6518
6519 if (originalp && cand->pos != IP_ORIGINAL)
6520 continue;
6521
6522 if (!originalp && cand->iv->base_object != NULL_TREE)
6523 continue;
6524
6525 if (iv_ca_cand_used_p (ivs, cand))
6526 continue;
6527
6528 cp = get_group_iv_cost (data, group, cand);
6529 if (!cp)
6530 continue;
6531
6532 iv_ca_set_cp (data, ivs, group, cp);
6533 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6534 true);
6535 iv_ca_set_no_cp (data, ivs, group);
6536 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6537
6538 if (act_cost < best_cost)
6539 {
6540 best_cost = act_cost;
6541
6542 iv_ca_delta_free (&best_delta);
6543 best_delta = act_delta;
6544 }
6545 else
6546 iv_ca_delta_free (&act_delta);
6547 }
6548
6549 if (best_cost.infinite_cost_p ())
6550 {
6551 for (i = 0; i < group->n_map_members; i++)
6552 {
6553 cp = group->cost_map + i;
6554 cand = cp->cand;
6555 if (!cand)
6556 continue;
6557
6558 /* Already tried this. */
6559 if (cand->important)
6560 {
6561 if (originalp && cand->pos == IP_ORIGINAL)
6562 continue;
6563 if (!originalp && cand->iv->base_object == NULL_TREE)
6564 continue;
6565 }
6566
6567 if (iv_ca_cand_used_p (ivs, cand))
6568 continue;
6569
6570 act_delta = NULL;
6571 iv_ca_set_cp (data, ivs, group, cp);
6572 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6573 iv_ca_set_no_cp (data, ivs, group);
6574 act_delta = iv_ca_delta_add (group,
6575 iv_ca_cand_for_group (ivs, group),
6576 cp, act_delta);
6577
6578 if (act_cost < best_cost)
6579 {
6580 best_cost = act_cost;
6581
6582 if (best_delta)
6583 iv_ca_delta_free (&best_delta);
6584 best_delta = act_delta;
6585 }
6586 else
6587 iv_ca_delta_free (&act_delta);
6588 }
6589 }
6590
6591 iv_ca_delta_commit (data, ivs, best_delta, true);
6592 iv_ca_delta_free (&best_delta);
6593
6594 return !best_cost.infinite_cost_p ();
6595 }
6596
6597 /* Finds an initial assignment of candidates to uses. */
6598
6599 static struct iv_ca *
6600 get_initial_solution (struct ivopts_data *data, bool originalp)
6601 {
6602 unsigned i;
6603 struct iv_ca *ivs = iv_ca_new (data);
6604
6605 for (i = 0; i < data->vgroups.length (); i++)
6606 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6607 {
6608 iv_ca_free (&ivs);
6609 return NULL;
6610 }
6611
6612 return ivs;
6613 }
6614
6615 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6616 points to a bool variable; if it is true, this function also tries to
6617 break the local optimal fixed-point by replacing candidates in IVS. */
6618
6619 static bool
6620 try_improve_iv_set (struct ivopts_data *data,
6621 struct iv_ca *ivs, bool *try_replace_p)
6622 {
6623 unsigned i, n_ivs;
6624 comp_cost acost, best_cost = iv_ca_cost (ivs);
6625 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6626 struct iv_cand *cand;
6627
6628 /* Try extending the set of induction variables by one. */
6629 for (i = 0; i < data->vcands.length (); i++)
6630 {
6631 cand = data->vcands[i];
6632
6633 if (iv_ca_cand_used_p (ivs, cand))
6634 continue;
6635
6636 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6637 if (!act_delta)
6638 continue;
6639
6640 /* If we successfully added the candidate and the set is small enough,
6641 try optimizing it by removing other candidates. */
6642 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6643 {
6644 iv_ca_delta_commit (data, ivs, act_delta, true);
6645 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6646 iv_ca_delta_commit (data, ivs, act_delta, false);
6647 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6648 }
6649
6650 if (acost < best_cost)
6651 {
6652 best_cost = acost;
6653 iv_ca_delta_free (&best_delta);
6654 best_delta = act_delta;
6655 }
6656 else
6657 iv_ca_delta_free (&act_delta);
6658 }
6659
6660 if (!best_delta)
6661 {
6662 /* Try removing the candidates from the set instead. */
6663 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6664
6665 if (!best_delta && *try_replace_p)
6666 {
6667 *try_replace_p = false;
6668 /* So far the candidate selection algorithm tends to choose fewer IVs
6669 so that it can handle cases in which loops have many variables
6670 but the best choice is often to use only one general biv. One
6671 weakness is that it can't handle the opposite case, in which
6672 different candidates should be chosen for different uses. To solve
6673 this problem, we replace candidates in the manner described in the
6674 comments of iv_ca_replace, thus giving the general algorithm a chance
6675 to break the local optimal fixed-point in these cases. */
6676 best_cost = iv_ca_replace (data, ivs, &best_delta);
6677 }
6678
6679 if (!best_delta)
6680 return false;
6681 }
6682
6683 iv_ca_delta_commit (data, ivs, best_delta, true);
6684 iv_ca_delta_free (&best_delta);
6685 return best_cost == iv_ca_cost (ivs);
6686 }
6687
6688 /* Attempts to find the optimal set of induction variables. We use a simple
6689 greedy heuristic -- we try to replace at most one candidate in the selected
6690 solution and remove the unused ivs while this improves the cost. */
6691
6692 static struct iv_ca *
6693 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6694 {
6695 struct iv_ca *set;
6696 bool try_replace_p = true;
6697
6698 /* Get the initial solution. */
6699 set = get_initial_solution (data, originalp);
6700 if (!set)
6701 {
6702 if (dump_file && (dump_flags & TDF_DETAILS))
6703 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6704 return NULL;
6705 }
6706
6707 if (dump_file && (dump_flags & TDF_DETAILS))
6708 {
6709 fprintf (dump_file, "Initial set of candidates:\n");
6710 iv_ca_dump (data, dump_file, set);
6711 }
6712
6713 while (try_improve_iv_set (data, set, &try_replace_p))
6714 {
6715 if (dump_file && (dump_flags & TDF_DETAILS))
6716 {
6717 fprintf (dump_file, "Improved to:\n");
6718 iv_ca_dump (data, dump_file, set);
6719 }
6720 }
6721
6722 /* If the set has infinite_cost, it can't be optimal. */
6723 if (iv_ca_cost (set).infinite_cost_p ())
6724 {
6725 if (dump_file && (dump_flags & TDF_DETAILS))
6726 fprintf (dump_file,
6727 "Overflow to infinite cost in try_improve_iv_set.\n");
6728 iv_ca_free (&set);
6729 }
6730 return set;
6731 }
6732
6733 static struct iv_ca *
6734 find_optimal_iv_set (struct ivopts_data *data)
6735 {
6736 unsigned i;
6737 comp_cost cost, origcost;
6738 struct iv_ca *set, *origset;
6739
6740 /* Determine the cost using a strategy that starts from the original IVs,
6741 then try again with a strategy that prefers candidates not based
6742 on any IVs. */
6743 origset = find_optimal_iv_set_1 (data, true);
6744 set = find_optimal_iv_set_1 (data, false);
6745
6746 if (!origset && !set)
6747 return NULL;
6748
6749 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6750 cost = set ? iv_ca_cost (set) : infinite_cost;
6751
6752 if (dump_file && (dump_flags & TDF_DETAILS))
6753 {
6754 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6755 origcost.cost, origcost.complexity);
6756 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6757 cost.cost, cost.complexity);
6758 }
6759
6760 /* Choose the one with the best cost. */
6761 if (origcost <= cost)
6762 {
6763 if (set)
6764 iv_ca_free (&set);
6765 set = origset;
6766 }
6767 else if (origset)
6768 iv_ca_free (&origset);
6769
6770 for (i = 0; i < data->vgroups.length (); i++)
6771 {
6772 struct iv_group *group = data->vgroups[i];
6773 group->selected = iv_ca_cand_for_group (set, group)->cand;
6774 }
6775
6776 return set;
6777 }
6778
6779 /* Creates a new induction variable corresponding to CAND. */
6780
6781 static void
6782 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6783 {
6784 gimple_stmt_iterator incr_pos;
6785 tree base;
6786 struct iv_use *use;
6787 struct iv_group *group;
6788 bool after = false;
6789
6790 gcc_assert (cand->iv != NULL);
6791
6792 switch (cand->pos)
6793 {
6794 case IP_NORMAL:
6795 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6796 break;
6797
6798 case IP_END:
6799 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6800 after = true;
6801 break;
6802
6803 case IP_AFTER_USE:
6804 after = true;
6805 /* fall through */
6806 case IP_BEFORE_USE:
6807 incr_pos = gsi_for_stmt (cand->incremented_at);
6808 break;
6809
6810 case IP_ORIGINAL:
6811 /* Mark that the iv is preserved. */
6812 name_info (data, cand->var_before)->preserve_biv = true;
6813 name_info (data, cand->var_after)->preserve_biv = true;
6814
6815 /* Rewrite the increment so that it uses var_before directly. */
6816 use = find_interesting_uses_op (data, cand->var_after);
6817 group = data->vgroups[use->group_id];
6818 group->selected = cand;
6819 return;
6820 }
6821
6822 gimple_add_tmp_var (cand->var_before);
6823
6824 base = unshare_expr (cand->iv->base);
6825
6826 create_iv (base, unshare_expr (cand->iv->step),
6827 cand->var_before, data->current_loop,
6828 &incr_pos, after, &cand->var_before, &cand->var_after);
6829 }
6830
6831 /* Creates new induction variables described in SET. */
6832
6833 static void
6834 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6835 {
6836 unsigned i;
6837 struct iv_cand *cand;
6838 bitmap_iterator bi;
6839
6840 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6841 {
6842 cand = data->vcands[i];
6843 create_new_iv (data, cand);
6844 }
6845
6846 if (dump_file && (dump_flags & TDF_DETAILS))
6847 {
6848 fprintf (dump_file, "Selected IV set for loop %d",
6849 data->current_loop->num);
6850 if (data->loop_loc != UNKNOWN_LOCATION)
6851 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6852 LOCATION_LINE (data->loop_loc));
6853 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6854 avg_loop_niter (data->current_loop));
6855 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6856 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6857 {
6858 cand = data->vcands[i];
6859 dump_cand (dump_file, cand);
6860 }
6861 fprintf (dump_file, "\n");
6862 }
6863 }
6864
6865 /* Rewrites USE (definition of iv used in a nonlinear expression)
6866 using candidate CAND. */
6867
6868 static void
6869 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6870 struct iv_use *use, struct iv_cand *cand)
6871 {
6872 gassign *ass;
6873 gimple_stmt_iterator bsi;
6874 tree comp, type = get_use_type (use), tgt;
6875
6876 /* An important special case -- if we are asked to express value of
6877 the original iv by itself, just exit; there is no need to
6878 introduce a new computation (that might also need casting the
6879 variable to unsigned and back). */
6880 if (cand->pos == IP_ORIGINAL
6881 && cand->incremented_at == use->stmt)
6882 {
6883 tree op = NULL_TREE;
6884 enum tree_code stmt_code;
6885
6886 gcc_assert (is_gimple_assign (use->stmt));
6887 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6888
6889 /* Check whether we may leave the computation unchanged.
6890 This is the case only if it does not rely on other
6891 computations in the loop -- otherwise, the computation
6892 we rely upon may be removed in remove_unused_ivs,
6893 thus leading to ICE. */
6894 stmt_code = gimple_assign_rhs_code (use->stmt);
6895 if (stmt_code == PLUS_EXPR
6896 || stmt_code == MINUS_EXPR
6897 || stmt_code == POINTER_PLUS_EXPR)
6898 {
6899 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6900 op = gimple_assign_rhs2 (use->stmt);
6901 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6902 op = gimple_assign_rhs1 (use->stmt);
6903 }
6904
6905 if (op != NULL_TREE)
6906 {
6907 if (expr_invariant_in_loop_p (data->current_loop, op))
6908 return;
6909 if (TREE_CODE (op) == SSA_NAME)
6910 {
6911 struct iv *iv = get_iv (data, op);
6912 if (iv != NULL && integer_zerop (iv->step))
6913 return;
6914 }
6915 }
6916 }
6917
6918 switch (gimple_code (use->stmt))
6919 {
6920 case GIMPLE_PHI:
6921 tgt = PHI_RESULT (use->stmt);
6922
6923 /* If we should keep the biv, do not replace it. */
6924 if (name_info (data, tgt)->preserve_biv)
6925 return;
6926
6927 bsi = gsi_after_labels (gimple_bb (use->stmt));
6928 break;
6929
6930 case GIMPLE_ASSIGN:
6931 tgt = gimple_assign_lhs (use->stmt);
6932 bsi = gsi_for_stmt (use->stmt);
6933 break;
6934
6935 default:
6936 gcc_unreachable ();
6937 }
6938
6939 aff_tree aff_inv, aff_var;
6940 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6941 use, cand, &aff_inv, &aff_var))
6942 gcc_unreachable ();
6943
6944 unshare_aff_combination (&aff_inv);
6945 unshare_aff_combination (&aff_var);
6946 /* Prefer CSE opportunities over hoisting the loop invariant by adding the
6947 offset last, so that iv_uses with different offsets can be CSEed. */
6948 poly_widest_int offset = aff_inv.offset;
6949 aff_inv.offset = 0;
6950
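/* Purely illustrative example of the preference above: two uses such as
   a[i + 4] and a[i + 8] then share the computation of &a + i * 4 and differ
   only in the constant folded in afterwards, which makes the common part
   visible to later CSE.  */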
6951 gimple_seq stmt_list = NULL, seq = NULL;
6952 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6953 tree comp_op2 = aff_combination_to_tree (&aff_var);
6954 gcc_assert (comp_op1 && comp_op2);
6955
6956 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6957 gimple_seq_add_seq (&stmt_list, seq);
6958 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6959 gimple_seq_add_seq (&stmt_list, seq);
6960
6961 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6962 std::swap (comp_op1, comp_op2);
6963
6964 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6965 {
6966 comp = fold_build_pointer_plus (comp_op1,
6967 fold_convert (sizetype, comp_op2));
6968 comp = fold_build_pointer_plus (comp,
6969 wide_int_to_tree (sizetype, offset));
6970 }
6971 else
6972 {
6973 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6974 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6975 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6976 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6977 }
6978
6979 comp = fold_convert (type, comp);
6980 if (!valid_gimple_rhs_p (comp)
6981 || (gimple_code (use->stmt) != GIMPLE_PHI
6982 /* We can't allow re-allocating the stmt as it might be pointed
6983 to still. */
6984 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6985 >= gimple_num_ops (gsi_stmt (bsi)))))
6986 {
6987 comp = force_gimple_operand (comp, &seq, true, NULL);
6988 gimple_seq_add_seq (&stmt_list, seq);
6989 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6990 {
6991 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6992 /* As this isn't a plain copy we have to reset alignment
6993 information. */
6994 if (SSA_NAME_PTR_INFO (comp))
6995 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6996 }
6997 }
6998
6999 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7000 if (gimple_code (use->stmt) == GIMPLE_PHI)
7001 {
7002 ass = gimple_build_assign (tgt, comp);
7003 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7004
7005 bsi = gsi_for_stmt (use->stmt);
7006 remove_phi_node (&bsi, false);
7007 }
7008 else
7009 {
7010 gimple_assign_set_rhs_from_tree (&bsi, comp);
7011 use->stmt = gsi_stmt (bsi);
7012 }
7013 }
7014
7015 /* Performs a peephole optimization to reorder the iv update statement with
7016 a mem ref to enable instruction combining in later phases. The mem ref uses
7017 the iv value before the update, so the reordering transformation requires
7018 adjustment of the offset. CAND is the selected IV_CAND.
7019
7020 Example:
7021
7022 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7023 iv2 = iv1 + 1;
7024
7025 if (t < val) (1)
7026 goto L;
7027 goto Head;
7028
7029
7030 Directly propagating t over to (1) would introduce an overlapping live range
7031 and thus increase register pressure. This peephole transforms it into:
7032
7033
7034 iv2 = iv1 + 1;
7035 t = MEM_REF (base, iv2, 8, 8);
7036 if (t < val)
7037 goto L;
7038 goto Head;
7039 */
7040
7041 static void
7042 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7043 {
7044 tree var_after;
7045 gimple *iv_update, *stmt;
7046 basic_block bb;
7047 gimple_stmt_iterator gsi, gsi_iv;
7048
7049 if (cand->pos != IP_NORMAL)
7050 return;
7051
7052 var_after = cand->var_after;
7053 iv_update = SSA_NAME_DEF_STMT (var_after);
7054
7055 bb = gimple_bb (iv_update);
7056 gsi = gsi_last_nondebug_bb (bb);
7057 stmt = gsi_stmt (gsi);
7058
7059 /* Only handle conditional statements for now. */
7060 if (gimple_code (stmt) != GIMPLE_COND)
7061 return;
7062
7063 gsi_prev_nondebug (&gsi);
7064 stmt = gsi_stmt (gsi);
7065 if (stmt != iv_update)
7066 return;
7067
7068 gsi_prev_nondebug (&gsi);
7069 if (gsi_end_p (gsi))
7070 return;
7071
7072 stmt = gsi_stmt (gsi);
7073 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7074 return;
7075
7076 if (stmt != use->stmt)
7077 return;
7078
7079 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7080 return;
7081
7082 if (dump_file && (dump_flags & TDF_DETAILS))
7083 {
7084 fprintf (dump_file, "Reordering \n");
7085 print_gimple_stmt (dump_file, iv_update, 0);
7086 print_gimple_stmt (dump_file, use->stmt, 0);
7087 fprintf (dump_file, "\n");
7088 }
7089
7090 gsi = gsi_for_stmt (use->stmt);
7091 gsi_iv = gsi_for_stmt (iv_update);
7092 gsi_move_before (&gsi_iv, &gsi);
7093
7094 cand->pos = IP_BEFORE_USE;
7095 cand->incremented_at = use->stmt;
7096 }
7097
7098 /* Return the alias pointer type that should be used for a MEM_REF
7099 associated with USE, which has type USE_PTR_ADDRESS. */
7100
7101 static tree
7102 get_alias_ptr_type_for_ptr_address (iv_use *use)
7103 {
7104 gcall *call = as_a <gcall *> (use->stmt);
7105 switch (gimple_call_internal_fn (call))
7106 {
7107 case IFN_MASK_LOAD:
7108 case IFN_MASK_STORE:
7109 /* The second argument contains the correct alias type. */
7110 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7111 return TREE_TYPE (gimple_call_arg (call, 1));
7112
7113 default:
7114 gcc_unreachable ();
7115 }
7116 }
7117
7118
7119 /* Rewrites USE (address that is an iv) using candidate CAND. */
7120
7121 static void
7122 rewrite_use_address (struct ivopts_data *data,
7123 struct iv_use *use, struct iv_cand *cand)
7124 {
7125 aff_tree aff;
7126 bool ok;
7127
7128 adjust_iv_update_pos (cand, use);
7129 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7130 gcc_assert (ok);
7131 unshare_aff_combination (&aff);
7132
7133 /* To avoid undefined overflow problems, all IV candidates use unsigned
7134 integer types. The drawback is that this makes it impossible for
7135 create_mem_ref to distinguish an IV that is based on a memory object
7136 from one that represents simply an offset.
7137
7138 To work around this problem, we pass a hint to create_mem_ref that
7139 indicates which variable (if any) in aff is an IV based on a memory
7140 object. Note that we only consider the candidate. If this is not
7141 based on an object, the base of the reference is in some subexpression
7142 of the use -- but these will use pointer types, so they are recognized
7143 by the create_mem_ref heuristics anyway. */
7144 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7145 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7146 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7147 tree type = use->mem_type;
7148 tree alias_ptr_type;
7149 if (use->type == USE_PTR_ADDRESS)
7150 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7151 else
7152 {
7153 gcc_assert (type == TREE_TYPE (*use->op_p));
7154 unsigned int align = get_object_alignment (*use->op_p);
7155 if (align != TYPE_ALIGN (type))
7156 type = build_aligned_type (type, align);
7157 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7158 }
7159 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7160 iv, base_hint, data->speed);
7161
7162 if (use->type == USE_PTR_ADDRESS)
7163 {
7164 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7165 ref = fold_convert (get_use_type (use), ref);
7166 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7167 true, GSI_SAME_STMT);
7168 }
7169 else
7170 copy_ref_info (ref, *use->op_p);
7171
7172 *use->op_p = ref;
7173 }
7174
7175 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7176 candidate CAND. */
7177
7178 static void
7179 rewrite_use_compare (struct ivopts_data *data,
7180 struct iv_use *use, struct iv_cand *cand)
7181 {
7182 tree comp, op, bound;
7183 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7184 enum tree_code compare;
7185 struct iv_group *group = data->vgroups[use->group_id];
7186 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7187
7188 bound = cp->value;
7189 if (bound)
7190 {
7191 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7192 tree var_type = TREE_TYPE (var);
7193 gimple_seq stmts;
7194
7195 if (dump_file && (dump_flags & TDF_DETAILS))
7196 {
7197 fprintf (dump_file, "Replacing exit test: ");
7198 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7199 }
7200 compare = cp->comp;
7201 bound = unshare_expr (fold_convert (var_type, bound));
7202 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7203 if (stmts)
7204 gsi_insert_seq_on_edge_immediate (
7205 loop_preheader_edge (data->current_loop),
7206 stmts);
7207
7208 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7209 gimple_cond_set_lhs (cond_stmt, var);
7210 gimple_cond_set_code (cond_stmt, compare);
7211 gimple_cond_set_rhs (cond_stmt, op);
7212 return;
7213 }
7214
7215 /* The induction variable elimination failed; just express the original
7216 giv. */
7217 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7218 gcc_assert (comp != NULL_TREE);
7219 gcc_assert (use->op_p != NULL);
7220 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7221 SSA_NAME_VAR (*use->op_p),
7222 true, GSI_SAME_STMT);
7223 }
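/* As an illustrative example (not taken from any particular test case): an
   exit test "if (i < n)" for which iv elimination succeeded may be rewritten
   against a pointer candidate as "if (p != base + n * 4)", with the bound
   computed once in the preheader; only when no such bound exists do we fall
   back to expressing the original iv as above.  */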
7224
7225 /* Rewrite the groups using the selected induction variables. */
7226
7227 static void
7228 rewrite_groups (struct ivopts_data *data)
7229 {
7230 unsigned i, j;
7231
7232 for (i = 0; i < data->vgroups.length (); i++)
7233 {
7234 struct iv_group *group = data->vgroups[i];
7235 struct iv_cand *cand = group->selected;
7236
7237 gcc_assert (cand);
7238
7239 if (group->type == USE_NONLINEAR_EXPR)
7240 {
7241 for (j = 0; j < group->vuses.length (); j++)
7242 {
7243 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7244 update_stmt (group->vuses[j]->stmt);
7245 }
7246 }
7247 else if (address_p (group->type))
7248 {
7249 for (j = 0; j < group->vuses.length (); j++)
7250 {
7251 rewrite_use_address (data, group->vuses[j], cand);
7252 update_stmt (group->vuses[j]->stmt);
7253 }
7254 }
7255 else
7256 {
7257 gcc_assert (group->type == USE_COMPARE);
7258
7259 for (j = 0; j < group->vuses.length (); j++)
7260 {
7261 rewrite_use_compare (data, group->vuses[j], cand);
7262 update_stmt (group->vuses[j]->stmt);
7263 }
7264 }
7265 }
7266 }
7267
7268 /* Removes the ivs that are not used after rewriting. */
7269
7270 static void
7271 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7272 {
7273 unsigned j;
7274 bitmap_iterator bi;
7275
7276 /* Figure out an order in which to release SSA DEFs so that we don't
7277 release something that we'd have to propagate into a debug stmt
7278 afterwards. */
7279 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7280 {
7281 struct version_info *info;
7282
7283 info = ver_info (data, j);
7284 if (info->iv
7285 && !integer_zerop (info->iv->step)
7286 && !info->inv_id
7287 && !info->iv->nonlin_use
7288 && !info->preserve_biv)
7289 {
7290 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7291
7292 tree def = info->iv->ssa_name;
7293
7294 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7295 {
7296 imm_use_iterator imm_iter;
7297 use_operand_p use_p;
7298 gimple *stmt;
7299 int count = 0;
7300
7301 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7302 {
7303 if (!gimple_debug_bind_p (stmt))
7304 continue;
7305
7306 /* We just want to determine whether to do nothing
7307 (count == 0), to substitute the computed
7308 expression into a single use of the SSA DEF by
7309 itself (count == 1), or to use a debug temp
7310 because the SSA DEF is used multiple times or as
7311 part of a larger expression (count > 1). */
7312 count++;
7313 if (gimple_debug_bind_get_value (stmt) != def)
7314 count++;
7315
7316 if (count > 1)
7317 BREAK_FROM_IMM_USE_STMT (imm_iter);
7318 }
7319
7320 if (!count)
7321 continue;
7322
7323 struct iv_use dummy_use;
7324 struct iv_cand *best_cand = NULL, *cand;
7325 unsigned i, best_pref = 0, cand_pref;
7326
7327 memset (&dummy_use, 0, sizeof (dummy_use));
7328 dummy_use.iv = info->iv;
7329 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7330 {
7331 cand = data->vgroups[i]->selected;
7332 if (cand == best_cand)
7333 continue;
7334 cand_pref = operand_equal_p (cand->iv->step,
7335 info->iv->step, 0)
7336 ? 4 : 0;
7337 cand_pref
7338 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7339 == TYPE_MODE (TREE_TYPE (info->iv->base))
7340 ? 2 : 0;
7341 cand_pref
7342 += TREE_CODE (cand->iv->base) == INTEGER_CST
7343 ? 1 : 0;
7344 if (best_cand == NULL || best_pref < cand_pref)
7345 {
7346 best_cand = cand;
7347 best_pref = cand_pref;
7348 }
7349 }
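/* For instance, with the weights above a candidate whose step and mode both
   match the removed iv scores 4 + 2 = 6 and is preferred over one that only
   has an INTEGER_CST base (score 1); on ties the earlier candidate is kept.  */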
7350
7351 if (!best_cand)
7352 continue;
7353
7354 tree comp = get_computation_at (data->current_loop,
7355 SSA_NAME_DEF_STMT (def),
7356 &dummy_use, best_cand);
7357 if (!comp)
7358 continue;
7359
7360 if (count > 1)
7361 {
7362 tree vexpr = make_node (DEBUG_EXPR_DECL);
7363 DECL_ARTIFICIAL (vexpr) = 1;
7364 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7365 if (SSA_NAME_VAR (def))
7366 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7367 else
7368 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7369 gdebug *def_temp
7370 = gimple_build_debug_bind (vexpr, comp, NULL);
7371 gimple_stmt_iterator gsi;
7372
7373 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7374 gsi = gsi_after_labels (gimple_bb
7375 (SSA_NAME_DEF_STMT (def)));
7376 else
7377 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7378
7379 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7380 comp = vexpr;
7381 }
7382
7383 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7384 {
7385 if (!gimple_debug_bind_p (stmt))
7386 continue;
7387
7388 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7389 SET_USE (use_p, comp);
7390
7391 update_stmt (stmt);
7392 }
7393 }
7394 }
7395 }
7396 }
7397
7398 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7399 for hash_map::traverse. */
7400
7401 bool
7402 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7403 {
7404 free (value);
7405 return true;
7406 }
7407
7408 /* Frees data allocated by the optimization of a single loop. */
7409
7410 static void
7411 free_loop_data (struct ivopts_data *data)
7412 {
7413 unsigned i, j;
7414 bitmap_iterator bi;
7415 tree obj;
7416
7417 if (data->niters)
7418 {
7419 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7420 delete data->niters;
7421 data->niters = NULL;
7422 }
7423
7424 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7425 {
7426 struct version_info *info;
7427
7428 info = ver_info (data, i);
7429 info->iv = NULL;
7430 info->has_nonlin_use = false;
7431 info->preserve_biv = false;
7432 info->inv_id = 0;
7433 }
7434 bitmap_clear (data->relevant);
7435 bitmap_clear (data->important_candidates);
7436
7437 for (i = 0; i < data->vgroups.length (); i++)
7438 {
7439 struct iv_group *group = data->vgroups[i];
7440
7441 for (j = 0; j < group->vuses.length (); j++)
7442 free (group->vuses[j]);
7443 group->vuses.release ();
7444
7445 BITMAP_FREE (group->related_cands);
7446 for (j = 0; j < group->n_map_members; j++)
7447 {
7448 if (group->cost_map[j].inv_vars)
7449 BITMAP_FREE (group->cost_map[j].inv_vars);
7450 if (group->cost_map[j].inv_exprs)
7451 BITMAP_FREE (group->cost_map[j].inv_exprs);
7452 }
7453
7454 free (group->cost_map);
7455 free (group);
7456 }
7457 data->vgroups.truncate (0);
7458
7459 for (i = 0; i < data->vcands.length (); i++)
7460 {
7461 struct iv_cand *cand = data->vcands[i];
7462
7463 if (cand->inv_vars)
7464 BITMAP_FREE (cand->inv_vars);
7465 if (cand->inv_exprs)
7466 BITMAP_FREE (cand->inv_exprs);
7467 free (cand);
7468 }
7469 data->vcands.truncate (0);
7470
7471 if (data->version_info_size < num_ssa_names)
7472 {
7473 data->version_info_size = 2 * num_ssa_names;
7474 free (data->version_info);
7475 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7476 }
7477
7478 data->max_inv_var_id = 0;
7479 data->max_inv_expr_id = 0;
7480
7481 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7482 SET_DECL_RTL (obj, NULL_RTX);
7483
7484 decl_rtl_to_reset.truncate (0);
7485
7486 data->inv_expr_tab->empty ();
7487
7488 data->iv_common_cand_tab->empty ();
7489 data->iv_common_cands.truncate (0);
7490 }
7491
7492 /* Finalizes the data structures used by the iv optimization pass
7493 in DATA. */
7494
7495 static void
7496 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7497 {
7498 free_loop_data (data);
7499 free (data->version_info);
7500 BITMAP_FREE (data->relevant);
7501 BITMAP_FREE (data->important_candidates);
7502
7503 decl_rtl_to_reset.release ();
7504 data->vgroups.release ();
7505 data->vcands.release ();
7506 delete data->inv_expr_tab;
7507 data->inv_expr_tab = NULL;
7508 free_affine_expand_cache (&data->name_expansion_cache);
7509 delete data->iv_common_cand_tab;
7510 data->iv_common_cand_tab = NULL;
7511 data->iv_common_cands.release ();
7512 obstack_free (&data->iv_obstack, NULL);
7513 }
7514
7515 /* Returns true if the loop body BODY includes any function calls. */
7516
7517 static bool
7518 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7519 {
7520 gimple_stmt_iterator gsi;
7521 unsigned i;
7522
7523 for (i = 0; i < num_nodes; i++)
7524 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7525 {
7526 gimple *stmt = gsi_stmt (gsi);
7527 if (is_gimple_call (stmt)
7528 && !gimple_call_internal_p (stmt)
7529 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7530 return true;
7531 }
7532 return false;
7533 }
7534
7535 /* Determine cost scaling factor for basic blocks in loop. */
7536 #define COST_SCALING_FACTOR_BOUND (20)
7537
7538 static void
7539 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7540 {
7541 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7542 if (!data->speed || lfreq <= 0)
7543 return;
7544
7545 int max_freq = lfreq;
7546 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7547 {
7548 body[i]->aux = (void *)(intptr_t) 1;
7549 if (max_freq < body[i]->count.to_frequency (cfun))
7550 max_freq = body[i]->count.to_frequency (cfun);
7551 }
7552 if (max_freq > lfreq)
7553 {
7554 int divisor, factor;
7555 /* Check if scaling factor itself needs to be scaled by the bound. This
7556 is to avoid overflow when scaling cost according to profile info. */
7557 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7558 {
7559 divisor = max_freq;
7560 factor = COST_SCALING_FACTOR_BOUND;
7561 }
7562 else
7563 {
7564 divisor = lfreq;
7565 factor = 1;
7566 }
7567 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7568 {
7569 int bfreq = body[i]->count.to_frequency (cfun);
7570 if (bfreq <= lfreq)
7571 continue;
7572
7573 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7574 }
7575 }
7576 }
7577
7578 /* Optimizes the LOOP.  SSA names of induction variables that become unused are recorded in TOREMOVE.  Returns true if anything changed.  */
7579
7580 static bool
7581 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7582 bitmap toremove)
7583 {
7584 bool changed = false;
7585 struct iv_ca *iv_ca;
7586 edge exit = single_dom_exit (loop);
7587 basic_block *body;
7588
7589 gcc_assert (!data->niters);
7590 data->current_loop = loop;
7591 data->loop_loc = find_loop_location (loop).get_location_t ();
7592 data->speed = optimize_loop_for_speed_p (loop);
7593
7594 if (dump_file && (dump_flags & TDF_DETAILS))
7595 {
7596 fprintf (dump_file, "Processing loop %d", loop->num);
7597 if (data->loop_loc != UNKNOWN_LOCATION)
7598 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7599 LOCATION_LINE (data->loop_loc));
7600 fprintf (dump_file, "\n");
7601
7602 if (exit)
7603 {
7604 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7605 exit->src->index, exit->dest->index);
7606 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7607 fprintf (dump_file, "\n");
7608 }
7609
7610 fprintf (dump_file, "\n");
7611 }
7612
7613 body = get_loop_body (loop);
7614 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
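/* Renumber the uids of the statements in the loop body; the pass compares uids to decide the relative order of statements within a block.  */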
7615 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7616
7617 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7618
7619 /* For each SSA name, determine whether it behaves as an induction variable
7620 in some loop.  */
7621 if (!find_induction_variables (data))
7622 goto finish;
7623
7624 /* Finds interesting uses (item 1). */
7625 find_interesting_uses (data);
7626 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7627 goto finish;
7628
7629 /* Determine cost scaling factor for basic blocks in loop. */
7630 determine_scaling_factor (data, body);
7631
7632 /* Finds candidates for the induction variables (item 2). */
7633 find_iv_candidates (data);
7634
7635 /* Calculates the costs (item 3, part 1). */
7636 determine_iv_costs (data);
7637 determine_group_iv_costs (data);
7638 determine_set_costs (data);
7639
7640 /* Find the optimal set of induction variables (item 3, part 2). */
7641 iv_ca = find_optimal_iv_set (data);
7642 /* Cleanup basic block aux field. */
7643 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7644 body[i]->aux = NULL;
7645 if (!iv_ca)
7646 goto finish;
7647 changed = true;
7648
7649 /* Create the new induction variables (item 4, part 1). */
7650 create_new_ivs (data, iv_ca);
7651 iv_ca_free (&iv_ca);
7652
7653 /* Rewrite the uses (item 4, part 2). */
7654 rewrite_groups (data);
7655
7656 /* Remove the ivs that are unused after rewriting. */
7657 remove_unused_ivs (data, toremove);
7658
7659 finish:
7660 free (body);
7661 free_loop_data (data);
7662
7663 return changed;
7664 }
7665
7666 /* Main entry point. Optimizes induction variables in loops. */
7667
7668 void
7669 tree_ssa_iv_optimize (void)
7670 {
7671 struct loop *loop;
7672 struct ivopts_data data;
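/* Collects the SSA names of induction variables made unused by the per-loop passes; their definitions are released in one batch once all loops have been processed.  */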
7673 auto_bitmap toremove;
7674
7675 tree_ssa_iv_optimize_init (&data);
7676
7677 /* Optimize the loops starting with the innermost ones. */
7678 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7679 {
7680 if (dump_file && (dump_flags & TDF_DETAILS))
7681 flow_loop_dump (loop, dump_file, NULL, 1);
7682
7683 tree_ssa_iv_optimize_loop (&data, loop, toremove);
7684 }
7685
7686 /* Remove eliminated IV defs. */
7687 release_defs_bitset (toremove);
7688
7689 /* We have changed the structure of induction variables; it might happen
7690 that definitions in the scev database refer to some of them that were
7691 eliminated. */
7692 scev_reset_htab ();
7693 /* Likewise niter and control-IV information. */
7694 free_numbers_of_iterations_estimates (cfun);
7695
7696 tree_ssa_iv_optimize_finalize (&data);
7697 }
7698
7699 #include "gt-tree-ssa-loop-ivopts.h"