1 /* Induction variable optimizations.
2 Copyright (C) 2003-2019 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24      coalescing and induction variable elimination. It does so in the
25      following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33      Note the interesting uses are categorized and handled in groups.
34      Generally, address type uses are grouped together if their iv bases
35      differ only by a constant offset.
36
37 2) Candidates for the induction variables are found. This includes
38
39 -- old induction variables
40 -- the variables defined by expressions derived from the "interesting
41 groups/uses" above
42
43   3) The optimal (w.r.t. a cost function) set of variables is chosen. The
44 cost function assigns a cost to sets of induction variables and consists
45 of three parts:
46
47 -- The group/use costs. Each of the interesting groups/uses chooses
48 the best induction variable in the set and adds its cost to the sum.
49      The cost reflects the time spent on modifying the induction variable's
50 value to be usable for the given purpose (adding base and offset for
51 arrays, etc.).
52 -- The variable costs. Each of the variables has a cost assigned that
53 reflects the costs associated with incrementing the value of the
54 variable. The original variables are somewhat preferred.
55 -- The set cost. Depending on the size of the set, extra cost may be
56 added to reflect register pressure.
57
58 All the costs are defined in a machine-specific way, using the target
59 hooks and machine descriptions to determine them.
60
61 4) The trees are transformed to use the new variables, the dead code is
62 removed.
63
64   All of this is done loop by loop. Doing it globally is theoretically
65   possible; it might give better performance and enable us to decide
66   costs more precisely, but getting all the interactions right would be
67   complicated. */
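/* Editor's illustration (not part of the original sources): on a simple
   loop the combined effect of the steps above is classic strength
   reduction.  Assuming indexed addressing is more expensive than a
   pointer increment on the target, a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;                    -- address use a[i], compare use i < n

   may end up rewritten in terms of a single pointer candidate:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   Whether this actually happens is decided by the cost model of step 3,
   so the example only shows how the address and compare uses can be
   expressed by one chosen candidate.  */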
68
69 #include "config.h"
70 #include "system.h"
71 #include "coretypes.h"
72 #include "backend.h"
73 #include "rtl.h"
74 #include "tree.h"
75 #include "gimple.h"
76 #include "cfghooks.h"
77 #include "tree-pass.h"
78 #include "memmodel.h"
79 #include "tm_p.h"
80 #include "ssa.h"
81 #include "expmed.h"
82 #include "insn-config.h"
83 #include "emit-rtl.h"
84 #include "recog.h"
85 #include "cgraph.h"
86 #include "gimple-pretty-print.h"
87 #include "alias.h"
88 #include "fold-const.h"
89 #include "stor-layout.h"
90 #include "tree-eh.h"
91 #include "gimplify.h"
92 #include "gimple-iterator.h"
93 #include "gimplify-me.h"
94 #include "tree-cfg.h"
95 #include "tree-ssa-loop-ivopts.h"
96 #include "tree-ssa-loop-manip.h"
97 #include "tree-ssa-loop-niter.h"
98 #include "tree-ssa-loop.h"
99 #include "explow.h"
100 #include "expr.h"
101 #include "tree-dfa.h"
102 #include "tree-ssa.h"
103 #include "cfgloop.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "tree-affine.h"
107 #include "tree-ssa-propagate.h"
108 #include "tree-ssa-address.h"
109 #include "builtins.h"
110 #include "tree-vectorizer.h"
111
112 /* FIXME: Expressions are expanded to RTL in this pass to determine the
113 cost of different addressing modes. This should be moved to a TBD
114 interface between the GIMPLE and RTL worlds. */
115
116 /* The infinite cost. */
117 #define INFTY 1000000000
118
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
121 exists. */
122
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop *loop)
125 {
126 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
127 if (niter == -1)
128 {
129 niter = likely_max_stmt_executions_int (loop);
130
131 if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
133 }
134
135 return niter;
136 }
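/* Editor's note on the fallback above (the default value is an assumption):
   when no profile-based estimate is available, the likely maximum is used
   instead, capped at the --param avg-loop-niter value (10 unless overridden,
   to the best of my knowledge).  */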
137
138 struct iv_use;
139
140 /* Representation of the induction variable. */
141 struct iv
142 {
143 tree base; /* Initial value of the iv. */
144   tree base_object;	/* A memory object to which the induction variable points. */
145 tree step; /* Step of the iv (constant only). */
146 tree ssa_name; /* The ssa name with the value. */
147   struct iv_use *nonlin_use;	/* The nonlinear use it is recorded for, if any. */
148 bool biv_p; /* Is it a biv? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150   bool have_address_use; /* For a biv, indicates whether it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_REF_ADDRESS, /* Use is an address for an explicit memory
170 reference. */
171 USE_PTR_ADDRESS, /* Use is a pointer argument to a function in
172 cases where the expansion of the function
173 will turn the argument into a normal address. */
174 USE_COMPARE /* Use is a compare. */
175 };
176
177 /* Cost of a computation. */
178 struct comp_cost
179 {
180 comp_cost (): cost (0), complexity (0), scratch (0)
181 {}
182
183 comp_cost (int64_t cost, unsigned complexity, int64_t scratch = 0)
184 : cost (cost), complexity (complexity), scratch (scratch)
185 {}
186
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
189
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);
192
193 /* Adds COST to the comp_cost. */
194 comp_cost operator+= (comp_cost cost);
195
196 /* Adds constant C to this comp_cost. */
197 comp_cost operator+= (HOST_WIDE_INT c);
198
199   /* Subtracts constant C from this comp_cost. */
200 comp_cost operator-= (HOST_WIDE_INT c);
201
202 /* Divide the comp_cost by constant C. */
203 comp_cost operator/= (HOST_WIDE_INT c);
204
205 /* Multiply the comp_cost by constant C. */
206 comp_cost operator*= (HOST_WIDE_INT c);
207
208   /* Subtracts cost COST2 from COST1. */
209 friend comp_cost operator- (comp_cost cost1, comp_cost cost2);
210
211 /* Subtracts COST from this comp_cost. */
212 comp_cost operator-= (comp_cost cost);
213
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1, comp_cost cost2);
216
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1, comp_cost cost2);
219
220   /* Returns true if COST1 is smaller than or equal to COST2. */
221 friend bool operator<= (comp_cost cost1, comp_cost cost2);
222
223 int64_t cost; /* The runtime cost. */
224 unsigned complexity; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int64_t scratch; /* Scratch used during cost computation. */
229 };
230
231 static const comp_cost no_cost;
232 static const comp_cost infinite_cost (INFTY, 0, INFTY);
233
234 bool
235 comp_cost::infinite_cost_p ()
236 {
237 return cost == INFTY;
238 }
239
240 comp_cost
241 operator+ (comp_cost cost1, comp_cost cost2)
242 {
243 if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
244 return infinite_cost;
245
246 gcc_assert (cost1.cost + cost2.cost < infinite_cost.cost);
247 cost1.cost += cost2.cost;
248 cost1.complexity += cost2.complexity;
249
250 return cost1;
251 }
252
253 comp_cost
254 operator- (comp_cost cost1, comp_cost cost2)
255 {
256 if (cost1.infinite_cost_p ())
257 return infinite_cost;
258
259 gcc_assert (!cost2.infinite_cost_p ());
260 gcc_assert (cost1.cost - cost2.cost < infinite_cost.cost);
261
262 cost1.cost -= cost2.cost;
263 cost1.complexity -= cost2.complexity;
264
265 return cost1;
266 }
267
268 comp_cost
269 comp_cost::operator+= (comp_cost cost)
270 {
271 *this = *this + cost;
272 return *this;
273 }
274
275 comp_cost
276 comp_cost::operator+= (HOST_WIDE_INT c)
277 {
278 if (infinite_cost_p ())
279 return *this;
280
281 gcc_assert (this->cost + c < infinite_cost.cost);
282 this->cost += c;
283
284 return *this;
285 }
286
287 comp_cost
288 comp_cost::operator-= (HOST_WIDE_INT c)
289 {
290 if (infinite_cost_p ())
291 return *this;
292
293 gcc_assert (this->cost - c < infinite_cost.cost);
294 this->cost -= c;
295
296 return *this;
297 }
298
299 comp_cost
300 comp_cost::operator/= (HOST_WIDE_INT c)
301 {
302 gcc_assert (c != 0);
303 if (infinite_cost_p ())
304 return *this;
305
306 this->cost /= c;
307
308 return *this;
309 }
310
311 comp_cost
312 comp_cost::operator*= (HOST_WIDE_INT c)
313 {
314 if (infinite_cost_p ())
315 return *this;
316
317 gcc_assert (this->cost * c < infinite_cost.cost);
318 this->cost *= c;
319
320 return *this;
321 }
322
323 comp_cost
324 comp_cost::operator-= (comp_cost cost)
325 {
326 *this = *this - cost;
327 return *this;
328 }
329
330 bool
331 operator< (comp_cost cost1, comp_cost cost2)
332 {
333 if (cost1.cost == cost2.cost)
334 return cost1.complexity < cost2.complexity;
335
336 return cost1.cost < cost2.cost;
337 }
338
339 bool
340 operator== (comp_cost cost1, comp_cost cost2)
341 {
342 return cost1.cost == cost2.cost
343 && cost1.complexity == cost2.complexity;
344 }
345
346 bool
347 operator<= (comp_cost cost1, comp_cost cost2)
348 {
349 return cost1 < cost2 || cost1 == cost2;
350 }
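/* Editor's worked example of the ordering above (not in the original
   sources): costs compare primarily by the runtime COST field and use
   COMPLEXITY only to break ties, so with

     comp_cost a (4, 1), b (4, 2), c (5, 0);

   we have a < b (equal cost, lower complexity) and b < c (lower cost wins
   even though b's complexity is higher).  infinite_cost compares greater
   than any finite cost because its COST field is INFTY.  */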
351
352 struct iv_inv_expr_ent;
353
354 /* The candidate - cost pair. */
355 struct cost_pair
356 {
357 struct iv_cand *cand; /* The candidate. */
358 comp_cost cost; /* The cost. */
359 enum tree_code comp; /* For iv elimination, the comparison. */
360 bitmap inv_vars; /* The list of invariant ssa_vars that have to be
361 preserved when representing iv_use with iv_cand. */
362 bitmap inv_exprs; /* The list of newly created invariant expressions
363 when representing iv_use with iv_cand. */
364 tree value; /* For final value elimination, the expression for
365 the final value of the iv. For iv elimination,
366 the new bound to compare with. */
367 };
368
369 /* Use. */
370 struct iv_use
371 {
372 unsigned id; /* The id of the use. */
373 unsigned group_id; /* The group id the use belongs to. */
374 enum use_type type; /* Type of the use. */
375 tree mem_type; /* The memory type to use when testing whether an
376 address is legitimate, and what the address's
377 cost is. */
378 struct iv *iv; /* The induction variable it is based on. */
379 gimple *stmt; /* Statement in that it occurs. */
380 tree *op_p; /* The place where it occurs. */
381
382 tree addr_base; /* Base address with const offset stripped. */
383 poly_uint64_pod addr_offset;
384 /* Const offset stripped from base address. */
385 };
386
387 /* Group of uses. */
388 struct iv_group
389 {
390 /* The id of the group. */
391 unsigned id;
392 /* Uses of the group are of the same type. */
393 enum use_type type;
394 /* The set of "related" IV candidates, plus the important ones. */
395 bitmap related_cands;
396 /* Number of IV candidates in the cost_map. */
397 unsigned n_map_members;
398   /* The costs w.r.t. the iv candidates. */
399 struct cost_pair *cost_map;
400 /* The selected candidate for the group. */
401 struct iv_cand *selected;
402 /* Uses in the group. */
403 vec<struct iv_use *> vuses;
404 };
405
406 /* The position where the iv is computed. */
407 enum iv_position
408 {
409 IP_NORMAL, /* At the end, just before the exit condition. */
410 IP_END, /* At the end of the latch block. */
411 IP_BEFORE_USE, /* Immediately before a specific use. */
412 IP_AFTER_USE, /* Immediately after a specific use. */
413 IP_ORIGINAL /* The original biv. */
414 };
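/* Editor's sketch (not from the original sources) of the two common
   positions above, for a loop whose body ends in the exit test:

     loop:
       ... uses of the iv ...
       iv = iv + step;              <- IP_NORMAL: just before the exit test
       if (iv < bound) goto loop;

   IP_END places the increment at the end of the latch block instead
   (i.e. after the exit test), IP_BEFORE_USE/IP_AFTER_USE anchor it at a
   particular use, and IP_ORIGINAL keeps the original biv increment.  */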
415
416 /* The induction variable candidate. */
417 struct iv_cand
418 {
419 unsigned id; /* The number of the candidate. */
420 bool important; /* Whether this is an "important" candidate, i.e. such
421 that it should be considered by all uses. */
422 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
423 gimple *incremented_at;/* For original biv, the statement where it is
424 incremented. */
425 tree var_before; /* The variable used for it before increment. */
426 tree var_after; /* The variable used for it after increment. */
427 struct iv *iv; /* The value of the candidate. NULL for
428 "pseudocandidate" used to indicate the possibility
429 to replace the final value of an iv by direct
430 computation of the value. */
431 unsigned cost; /* Cost of the candidate. */
432 unsigned cost_step; /* Cost of the candidate's increment operation. */
433 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
434 where it is incremented. */
435 bitmap inv_vars; /* The list of invariant ssa_vars used in step of the
436 iv_cand. */
437 bitmap inv_exprs; /* If step is more complicated than a single ssa_var,
438 			   handle it as a new invariant expression which will
439 			   be hoisted out of the loop. */
440 struct iv *orig_iv; /* The original iv if this cand is added from biv with
441 smaller type. */
442 };
443
444 /* Hashtable entry for common candidate derived from iv uses. */
445 struct iv_common_cand
446 {
447 tree base;
448 tree step;
449 /* IV uses from which this common candidate is derived. */
450 auto_vec<struct iv_use *> uses;
451 hashval_t hash;
452 };
453
454 /* Hashtable helpers. */
455
456 struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
457 {
458 static inline hashval_t hash (const iv_common_cand *);
459 static inline bool equal (const iv_common_cand *, const iv_common_cand *);
460 };
461
462 /* Hash function for possible common candidates. */
463
464 inline hashval_t
465 iv_common_cand_hasher::hash (const iv_common_cand *ccand)
466 {
467 return ccand->hash;
468 }
469
470 /* Hash table equality function for common candidates. */
471
472 inline bool
473 iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
474 const iv_common_cand *ccand2)
475 {
476 return (ccand1->hash == ccand2->hash
477 && operand_equal_p (ccand1->base, ccand2->base, 0)
478 && operand_equal_p (ccand1->step, ccand2->step, 0)
479 && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
480 == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
481 }
482
483 /* Loop invariant expression hashtable entry. */
484
485 struct iv_inv_expr_ent
486 {
487 /* Tree expression of the entry. */
488 tree expr;
489   /* Unique identifier. */
490 int id;
491 /* Hash value. */
492 hashval_t hash;
493 };
494
495 /* Comparison function sorting iv_inv_expr_ents A and B by their id fields. */
496
497 static int
498 sort_iv_inv_expr_ent (const void *a, const void *b)
499 {
500 const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
501 const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);
502
503 unsigned id1 = (*e1)->id;
504 unsigned id2 = (*e2)->id;
505
506 if (id1 < id2)
507 return -1;
508 else if (id1 > id2)
509 return 1;
510 else
511 return 0;
512 }
513
514 /* Hashtable helpers. */
515
516 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
517 {
518 static inline hashval_t hash (const iv_inv_expr_ent *);
519 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
520 };
521
522 /* Return true if uses of type TYPE represent some form of address. */
523
524 inline bool
525 address_p (use_type type)
526 {
527 return type == USE_REF_ADDRESS || type == USE_PTR_ADDRESS;
528 }
529
530 /* Hash function for loop invariant expressions. */
531
532 inline hashval_t
533 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
534 {
535 return expr->hash;
536 }
537
538 /* Hash table equality function for expressions. */
539
540 inline bool
541 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
542 const iv_inv_expr_ent *expr2)
543 {
544 return expr1->hash == expr2->hash
545 && operand_equal_p (expr1->expr, expr2->expr, 0);
546 }
547
548 struct ivopts_data
549 {
550 /* The currently optimized loop. */
551 struct loop *current_loop;
552 location_t loop_loc;
553
554 /* Numbers of iterations for all exits of the current loop. */
555 hash_map<edge, tree_niter_desc *> *niters;
556
557 /* Number of registers used in it. */
558 unsigned regs_used;
559
560 /* The size of version_info array allocated. */
561 unsigned version_info_size;
562
563 /* The array of information for the ssa names. */
564 struct version_info *version_info;
565
566 /* The hashtable of loop invariant expressions created
567 by ivopt. */
568 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
569
570 /* The bitmap of indices in version_info whose value was changed. */
571 bitmap relevant;
572
573 /* The uses of induction variables. */
574 vec<iv_group *> vgroups;
575
576 /* The candidates. */
577 vec<iv_cand *> vcands;
578
579 /* A bitmap of important candidates. */
580 bitmap important_candidates;
581
582 /* Cache used by tree_to_aff_combination_expand. */
583 hash_map<tree, name_expansion *> *name_expansion_cache;
584
585 /* The hashtable of common candidates derived from iv uses. */
586 hash_table<iv_common_cand_hasher> *iv_common_cand_tab;
587
588 /* The common candidates. */
589 vec<iv_common_cand *> iv_common_cands;
590
591 /* The maximum invariant variable id. */
592 unsigned max_inv_var_id;
593
594 /* The maximum invariant expression id. */
595 unsigned max_inv_expr_id;
596
597 /* Number of no_overflow BIVs which are not used in memory address. */
598 unsigned bivs_not_used_in_addr;
599
600 /* Obstack for iv structure. */
601 struct obstack iv_obstack;
602
603 /* Whether to consider just related and important candidates when replacing a
604 use. */
605 bool consider_all_candidates;
606
607 /* Are we optimizing for speed? */
608 bool speed;
609
610 /* Whether the loop body includes any function calls. */
611 bool body_includes_call;
612
613 /* Whether the loop body can only be exited via single exit. */
614 bool loop_single_exit_p;
615 };
616
617 /* An assignment of iv candidates to uses. */
618
619 struct iv_ca
620 {
621 /* The number of uses covered by the assignment. */
622 unsigned upto;
623
624 /* Number of uses that cannot be expressed by the candidates in the set. */
625 unsigned bad_groups;
626
627 /* Candidate assigned to a use, together with the related costs. */
628 struct cost_pair **cand_for_group;
629
630 /* Number of times each candidate is used. */
631 unsigned *n_cand_uses;
632
633 /* The candidates used. */
634 bitmap cands;
635
636 /* The number of candidates in the set. */
637 unsigned n_cands;
638
639   /* The number of invariants needed, including both invariant variables and
640      invariant expressions. */
641 unsigned n_invs;
642
643 /* Total cost of expressing uses. */
644 comp_cost cand_use_cost;
645
646 /* Total cost of candidates. */
647 int64_t cand_cost;
648
649 /* Number of times each invariant variable is used. */
650 unsigned *n_inv_var_uses;
651
652 /* Number of times each invariant expression is used. */
653 unsigned *n_inv_expr_uses;
654
655 /* Total cost of the assignment. */
656 comp_cost cost;
657 };
658
659 /* Difference of two iv candidate assignments. */
660
661 struct iv_ca_delta
662 {
663 /* Changed group. */
664 struct iv_group *group;
665
666 /* An old assignment (for rollback purposes). */
667 struct cost_pair *old_cp;
668
669 /* A new assignment. */
670 struct cost_pair *new_cp;
671
672 /* Next change in the list. */
673 struct iv_ca_delta *next;
674 };
675
676 /* Bound on number of candidates below which all candidates are considered. */
677
678 #define CONSIDER_ALL_CANDIDATES_BOUND \
679 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
680
681 /* If there are more iv occurrences, we just give up (it is quite unlikely that
682 optimizing such a loop would help, and it would take ages). */
683
684 #define MAX_CONSIDERED_GROUPS \
685 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
686
687 /* If there are at most this number of ivs in the set, always try removing
688    unnecessary ivs from the set. */
689
690 #define ALWAYS_PRUNE_CAND_SET_BOUND \
691 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
692
693 /* The list of trees for which the decl_rtl field must be reset is stored
694    here. */
695
696 static vec<tree> decl_rtl_to_reset;
697
698 static comp_cost force_expr_to_var_cost (tree, bool);
699
700 /* The single loop exit if it dominates the latch, NULL otherwise. */
701
702 edge
703 single_dom_exit (struct loop *loop)
704 {
705 edge exit = single_exit (loop);
706
707 if (!exit)
708 return NULL;
709
710 if (!just_once_each_iteration_p (loop, exit->src))
711 return NULL;
712
713 return exit;
714 }
715
716 /* Dumps information about the induction variable IV to FILE. Don't dump
717 variable's name if DUMP_NAME is FALSE. The information is dumped with
718 preceding spaces indicated by INDENT_LEVEL. */
719
720 void
721 dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
722 {
723 const char *p;
724 const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
725
726 if (indent_level > 4)
727 indent_level = 4;
728 p = spaces + 8 - (indent_level << 1);
729
730 fprintf (file, "%sIV struct:\n", p);
731 if (iv->ssa_name && dump_name)
732 {
733 fprintf (file, "%s SSA_NAME:\t", p);
734 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
735 fprintf (file, "\n");
736 }
737
738 fprintf (file, "%s Type:\t", p);
739 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
740 fprintf (file, "\n");
741
742 fprintf (file, "%s Base:\t", p);
743 print_generic_expr (file, iv->base, TDF_SLIM);
744 fprintf (file, "\n");
745
746 fprintf (file, "%s Step:\t", p);
747 print_generic_expr (file, iv->step, TDF_SLIM);
748 fprintf (file, "\n");
749
750 if (iv->base_object)
751 {
752 fprintf (file, "%s Object:\t", p);
753 print_generic_expr (file, iv->base_object, TDF_SLIM);
754 fprintf (file, "\n");
755 }
756
757 fprintf (file, "%s Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');
758
759 fprintf (file, "%s Overflowness wrto loop niter:\t%s\n",
760 p, iv->no_overflow ? "No-overflow" : "Overflow");
761 }
762
763 /* Dumps information about the USE to FILE. */
764
765 void
766 dump_use (FILE *file, struct iv_use *use)
767 {
768 fprintf (file, " Use %d.%d:\n", use->group_id, use->id);
769 fprintf (file, " At stmt:\t");
770 print_gimple_stmt (file, use->stmt, 0);
771 fprintf (file, " At pos:\t");
772 if (use->op_p)
773 print_generic_expr (file, *use->op_p, TDF_SLIM);
774 fprintf (file, "\n");
775 dump_iv (file, use->iv, false, 2);
776 }
777
778 /* Dumps information about the uses to FILE. */
779
780 void
781 dump_groups (FILE *file, struct ivopts_data *data)
782 {
783 unsigned i, j;
784 struct iv_group *group;
785
786 for (i = 0; i < data->vgroups.length (); i++)
787 {
788 group = data->vgroups[i];
789 fprintf (file, "Group %d:\n", group->id);
790 if (group->type == USE_NONLINEAR_EXPR)
791 fprintf (file, " Type:\tGENERIC\n");
792 else if (group->type == USE_REF_ADDRESS)
793 fprintf (file, " Type:\tREFERENCE ADDRESS\n");
794 else if (group->type == USE_PTR_ADDRESS)
795 fprintf (file, " Type:\tPOINTER ARGUMENT ADDRESS\n");
796 else
797 {
798 gcc_assert (group->type == USE_COMPARE);
799 fprintf (file, " Type:\tCOMPARE\n");
800 }
801 for (j = 0; j < group->vuses.length (); j++)
802 dump_use (file, group->vuses[j]);
803 }
804 }
805
806 /* Dumps information about induction variable candidate CAND to FILE. */
807
808 void
809 dump_cand (FILE *file, struct iv_cand *cand)
810 {
811 struct iv *iv = cand->iv;
812
813 fprintf (file, "Candidate %d:\n", cand->id);
814 if (cand->inv_vars)
815 {
816 fprintf (file, " Depend on inv.vars: ");
817 dump_bitmap (file, cand->inv_vars);
818 }
819 if (cand->inv_exprs)
820 {
821 fprintf (file, " Depend on inv.exprs: ");
822 dump_bitmap (file, cand->inv_exprs);
823 }
824
825 if (cand->var_before)
826 {
827       fprintf (file, " Var before: ");
828 print_generic_expr (file, cand->var_before, TDF_SLIM);
829 fprintf (file, "\n");
830 }
831 if (cand->var_after)
832 {
833 fprintf (file, " Var after: ");
834 print_generic_expr (file, cand->var_after, TDF_SLIM);
835 fprintf (file, "\n");
836 }
837
838 switch (cand->pos)
839 {
840 case IP_NORMAL:
841 fprintf (file, " Incr POS: before exit test\n");
842 break;
843
844 case IP_BEFORE_USE:
845 fprintf (file, " Incr POS: before use %d\n", cand->ainc_use->id);
846 break;
847
848 case IP_AFTER_USE:
849 fprintf (file, " Incr POS: after use %d\n", cand->ainc_use->id);
850 break;
851
852 case IP_END:
853 fprintf (file, " Incr POS: at end\n");
854 break;
855
856 case IP_ORIGINAL:
857 fprintf (file, " Incr POS: orig biv\n");
858 break;
859 }
860
861 dump_iv (file, iv, false, 1);
862 }
863
864 /* Returns the info for ssa version VER. */
865
866 static inline struct version_info *
867 ver_info (struct ivopts_data *data, unsigned ver)
868 {
869 return data->version_info + ver;
870 }
871
872 /* Returns the info for ssa name NAME. */
873
874 static inline struct version_info *
875 name_info (struct ivopts_data *data, tree name)
876 {
877 return ver_info (data, SSA_NAME_VERSION (name));
878 }
879
880 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
881 emitted in LOOP. */
882
883 static bool
884 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
885 {
886 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
887
888 gcc_assert (bb);
889
890 if (sbb == loop->latch)
891 return true;
892
893 if (sbb != bb)
894 return false;
895
896 return stmt == last_stmt (bb);
897 }
898
899 /* Returns true if STMT is after the place where the original induction
900 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
901 if the positions are identical. */
902
903 static bool
904 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
905 {
906 basic_block cand_bb = gimple_bb (cand->incremented_at);
907 basic_block stmt_bb = gimple_bb (stmt);
908
909 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
910 return false;
911
912 if (stmt_bb != cand_bb)
913 return true;
914
915 if (true_if_equal
916 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
917 return true;
918 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
919 }
920
921 /* Returns true if STMT is after the place where the induction variable
922 CAND is incremented in LOOP. */
923
924 static bool
925 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
926 {
927 switch (cand->pos)
928 {
929 case IP_END:
930 return false;
931
932 case IP_NORMAL:
933 return stmt_after_ip_normal_pos (loop, stmt);
934
935 case IP_ORIGINAL:
936 case IP_AFTER_USE:
937 return stmt_after_inc_pos (cand, stmt, false);
938
939 case IP_BEFORE_USE:
940 return stmt_after_inc_pos (cand, stmt, true);
941
942 default:
943 gcc_unreachable ();
944 }
945 }
946
947 /* walk_tree callback for contains_abnormal_ssa_name_p. */
948
949 static tree
950 contains_abnormal_ssa_name_p_1 (tree *tp, int *walk_subtrees, void *)
951 {
952 if (TREE_CODE (*tp) == SSA_NAME
953 && SSA_NAME_OCCURS_IN_ABNORMAL_PHI (*tp))
954 return *tp;
955
956 if (!EXPR_P (*tp))
957 *walk_subtrees = 0;
958
959 return NULL_TREE;
960 }
961
962 /* Returns true if EXPR contains a ssa name that occurs in an
963 abnormal phi node. */
964
965 bool
966 contains_abnormal_ssa_name_p (tree expr)
967 {
968 return walk_tree_without_duplicates
969 (&expr, contains_abnormal_ssa_name_p_1, NULL) != NULL_TREE;
970 }
971
972 /* Returns the structure describing number of iterations determined from
973 EXIT of DATA->current_loop, or NULL if something goes wrong. */
974
975 static struct tree_niter_desc *
976 niter_for_exit (struct ivopts_data *data, edge exit)
977 {
978 struct tree_niter_desc *desc;
979 tree_niter_desc **slot;
980
981 if (!data->niters)
982 {
983 data->niters = new hash_map<edge, tree_niter_desc *>;
984 slot = NULL;
985 }
986 else
987 slot = data->niters->get (exit);
988
989 if (!slot)
990 {
991 /* Try to determine number of iterations. We cannot safely work with ssa
992 names that appear in phi nodes on abnormal edges, so that we do not
993 create overlapping life ranges for them (PR 27283). */
994 desc = XNEW (struct tree_niter_desc);
995 if (!number_of_iterations_exit (data->current_loop,
996 exit, desc, true)
997 || contains_abnormal_ssa_name_p (desc->niter))
998 {
999 XDELETE (desc);
1000 desc = NULL;
1001 }
1002 data->niters->put (exit, desc);
1003 }
1004 else
1005 desc = *slot;
1006
1007 return desc;
1008 }
1009
1010 /* Returns the structure describing number of iterations determined from
1011 single dominating exit of DATA->current_loop, or NULL if something
1012 goes wrong. */
1013
1014 static struct tree_niter_desc *
1015 niter_for_single_dom_exit (struct ivopts_data *data)
1016 {
1017 edge exit = single_dom_exit (data->current_loop);
1018
1019 if (!exit)
1020 return NULL;
1021
1022 return niter_for_exit (data, exit);
1023 }
1024
1025 /* Initializes data structures used by the iv optimization pass, stored
1026 in DATA. */
1027
1028 static void
1029 tree_ssa_iv_optimize_init (struct ivopts_data *data)
1030 {
1031 data->version_info_size = 2 * num_ssa_names;
1032 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
1033 data->relevant = BITMAP_ALLOC (NULL);
1034 data->important_candidates = BITMAP_ALLOC (NULL);
1035 data->max_inv_var_id = 0;
1036 data->max_inv_expr_id = 0;
1037 data->niters = NULL;
1038 data->vgroups.create (20);
1039 data->vcands.create (20);
1040 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
1041 data->name_expansion_cache = NULL;
1042 data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
1043 data->iv_common_cands.create (20);
1044 decl_rtl_to_reset.create (20);
1045 gcc_obstack_init (&data->iv_obstack);
1046 }
1047
1048 /* Returns a memory object to which EXPR points. If we are able to
1049    determine that it does not point to any such object, NULL is returned. */
1050
1051 static tree
1052 determine_base_object (tree expr)
1053 {
1054 enum tree_code code = TREE_CODE (expr);
1055 tree base, obj;
1056
1057   /* If this is a pointer cast to any type, we need to determine
1058 the base object for the pointer; so handle conversions before
1059 throwing away non-pointer expressions. */
1060 if (CONVERT_EXPR_P (expr))
1061 return determine_base_object (TREE_OPERAND (expr, 0));
1062
1063 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
1064 return NULL_TREE;
1065
1066 switch (code)
1067 {
1068 case INTEGER_CST:
1069 return NULL_TREE;
1070
1071 case ADDR_EXPR:
1072 obj = TREE_OPERAND (expr, 0);
1073 base = get_base_address (obj);
1074
1075 if (!base)
1076 return expr;
1077
1078 if (TREE_CODE (base) == MEM_REF)
1079 return determine_base_object (TREE_OPERAND (base, 0));
1080
1081 return fold_convert (ptr_type_node,
1082 build_fold_addr_expr (base));
1083
1084 case POINTER_PLUS_EXPR:
1085 return determine_base_object (TREE_OPERAND (expr, 0));
1086
1087 case PLUS_EXPR:
1088 case MINUS_EXPR:
1089 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1090 gcc_unreachable ();
1091
1092 default:
1093 if (POLY_INT_CST_P (expr))
1094 return NULL_TREE;
1095 return fold_convert (ptr_type_node, expr);
1096 }
1097 }
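/* Editor's note with hypothetical examples of the function above: for
   "int a[10];" and a pointer SSA name p_1,

     determine_base_object (&a[3])       ~> (void *) &a    (ADDR_EXPR case)
     determine_base_object (p_1 + 16)    ~> (void *) p_1   (POINTER_PLUS_EXPR,
                                                            then default case)
     determine_base_object ((void *) 0)  ~> NULL_TREE      (INTEGER_CST case)

   i.e. offsets and conversions are stripped and only the underlying
   object survives, or NULL_TREE when no object can be identified.  */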
1098
1099 /* Return true if an address expression with a non-DECL_P operand appears
1100    in EXPR. */
1101
1102 static bool
1103 contain_complex_addr_expr (tree expr)
1104 {
1105 bool res = false;
1106
1107 STRIP_NOPS (expr);
1108 switch (TREE_CODE (expr))
1109 {
1110 case POINTER_PLUS_EXPR:
1111 case PLUS_EXPR:
1112 case MINUS_EXPR:
1113 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
1114 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
1115 break;
1116
1117 case ADDR_EXPR:
1118 return (!DECL_P (TREE_OPERAND (expr, 0)));
1119
1120 default:
1121 return false;
1122 }
1123
1124 return res;
1125 }
1126
1127 /* Allocates an induction variable with given initial value BASE and step STEP
1128 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1129
1130 static struct iv *
1131 alloc_iv (struct ivopts_data *data, tree base, tree step,
1132 bool no_overflow = false)
1133 {
1134 tree expr = base;
1135 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
1136 sizeof (struct iv));
1137 gcc_assert (step != NULL_TREE);
1138
1139   /* Lower address expressions in base, except ones with a DECL_P operand.
1140 By doing this:
1141 1) More accurate cost can be computed for address expressions;
1142 2) Duplicate candidates won't be created for bases in different
1143 forms, like &a[0] and &a. */
1144 STRIP_NOPS (expr);
1145 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1146 || contain_complex_addr_expr (expr))
1147 {
1148 aff_tree comb;
1149 tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
1150 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1151 }
1152
1153 iv->base = base;
1154 iv->base_object = determine_base_object (base);
1155 iv->step = step;
1156 iv->biv_p = false;
1157 iv->nonlin_use = NULL;
1158 iv->ssa_name = NULL_TREE;
1159 if (!no_overflow
1160 && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
1161 base, step))
1162 no_overflow = true;
1163 iv->no_overflow = no_overflow;
1164 iv->have_address_use = false;
1165
1166 return iv;
1167 }
1168
1169 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1170 doesn't overflow. */
1171
1172 static void
1173 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1174 bool no_overflow)
1175 {
1176 struct version_info *info = name_info (data, iv);
1177
1178 gcc_assert (!info->iv);
1179
1180 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1181 info->iv = alloc_iv (data, base, step, no_overflow);
1182 info->iv->ssa_name = iv;
1183 }
1184
1185 /* Finds induction variable declaration for VAR. */
1186
1187 static struct iv *
1188 get_iv (struct ivopts_data *data, tree var)
1189 {
1190 basic_block bb;
1191 tree type = TREE_TYPE (var);
1192
1193 if (!POINTER_TYPE_P (type)
1194 && !INTEGRAL_TYPE_P (type))
1195 return NULL;
1196
1197 if (!name_info (data, var)->iv)
1198 {
1199 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1200
1201 if (!bb
1202 || !flow_bb_inside_loop_p (data->current_loop, bb))
1203 set_iv (data, var, var, build_int_cst (type, 0), true);
1204 }
1205
1206 return name_info (data, var)->iv;
1207 }
1208
1209 /* Return the first non-invariant ssa var found in EXPR. */
1210
1211 static tree
1212 extract_single_var_from_expr (tree expr)
1213 {
1214 int i, n;
1215 tree tmp;
1216 enum tree_code code;
1217
1218 if (!expr || is_gimple_min_invariant (expr))
1219 return NULL;
1220
1221 code = TREE_CODE (expr);
1222 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1223 {
1224 n = TREE_OPERAND_LENGTH (expr);
1225 for (i = 0; i < n; i++)
1226 {
1227 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1228
1229 if (tmp)
1230 return tmp;
1231 }
1232 }
1233 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1234 }
1235
1236 /* Finds basic ivs. */
1237
1238 static bool
1239 find_bivs (struct ivopts_data *data)
1240 {
1241 gphi *phi;
1242 affine_iv iv;
1243 tree step, type, base, stop;
1244 bool found = false;
1245 struct loop *loop = data->current_loop;
1246 gphi_iterator psi;
1247
1248 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1249 {
1250 phi = psi.phi ();
1251
1252 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1253 continue;
1254
1255 if (virtual_operand_p (PHI_RESULT (phi)))
1256 continue;
1257
1258 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1259 continue;
1260
1261 if (integer_zerop (iv.step))
1262 continue;
1263
1264 step = iv.step;
1265 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1266       /* Stop expanding the iv base at the first ssa var referred to by the
1267 	 iv step. Ideally we should stop at every such ssa var, but since
1268 	 that's expensive and unusual to happen, we just do it for the first one.
1269
1270 See PR64705 for the rationale. */
1271 stop = extract_single_var_from_expr (step);
1272 base = expand_simple_operations (base, stop);
1273 if (contains_abnormal_ssa_name_p (base)
1274 || contains_abnormal_ssa_name_p (step))
1275 continue;
1276
1277 type = TREE_TYPE (PHI_RESULT (phi));
1278 base = fold_convert (type, base);
1279 if (step)
1280 {
1281 if (POINTER_TYPE_P (type))
1282 step = convert_to_ptrofftype (step);
1283 else
1284 step = fold_convert (type, step);
1285 }
1286
1287 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1288 found = true;
1289 }
1290
1291 return found;
1292 }
1293
1294 /* Marks basic ivs. */
1295
1296 static void
1297 mark_bivs (struct ivopts_data *data)
1298 {
1299 gphi *phi;
1300 gimple *def;
1301 tree var;
1302 struct iv *iv, *incr_iv;
1303 struct loop *loop = data->current_loop;
1304 basic_block incr_bb;
1305 gphi_iterator psi;
1306
1307 data->bivs_not_used_in_addr = 0;
1308 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1309 {
1310 phi = psi.phi ();
1311
1312 iv = get_iv (data, PHI_RESULT (phi));
1313 if (!iv)
1314 continue;
1315
1316 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1317 def = SSA_NAME_DEF_STMT (var);
1318       /* Don't mark an iv peeled from another one as a biv. */
1319 if (def
1320 && gimple_code (def) == GIMPLE_PHI
1321 && gimple_bb (def) == loop->header)
1322 continue;
1323
1324 incr_iv = get_iv (data, var);
1325 if (!incr_iv)
1326 continue;
1327
1328 /* If the increment is in the subloop, ignore it. */
1329 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1330 if (incr_bb->loop_father != data->current_loop
1331 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1332 continue;
1333
1334 iv->biv_p = true;
1335 incr_iv->biv_p = true;
1336 if (iv->no_overflow)
1337 data->bivs_not_used_in_addr++;
1338 if (incr_iv->no_overflow)
1339 data->bivs_not_used_in_addr++;
1340 }
1341 }
1342
1343 /* Checks whether STMT defines a linear induction variable and stores its
1344 parameters to IV. */
1345
1346 static bool
1347 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1348 {
1349 tree lhs, stop;
1350 struct loop *loop = data->current_loop;
1351
1352 iv->base = NULL_TREE;
1353 iv->step = NULL_TREE;
1354
1355 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1356 return false;
1357
1358 lhs = gimple_assign_lhs (stmt);
1359 if (TREE_CODE (lhs) != SSA_NAME)
1360 return false;
1361
1362 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1363 return false;
1364
1365   /* Stop expanding the iv base at the first ssa var referred to by the
1366      iv step. Ideally we should stop at every such ssa var, but since
1367      that's expensive and unusual to happen, we just do it for the first one.
1368
1369 See PR64705 for the rationale. */
1370 stop = extract_single_var_from_expr (iv->step);
1371 iv->base = expand_simple_operations (iv->base, stop);
1372 if (contains_abnormal_ssa_name_p (iv->base)
1373 || contains_abnormal_ssa_name_p (iv->step))
1374 return false;
1375
1376 /* If STMT could throw, then do not consider STMT as defining a GIV.
1377 While this will suppress optimizations, we cannot safely delete this
1378 GIV and associated statements, even if it appears it is not used. */
1379 if (stmt_could_throw_p (cfun, stmt))
1380 return false;
1381
1382 return true;
1383 }
1384
1385 /* Finds general ivs in statement STMT. */
1386
1387 static void
1388 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1389 {
1390 affine_iv iv;
1391
1392 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1393 return;
1394
1395 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1396 }
1397
1398 /* Finds general ivs in basic block BB. */
1399
1400 static void
1401 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1402 {
1403 gimple_stmt_iterator bsi;
1404
1405 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1406 find_givs_in_stmt (data, gsi_stmt (bsi));
1407 }
1408
1409 /* Finds general ivs. */
1410
1411 static void
1412 find_givs (struct ivopts_data *data)
1413 {
1414 struct loop *loop = data->current_loop;
1415 basic_block *body = get_loop_body_in_dom_order (loop);
1416 unsigned i;
1417
1418 for (i = 0; i < loop->num_nodes; i++)
1419 find_givs_in_bb (data, body[i]);
1420 free (body);
1421 }
1422
1423 /* For each ssa name defined in LOOP determines whether it is an induction
1424 variable and if so, its initial value and step. */
1425
1426 static bool
1427 find_induction_variables (struct ivopts_data *data)
1428 {
1429 unsigned i;
1430 bitmap_iterator bi;
1431
1432 if (!find_bivs (data))
1433 return false;
1434
1435 find_givs (data);
1436 mark_bivs (data);
1437
1438 if (dump_file && (dump_flags & TDF_DETAILS))
1439 {
1440 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1441
1442 if (niter)
1443 {
1444 fprintf (dump_file, " number of iterations ");
1445 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1446 if (!integer_zerop (niter->may_be_zero))
1447 {
1448 fprintf (dump_file, "; zero if ");
1449 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1450 }
1451 fprintf (dump_file, "\n");
1452 	}
1453
1454 fprintf (dump_file, "\n<Induction Vars>:\n");
1455 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1456 {
1457 struct version_info *info = ver_info (data, i);
1458 if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
1459 dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
1460 }
1461 }
1462
1463 return true;
1464 }
1465
1466 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1467 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1468 is the const offset stripped from IV base and MEM_TYPE is the type
1469 of the memory being addressed. For uses of other types, ADDR_BASE
1470 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1471
1472 static struct iv_use *
1473 record_use (struct iv_group *group, tree *use_p, struct iv *iv,
1474 gimple *stmt, enum use_type type, tree mem_type,
1475 tree addr_base, poly_uint64 addr_offset)
1476 {
1477 struct iv_use *use = XCNEW (struct iv_use);
1478
1479 use->id = group->vuses.length ();
1480 use->group_id = group->id;
1481 use->type = type;
1482 use->mem_type = mem_type;
1483 use->iv = iv;
1484 use->stmt = stmt;
1485 use->op_p = use_p;
1486 use->addr_base = addr_base;
1487 use->addr_offset = addr_offset;
1488
1489 group->vuses.safe_push (use);
1490 return use;
1491 }
1492
1493 /* Checks whether OP is a loop-level invariant and if so, records it.
1494 NONLINEAR_USE is true if the invariant is used in a way we do not
1495 handle specially. */
1496
1497 static void
1498 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1499 {
1500 basic_block bb;
1501 struct version_info *info;
1502
1503 if (TREE_CODE (op) != SSA_NAME
1504 || virtual_operand_p (op))
1505 return;
1506
1507 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1508 if (bb
1509 && flow_bb_inside_loop_p (data->current_loop, bb))
1510 return;
1511
1512 info = name_info (data, op);
1513 info->name = op;
1514 info->has_nonlin_use |= nonlinear_use;
1515 if (!info->inv_id)
1516 info->inv_id = ++data->max_inv_var_id;
1517 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1518 }
1519
1520 /* Record a group of TYPE. */
1521
1522 static struct iv_group *
1523 record_group (struct ivopts_data *data, enum use_type type)
1524 {
1525 struct iv_group *group = XCNEW (struct iv_group);
1526
1527 group->id = data->vgroups.length ();
1528 group->type = type;
1529 group->related_cands = BITMAP_ALLOC (NULL);
1530 group->vuses.create (1);
1531
1532 data->vgroups.safe_push (group);
1533 return group;
1534 }
1535
1536 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1537    A new group will be created if there is no existing group for the use.
1538 MEM_TYPE is the type of memory being addressed, or NULL if this
1539 isn't an address reference. */
1540
1541 static struct iv_use *
1542 record_group_use (struct ivopts_data *data, tree *use_p,
1543 struct iv *iv, gimple *stmt, enum use_type type,
1544 tree mem_type)
1545 {
1546 tree addr_base = NULL;
1547 struct iv_group *group = NULL;
1548 poly_uint64 addr_offset = 0;
1549
1550   /* A non address type use is recorded in a new group; address type uses may share an existing group. */
1551 if (address_p (type))
1552 {
1553 unsigned int i;
1554
1555 addr_base = strip_offset (iv->base, &addr_offset);
1556 for (i = 0; i < data->vgroups.length (); i++)
1557 {
1558 struct iv_use *use;
1559
1560 group = data->vgroups[i];
1561 use = group->vuses[0];
1562 if (!address_p (use->type))
1563 continue;
1564
1565 /* Check if it has the same stripped base and step. */
1566 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1567 && operand_equal_p (iv->step, use->iv->step, 0)
1568 && operand_equal_p (addr_base, use->addr_base, 0))
1569 break;
1570 }
1571 if (i == data->vgroups.length ())
1572 group = NULL;
1573 }
1574
1575 if (!group)
1576 group = record_group (data, type);
1577
1578 return record_use (group, use_p, iv, stmt, type, mem_type,
1579 addr_base, addr_offset);
1580 }
1581
1582 /* Checks whether the use OP is interesting and if so, records it. */
1583
1584 static struct iv_use *
1585 find_interesting_uses_op (struct ivopts_data *data, tree op)
1586 {
1587 struct iv *iv;
1588 gimple *stmt;
1589 struct iv_use *use;
1590
1591 if (TREE_CODE (op) != SSA_NAME)
1592 return NULL;
1593
1594 iv = get_iv (data, op);
1595 if (!iv)
1596 return NULL;
1597
1598 if (iv->nonlin_use)
1599 {
1600 gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
1601 return iv->nonlin_use;
1602 }
1603
1604 if (integer_zerop (iv->step))
1605 {
1606 record_invariant (data, op, true);
1607 return NULL;
1608 }
1609
1610 stmt = SSA_NAME_DEF_STMT (op);
1611 gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));
1612
1613 use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR, NULL_TREE);
1614 iv->nonlin_use = use;
1615 return use;
1616 }
1617
1618 /* Indicate how compare type iv_use can be handled. */
1619 enum comp_iv_rewrite
1620 {
1621 COMP_IV_NA,
1622 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1623 COMP_IV_EXPR,
1624 /* We may rewrite compare type iv_uses on both sides of comparison by
1625 expressing value of each iv_use. */
1626 COMP_IV_EXPR_2,
1627 /* We may rewrite compare type iv_use by expressing value of the iv_use
1628 or by eliminating it with other iv_cand. */
1629 COMP_IV_ELIM
1630 };
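/* Editor's sketch of how compare uses are classified (hypothetical ivs
   i, j and loop invariant n; the real classification is done by
   extract_cond_operands below):

     i < n    -> COMP_IV_ELIM    one iv, one invariant: the compare may be
                                 eliminated against a suitable candidate
     i < j    -> COMP_IV_EXPR_2  ivs on both sides: each side may be
                                 rewritten from its own iv
     i < t    -> COMP_IV_EXPR    t neither iv nor invariant: only the iv
                                 side may be rewritten
     n1 < n2  -> COMP_IV_NA      no iv involved.  */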
1631
1632 /* Given a condition in statement STMT, checks whether it is a compare
1633 of an induction variable and an invariant. If this is the case,
1634    CONTROL_VAR is set to the location of the iv, BOUND to the location of
1635    the invariant, IV_VAR and IV_BOUND are set to the corresponding
1636    induction variable descriptions, and the kind of rewrite possible is
1637    returned. If this is not the case, CONTROL_VAR and BOUND are set to
1638    the arguments of the condition and COMP_IV_NA is returned. */
1639
1640 static enum comp_iv_rewrite
1641 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1642 tree **control_var, tree **bound,
1643 struct iv **iv_var, struct iv **iv_bound)
1644 {
1645 /* The objects returned when COND has constant operands. */
1646 static struct iv const_iv;
1647 static tree zero;
1648 tree *op0 = &zero, *op1 = &zero;
1649 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1650 enum comp_iv_rewrite rewrite_type = COMP_IV_NA;
1651
1652 if (gimple_code (stmt) == GIMPLE_COND)
1653 {
1654 gcond *cond_stmt = as_a <gcond *> (stmt);
1655 op0 = gimple_cond_lhs_ptr (cond_stmt);
1656 op1 = gimple_cond_rhs_ptr (cond_stmt);
1657 }
1658 else
1659 {
1660 op0 = gimple_assign_rhs1_ptr (stmt);
1661 op1 = gimple_assign_rhs2_ptr (stmt);
1662 }
1663
1664 zero = integer_zero_node;
1665 const_iv.step = integer_zero_node;
1666
1667 if (TREE_CODE (*op0) == SSA_NAME)
1668 iv0 = get_iv (data, *op0);
1669 if (TREE_CODE (*op1) == SSA_NAME)
1670 iv1 = get_iv (data, *op1);
1671
1672   /* If both sides of the comparison are IVs, we can express ivs on both ends. */
1673 if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
1674 {
1675 rewrite_type = COMP_IV_EXPR_2;
1676 goto end;
1677 }
1678
1679   /* If neither side of the comparison is an IV. */
1680 if ((!iv0 || integer_zerop (iv0->step))
1681 && (!iv1 || integer_zerop (iv1->step)))
1682 goto end;
1683
1684 /* Control variable may be on the other side. */
1685 if (!iv0 || integer_zerop (iv0->step))
1686 {
1687 std::swap (op0, op1);
1688 std::swap (iv0, iv1);
1689 }
1690 /* If one side is IV and the other side isn't loop invariant. */
1691 if (!iv1)
1692 rewrite_type = COMP_IV_EXPR;
1693 /* If one side is IV and the other side is loop invariant. */
1694 else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
1695 rewrite_type = COMP_IV_ELIM;
1696
1697 end:
1698 if (control_var)
1699 *control_var = op0;
1700 if (iv_var)
1701 *iv_var = iv0;
1702 if (bound)
1703 *bound = op1;
1704 if (iv_bound)
1705 *iv_bound = iv1;
1706
1707 return rewrite_type;
1708 }
1709
1710 /* Checks whether the condition in STMT is interesting and if so,
1711 records it. */
1712
1713 static void
1714 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1715 {
1716 tree *var_p, *bound_p;
1717 struct iv *var_iv, *bound_iv;
1718 enum comp_iv_rewrite ret;
1719
1720 ret = extract_cond_operands (data, stmt,
1721 &var_p, &bound_p, &var_iv, &bound_iv);
1722 if (ret == COMP_IV_NA)
1723 {
1724 find_interesting_uses_op (data, *var_p);
1725 find_interesting_uses_op (data, *bound_p);
1726 return;
1727 }
1728
1729 record_group_use (data, var_p, var_iv, stmt, USE_COMPARE, NULL_TREE);
1730 /* Record compare type iv_use for iv on the other side of comparison. */
1731 if (ret == COMP_IV_EXPR_2)
1732 record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE, NULL_TREE);
1733 }
1734
1735 /* Returns the outermost loop EXPR is obviously invariant in
1736 relative to the loop LOOP, i.e. if all its operands are defined
1737 outside of the returned loop. Returns NULL if EXPR is not
1738 even obviously invariant in LOOP. */
1739
1740 struct loop *
1741 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1742 {
1743 basic_block def_bb;
1744 unsigned i, len;
1745
1746 if (is_gimple_min_invariant (expr))
1747 return current_loops->tree_root;
1748
1749 if (TREE_CODE (expr) == SSA_NAME)
1750 {
1751 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1752 if (def_bb)
1753 {
1754 if (flow_bb_inside_loop_p (loop, def_bb))
1755 return NULL;
1756 return superloop_at_depth (loop,
1757 loop_depth (def_bb->loop_father) + 1);
1758 }
1759
1760 return current_loops->tree_root;
1761 }
1762
1763 if (!EXPR_P (expr))
1764 return NULL;
1765
1766 unsigned maxdepth = 0;
1767 len = TREE_OPERAND_LENGTH (expr);
1768 for (i = 0; i < len; i++)
1769 {
1770 struct loop *ivloop;
1771 if (!TREE_OPERAND (expr, i))
1772 continue;
1773
1774 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1775 if (!ivloop)
1776 return NULL;
1777 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1778 }
1779
1780 return superloop_at_depth (loop, maxdepth);
1781 }
1782
1783 /* Returns true if expression EXPR is obviously invariant in LOOP,
1784 i.e. if all its operands are defined outside of the LOOP. LOOP
1785 should not be the function body. */
1786
1787 bool
1788 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1789 {
1790 basic_block def_bb;
1791 unsigned i, len;
1792
1793 gcc_assert (loop_depth (loop) > 0);
1794
1795 if (is_gimple_min_invariant (expr))
1796 return true;
1797
1798 if (TREE_CODE (expr) == SSA_NAME)
1799 {
1800 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1801 if (def_bb
1802 && flow_bb_inside_loop_p (loop, def_bb))
1803 return false;
1804
1805 return true;
1806 }
1807
1808 if (!EXPR_P (expr))
1809 return false;
1810
1811 len = TREE_OPERAND_LENGTH (expr);
1812 for (i = 0; i < len; i++)
1813 if (TREE_OPERAND (expr, i)
1814 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1815 return false;
1816
1817 return true;
1818 }
1819
1820 /* Given expression EXPR which computes inductive values with respect
1821 to loop recorded in DATA, this function returns biv from which EXPR
1822 is derived by tracing definition chains of ssa variables in EXPR. */
1823
1824 static struct iv*
1825 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1826 {
1827 struct iv *iv;
1828 unsigned i, n;
1829 tree e2, e1;
1830 enum tree_code code;
1831 gimple *stmt;
1832
1833 if (expr == NULL_TREE)
1834 return NULL;
1835
1836 if (is_gimple_min_invariant (expr))
1837 return NULL;
1838
1839 code = TREE_CODE (expr);
1840 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1841 {
1842 n = TREE_OPERAND_LENGTH (expr);
1843 for (i = 0; i < n; i++)
1844 {
1845 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1846 if (iv)
1847 return iv;
1848 }
1849 }
1850
1851 /* Stop if it's not ssa name. */
1852 if (code != SSA_NAME)
1853 return NULL;
1854
1855 iv = get_iv (data, expr);
1856 if (!iv || integer_zerop (iv->step))
1857 return NULL;
1858 else if (iv->biv_p)
1859 return iv;
1860
1861 stmt = SSA_NAME_DEF_STMT (expr);
1862 if (gphi *phi = dyn_cast <gphi *> (stmt))
1863 {
1864 ssa_op_iter iter;
1865 use_operand_p use_p;
1866 basic_block phi_bb = gimple_bb (phi);
1867
1868 /* Skip loop header PHI that doesn't define biv. */
1869 if (phi_bb->loop_father == data->current_loop)
1870 return NULL;
1871
1872 if (virtual_operand_p (gimple_phi_result (phi)))
1873 return NULL;
1874
1875 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1876 {
1877 tree use = USE_FROM_PTR (use_p);
1878 iv = find_deriving_biv_for_expr (data, use);
1879 if (iv)
1880 return iv;
1881 }
1882 return NULL;
1883 }
1884 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1885 return NULL;
1886
1887 e1 = gimple_assign_rhs1 (stmt);
1888 code = gimple_assign_rhs_code (stmt);
1889 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1890 return find_deriving_biv_for_expr (data, e1);
1891
1892 switch (code)
1893 {
1894 case MULT_EXPR:
1895 case PLUS_EXPR:
1896 case MINUS_EXPR:
1897 case POINTER_PLUS_EXPR:
1898 /* Increments, decrements and multiplications by a constant
1899 are simple. */
1900 e2 = gimple_assign_rhs2 (stmt);
1901 iv = find_deriving_biv_for_expr (data, e2);
1902 if (iv)
1903 return iv;
1904 gcc_fallthrough ();
1905
1906 CASE_CONVERT:
1907 /* Casts are simple. */
1908 return find_deriving_biv_for_expr (data, e1);
1909
1910 default:
1911 break;
1912 }
1913
1914 return NULL;
1915 }
1916
1917 /* Record that BIV, as well as its predecessor and successor ivs, are
1918    used in address type uses. */
1919
1920 static void
1921 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1922 {
1923 unsigned i;
1924 tree type, base_1, base_2;
1925 bitmap_iterator bi;
1926
1927 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1928 || biv->have_address_use || !biv->no_overflow)
1929 return;
1930
1931 type = TREE_TYPE (biv->base);
1932 if (!INTEGRAL_TYPE_P (type))
1933 return;
1934
1935 biv->have_address_use = true;
1936 data->bivs_not_used_in_addr--;
1937 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1938 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1939 {
1940 struct iv *iv = ver_info (data, i)->iv;
1941
1942 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1943 || iv->have_address_use || !iv->no_overflow)
1944 continue;
1945
1946 if (type != TREE_TYPE (iv->base)
1947 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1948 continue;
1949
1950 if (!operand_equal_p (biv->step, iv->step, 0))
1951 continue;
1952
1953 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1954 if (operand_equal_p (base_1, iv->base, 0)
1955 || operand_equal_p (base_2, biv->base, 0))
1956 {
1957 iv->have_address_use = true;
1958 data->bivs_not_used_in_addr--;
1959 }
1960 }
1961 }
1962
1963 /* Accumulates the steps of indices into DATA and replaces their values with the
1964 initial ones. Returns false when the value of the index cannot be determined.
1965 Callback for for_each_index. */
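/* For instance, for a use a[i] where i is an iv with step 1 and the array
   elements are 4 bytes wide, the accumulated step is increased by 4 and
   *IDX is replaced by the initial value of i. */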
1966
1967 struct ifs_ivopts_data
1968 {
1969 struct ivopts_data *ivopts_data;
1970 gimple *stmt;
1971 tree step;
1972 };
1973
1974 static bool
1975 idx_find_step (tree base, tree *idx, void *data)
1976 {
1977 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1978 struct iv *iv;
1979 bool use_overflow_semantics = false;
1980 tree step, iv_base, iv_step, lbound, off;
1981 struct loop *loop = dta->ivopts_data->current_loop;
1982
1983 /* If base is a component ref, require that the offset of the reference
1984 be invariant. */
1985 if (TREE_CODE (base) == COMPONENT_REF)
1986 {
1987 off = component_ref_field_offset (base);
1988 return expr_invariant_in_loop_p (loop, off);
1989 }
1990
1991 /* If base is array, first check whether we will be able to move the
1992 reference out of the loop (in order to take its address in strength
1993 reduction). In order for this to work we need both lower bound
1994 and step to be loop invariants. */
1995 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1996 {
1997 /* Moreover, for a range, the size needs to be invariant as well. */
1998 if (TREE_CODE (base) == ARRAY_RANGE_REF
1999 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2000 return false;
2001
2002 step = array_ref_element_size (base);
2003 lbound = array_ref_low_bound (base);
2004
2005 if (!expr_invariant_in_loop_p (loop, step)
2006 || !expr_invariant_in_loop_p (loop, lbound))
2007 return false;
2008 }
2009
2010 if (TREE_CODE (*idx) != SSA_NAME)
2011 return true;
2012
2013 iv = get_iv (dta->ivopts_data, *idx);
2014 if (!iv)
2015 return false;
2016
2017 /* XXX For a base of *D42 with iv->base being &x[0] we produce
2018 *&x[0], which is not folded and does not trigger the
2019 ARRAY_REF path below. */
2020 *idx = iv->base;
2021
2022 if (integer_zerop (iv->step))
2023 return true;
2024
2025 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2026 {
2027 step = array_ref_element_size (base);
2028
2029 /* We only handle addresses whose step is an integer constant. */
2030 if (TREE_CODE (step) != INTEGER_CST)
2031 return false;
2032 }
2033 else
2034 /* The step for pointer arithmetic is already 1 byte. */
2035 step = size_one_node;
2036
2037 iv_base = iv->base;
2038 iv_step = iv->step;
2039 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2040 use_overflow_semantics = true;
2041
2042 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2043 sizetype, &iv_base, &iv_step, dta->stmt,
2044 use_overflow_semantics))
2045 {
2046 /* The index might wrap. */
2047 return false;
2048 }
2049
2050 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
2051 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
2052
2053 if (dta->ivopts_data->bivs_not_used_in_addr)
2054 {
2055 if (!iv->biv_p)
2056 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2057
2058 record_biv_for_address_use (dta->ivopts_data, iv);
2059 }
2060 return true;
2061 }
2062
2063 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2064 object is passed to it in DATA. */
2065
2066 static bool
2067 idx_record_use (tree base, tree *idx,
2068 void *vdata)
2069 {
2070 struct ivopts_data *data = (struct ivopts_data *) vdata;
2071 find_interesting_uses_op (data, *idx);
2072 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2073 {
2074 find_interesting_uses_op (data, array_ref_element_size (base));
2075 find_interesting_uses_op (data, array_ref_low_bound (base));
2076 }
2077 return true;
2078 }
2079
2080 /* If we can prove that TOP = cst * BOT for some constant cst,
2081 store cst to MUL and return true. Otherwise return false.
2082 The returned value is always sign-extended, regardless of the
2083 signedness of TOP and BOT. */
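/* For instance, if TOP is i * 12 and BOT is i, *MUL is set to 12; if TOP
   and BOT are the integer constants 12 and 4, *MUL is set to 3; if TOP is
   i + 1 and BOT is i, no such constant exists and false is returned. */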
2084
2085 static bool
2086 constant_multiple_of (tree top, tree bot, widest_int *mul)
2087 {
2088 tree mby;
2089 enum tree_code code;
2090 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2091 widest_int res, p0, p1;
2092
2093 STRIP_NOPS (top);
2094 STRIP_NOPS (bot);
2095
2096 if (operand_equal_p (top, bot, 0))
2097 {
2098 *mul = 1;
2099 return true;
2100 }
2101
2102 code = TREE_CODE (top);
2103 switch (code)
2104 {
2105 case MULT_EXPR:
2106 mby = TREE_OPERAND (top, 1);
2107 if (TREE_CODE (mby) != INTEGER_CST)
2108 return false;
2109
2110 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2111 return false;
2112
2113 *mul = wi::sext (res * wi::to_widest (mby), precision);
2114 return true;
2115
2116 case PLUS_EXPR:
2117 case MINUS_EXPR:
2118 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2119 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2120 return false;
2121
2122 if (code == MINUS_EXPR)
2123 p1 = -p1;
2124 *mul = wi::sext (p0 + p1, precision);
2125 return true;
2126
2127 case INTEGER_CST:
2128 if (TREE_CODE (bot) != INTEGER_CST)
2129 return false;
2130
2131 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2132 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
2133 if (p1 == 0)
2134 return false;
2135 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2136 return res == 0;
2137
2138 default:
2139 if (POLY_INT_CST_P (top)
2140 && POLY_INT_CST_P (bot)
2141 && constant_multiple_p (wi::to_poly_widest (top),
2142 wi::to_poly_widest (bot), mul))
2143 return true;
2144
2145 return false;
2146 }
2147 }
2148
2149 /* Return true if memory reference REF with step STEP may be unaligned. */
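/* E.g., even if REF itself is sufficiently aligned, a STEP that is only a
   multiple of 4 bytes (but not of 8) can leave later iterations misaligned
   when the reference requires 8-byte alignment, so we return true then. */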
2150
2151 static bool
2152 may_be_unaligned_p (tree ref, tree step)
2153 {
2154 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2155 thus they are not misaligned. */
2156 if (TREE_CODE (ref) == TARGET_MEM_REF)
2157 return false;
2158
2159 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
2160 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2161 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
2162
2163 unsigned HOST_WIDE_INT bitpos;
2164 unsigned int ref_align;
2165 get_object_alignment_1 (ref, &ref_align, &bitpos);
2166 if (ref_align < align
2167 || (bitpos % align) != 0
2168 || (bitpos % BITS_PER_UNIT) != 0)
2169 return true;
2170
2171 unsigned int trailing_zeros = tree_ctz (step);
2172 if (trailing_zeros < HOST_BITS_PER_INT
2173 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2174 return true;
2175
2176 return false;
2177 }
2178
2179 /* Return true if EXPR may be non-addressable. */
2180
2181 bool
2182 may_be_nonaddressable_p (tree expr)
2183 {
2184 switch (TREE_CODE (expr))
2185 {
2186 case VAR_DECL:
2187 /* Check if it's a register variable. */
2188 return DECL_HARD_REGISTER (expr);
2189
2190 case TARGET_MEM_REF:
2191 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2192 target, thus they are always addressable. */
2193 return false;
2194
2195 case MEM_REF:
2196 /* Likewise for MEM_REFs, modulo the storage order. */
2197 return REF_REVERSE_STORAGE_ORDER (expr);
2198
2199 case BIT_FIELD_REF:
2200 if (REF_REVERSE_STORAGE_ORDER (expr))
2201 return true;
2202 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2203
2204 case COMPONENT_REF:
2205 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2206 return true;
2207 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2208 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2209
2210 case ARRAY_REF:
2211 case ARRAY_RANGE_REF:
2212 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2213 return true;
2214 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2215
2216 case VIEW_CONVERT_EXPR:
2217 /* This kind of view-conversion may wrap non-addressable objects
2218 and make them look addressable. After some processing the
2219 non-addressability may be uncovered again, causing ADDR_EXPRs
2220 of inappropriate objects to be built. */
2221 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2222 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2223 return true;
2224 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2225
2226 CASE_CONVERT:
2227 return true;
2228
2229 default:
2230 break;
2231 }
2232
2233 return false;
2234 }
2235
2236 /* Finds addresses in *OP_P inside STMT. */
2237
2238 static void
2239 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2240 tree *op_p)
2241 {
2242 tree base = *op_p, step = size_zero_node;
2243 struct iv *civ;
2244 struct ifs_ivopts_data ifs_ivopts_data;
2245
2246 /* Do not play with volatile memory references. A bit too conservative,
2247 perhaps, but safe. */
2248 if (gimple_has_volatile_ops (stmt))
2249 goto fail;
2250
2251 /* Ignore bitfields for now. Not really something terribly complicated
2252 to handle. TODO. */
2253 if (TREE_CODE (base) == BIT_FIELD_REF)
2254 goto fail;
2255
2256 base = unshare_expr (base);
2257
2258 if (TREE_CODE (base) == TARGET_MEM_REF)
2259 {
2260 tree type = build_pointer_type (TREE_TYPE (base));
2261 tree astep;
2262
2263 if (TMR_BASE (base)
2264 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2265 {
2266 civ = get_iv (data, TMR_BASE (base));
2267 if (!civ)
2268 goto fail;
2269
2270 TMR_BASE (base) = civ->base;
2271 step = civ->step;
2272 }
2273 if (TMR_INDEX2 (base)
2274 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2275 {
2276 civ = get_iv (data, TMR_INDEX2 (base));
2277 if (!civ)
2278 goto fail;
2279
2280 TMR_INDEX2 (base) = civ->base;
2281 step = civ->step;
2282 }
2283 if (TMR_INDEX (base)
2284 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2285 {
2286 civ = get_iv (data, TMR_INDEX (base));
2287 if (!civ)
2288 goto fail;
2289
2290 TMR_INDEX (base) = civ->base;
2291 astep = civ->step;
2292
2293 if (astep)
2294 {
2295 if (TMR_STEP (base))
2296 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2297
2298 step = fold_build2 (PLUS_EXPR, type, step, astep);
2299 }
2300 }
2301
2302 if (integer_zerop (step))
2303 goto fail;
2304 base = tree_mem_ref_addr (type, base);
2305 }
2306 else
2307 {
2308 ifs_ivopts_data.ivopts_data = data;
2309 ifs_ivopts_data.stmt = stmt;
2310 ifs_ivopts_data.step = size_zero_node;
2311 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2312 || integer_zerop (ifs_ivopts_data.step))
2313 goto fail;
2314 step = ifs_ivopts_data.step;
2315
2316 /* Check that the base expression is addressable. This needs
2317 to be done after substituting bases of IVs into it. */
2318 if (may_be_nonaddressable_p (base))
2319 goto fail;
2320
2321 /* Moreover, on strict alignment platforms, check that it is
2322 sufficiently aligned. */
2323 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2324 goto fail;
2325
2326 base = build_fold_addr_expr (base);
2327
2328 /* Substituting bases of IVs into the base expression might
2329 have caused folding opportunities. */
2330 if (TREE_CODE (base) == ADDR_EXPR)
2331 {
2332 tree *ref = &TREE_OPERAND (base, 0);
2333 while (handled_component_p (*ref))
2334 ref = &TREE_OPERAND (*ref, 0);
2335 if (TREE_CODE (*ref) == MEM_REF)
2336 {
2337 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2338 TREE_OPERAND (*ref, 0),
2339 TREE_OPERAND (*ref, 1));
2340 if (tem)
2341 *ref = tem;
2342 }
2343 }
2344 }
2345
2346 civ = alloc_iv (data, base, step);
2347 /* Fail if base object of this memory reference is unknown. */
2348 if (civ->base_object == NULL_TREE)
2349 goto fail;
2350
2351 record_group_use (data, op_p, civ, stmt, USE_REF_ADDRESS, TREE_TYPE (*op_p));
2352 return;
2353
2354 fail:
2355 for_each_index (op_p, idx_record_use, data);
2356 }
2357
2358 /* Finds and records invariants used in STMT. */
2359
2360 static void
2361 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2362 {
2363 ssa_op_iter iter;
2364 use_operand_p use_p;
2365 tree op;
2366
2367 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2368 {
2369 op = USE_FROM_PTR (use_p);
2370 record_invariant (data, op, false);
2371 }
2372 }
2373
2374 /* CALL calls an internal function. If operand *OP_P will become an
2375 address when the call is expanded, return the type of the memory
2376 being addressed, otherwise return null. */
2377
2378 static tree
2379 get_mem_type_for_internal_fn (gcall *call, tree *op_p)
2380 {
2381 switch (gimple_call_internal_fn (call))
2382 {
2383 case IFN_MASK_LOAD:
2384 case IFN_MASK_LOAD_LANES:
2385 if (op_p == gimple_call_arg_ptr (call, 0))
2386 return TREE_TYPE (gimple_call_lhs (call));
2387 return NULL_TREE;
2388
2389 case IFN_MASK_STORE:
2390 case IFN_MASK_STORE_LANES:
2391 if (op_p == gimple_call_arg_ptr (call, 0))
2392 return TREE_TYPE (gimple_call_arg (call, 3));
2393 return NULL_TREE;
2394
2395 default:
2396 return NULL_TREE;
2397 }
2398 }
2399
2400 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2401 Return true if the operand will become an address when STMT
2402 is expanded and record the associated address use if so. */
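/* E.g., the address argument of an IFN_MASK_LOAD or IFN_MASK_STORE call
   becomes an address when the call is expanded, so a group use of type
   USE_PTR_ADDRESS is recorded for it. */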
2403
2404 static bool
2405 find_address_like_use (struct ivopts_data *data, gimple *stmt, tree *op_p,
2406 struct iv *iv)
2407 {
2408 /* Fail if base object of this memory reference is unknown. */
2409 if (iv->base_object == NULL_TREE)
2410 return false;
2411
2412 tree mem_type = NULL_TREE;
2413 if (gcall *call = dyn_cast <gcall *> (stmt))
2414 if (gimple_call_internal_p (call))
2415 mem_type = get_mem_type_for_internal_fn (call, op_p);
2416 if (mem_type)
2417 {
2418 iv = alloc_iv (data, iv->base, iv->step);
2419 record_group_use (data, op_p, iv, stmt, USE_PTR_ADDRESS, mem_type);
2420 return true;
2421 }
2422 return false;
2423 }
2424
2425 /* Finds interesting uses of induction variables in the statement STMT. */
2426
2427 static void
2428 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2429 {
2430 struct iv *iv;
2431 tree op, *lhs, *rhs;
2432 ssa_op_iter iter;
2433 use_operand_p use_p;
2434 enum tree_code code;
2435
2436 find_invariants_stmt (data, stmt);
2437
2438 if (gimple_code (stmt) == GIMPLE_COND)
2439 {
2440 find_interesting_uses_cond (data, stmt);
2441 return;
2442 }
2443
2444 if (is_gimple_assign (stmt))
2445 {
2446 lhs = gimple_assign_lhs_ptr (stmt);
2447 rhs = gimple_assign_rhs1_ptr (stmt);
2448
2449 if (TREE_CODE (*lhs) == SSA_NAME)
2450 {
2451 /* If the statement defines an induction variable, the uses are not
2452 interesting by themselves. */
2453
2454 iv = get_iv (data, *lhs);
2455
2456 if (iv && !integer_zerop (iv->step))
2457 return;
2458 }
2459
2460 code = gimple_assign_rhs_code (stmt);
2461 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2462 && (REFERENCE_CLASS_P (*rhs)
2463 || is_gimple_val (*rhs)))
2464 {
2465 if (REFERENCE_CLASS_P (*rhs))
2466 find_interesting_uses_address (data, stmt, rhs);
2467 else
2468 find_interesting_uses_op (data, *rhs);
2469
2470 if (REFERENCE_CLASS_P (*lhs))
2471 find_interesting_uses_address (data, stmt, lhs);
2472 return;
2473 }
2474 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2475 {
2476 find_interesting_uses_cond (data, stmt);
2477 return;
2478 }
2479
2480 /* TODO -- we should also handle address uses of type
2481
2482 memory = call (whatever);
2483
2484 and
2485
2486 call (memory). */
2487 }
2488
2489 if (gimple_code (stmt) == GIMPLE_PHI
2490 && gimple_bb (stmt) == data->current_loop->header)
2491 {
2492 iv = get_iv (data, PHI_RESULT (stmt));
2493
2494 if (iv && !integer_zerop (iv->step))
2495 return;
2496 }
2497
2498 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2499 {
2500 op = USE_FROM_PTR (use_p);
2501
2502 if (TREE_CODE (op) != SSA_NAME)
2503 continue;
2504
2505 iv = get_iv (data, op);
2506 if (!iv)
2507 continue;
2508
2509 if (!find_address_like_use (data, stmt, use_p->use, iv))
2510 find_interesting_uses_op (data, op);
2511 }
2512 }
2513
2514 /* Finds interesting uses of induction variables outside of loops
2515 on loop exit edge EXIT. */
2516
2517 static void
2518 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2519 {
2520 gphi *phi;
2521 gphi_iterator psi;
2522 tree def;
2523
2524 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2525 {
2526 phi = psi.phi ();
2527 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2528 if (!virtual_operand_p (def))
2529 find_interesting_uses_op (data, def);
2530 }
2531 }
2532
2533 /* Return TRUE if OFFSET is within the range of the [base + offset] addressing
2534 mode for the memory reference represented by USE. */
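/* For instance, on a target whose [base + offset] addresses accept only
   16-bit signed displacements, addr_offset_valid_p would accept an OFFSET
   of 1024 but reject an OFFSET of 100000. */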
2535
2536 static GTY (()) vec<rtx, va_gc> *addr_list;
2537
2538 static bool
2539 addr_offset_valid_p (struct iv_use *use, poly_int64 offset)
2540 {
2541 rtx reg, addr;
2542 unsigned list_index;
2543 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2544 machine_mode addr_mode, mem_mode = TYPE_MODE (use->mem_type);
2545
2546 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2547 if (list_index >= vec_safe_length (addr_list))
2548 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
2549
2550 addr = (*addr_list)[list_index];
2551 if (!addr)
2552 {
2553 addr_mode = targetm.addr_space.address_mode (as);
2554 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2555 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2556 (*addr_list)[list_index] = addr;
2557 }
2558 else
2559 addr_mode = GET_MODE (addr);
2560
2561 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2562 return (memory_address_addr_space_p (mem_mode, addr, as));
2563 }
2564
2565 /* Comparison function to sort group in ascending order of addr_offset. */
2566
2567 static int
2568 group_compare_offset (const void *a, const void *b)
2569 {
2570 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2571 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2572
2573 return compare_sizes_for_sort ((*u1)->addr_offset, (*u2)->addr_offset);
2574 }
2575
2576 /* Check if small groups should be split. Return true if no group
2577 contains more than two uses with distinct addr_offsets. Return
2578 false otherwise. We want to split such groups because:
2579
2580 1) Small groups don't have much benefit and may interfere with
2581 general candidate selection.
2582 2) The problem size with only small groups is usually small, and the
2583 general algorithm can handle it well.
2584
2585 TODO -- The above claim may not hold when we want to merge memory
2586 accesses with consecutive addresses. */
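/* For example, if every group has uses at no more than two distinct offsets
   (say {0} or {0, 4}), we return true and split_address_groups then splits
   each group at every distinct offset; a single group with offsets
   {0, 4, 64} makes us return false. */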
2587
2588 static bool
2589 split_small_address_groups_p (struct ivopts_data *data)
2590 {
2591 unsigned int i, j, distinct = 1;
2592 struct iv_use *pre;
2593 struct iv_group *group;
2594
2595 for (i = 0; i < data->vgroups.length (); i++)
2596 {
2597 group = data->vgroups[i];
2598 if (group->vuses.length () == 1)
2599 continue;
2600
2601 gcc_assert (address_p (group->type));
2602 if (group->vuses.length () == 2)
2603 {
2604 if (compare_sizes_for_sort (group->vuses[0]->addr_offset,
2605 group->vuses[1]->addr_offset) > 0)
2606 std::swap (group->vuses[0], group->vuses[1]);
2607 }
2608 else
2609 group->vuses.qsort (group_compare_offset);
2610
2611 if (distinct > 2)
2612 continue;
2613
2614 distinct = 1;
2615 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
2616 {
2617 if (maybe_ne (group->vuses[j]->addr_offset, pre->addr_offset))
2618 {
2619 pre = group->vuses[j];
2620 distinct++;
2621 }
2622
2623 if (distinct > 2)
2624 break;
2625 }
2626 }
2627
2628 return (distinct <= 2);
2629 }
2630
2631 /* For each group of address type uses, this function further groups
2632 these uses according to the maximum offset supported by the target's
2633 [base + offset] addressing mode. */
2634
2635 static void
2636 split_address_groups (struct ivopts_data *data)
2637 {
2638 unsigned int i, j;
2639 /* True if every group should be split at each distinct offset. */
2640 bool split_p = split_small_address_groups_p (data);
2641
2642 for (i = 0; i < data->vgroups.length (); i++)
2643 {
2644 struct iv_group *new_group = NULL;
2645 struct iv_group *group = data->vgroups[i];
2646 struct iv_use *use = group->vuses[0];
2647
2648 use->id = 0;
2649 use->group_id = group->id;
2650 if (group->vuses.length () == 1)
2651 continue;
2652
2653 gcc_assert (address_p (use->type));
2654
2655 for (j = 1; j < group->vuses.length ();)
2656 {
2657 struct iv_use *next = group->vuses[j];
2658 poly_int64 offset = next->addr_offset - use->addr_offset;
2659
2660 /* Split the group if asked to, or if the offset against the first
2661 use can't fit in the offset part of the addressing mode. IV uses
2662 having the same offset are still kept in one group. */
2663 if (maybe_ne (offset, 0)
2664 && (split_p || !addr_offset_valid_p (use, offset)))
2665 {
2666 if (!new_group)
2667 new_group = record_group (data, group->type);
2668 group->vuses.ordered_remove (j);
2669 new_group->vuses.safe_push (next);
2670 continue;
2671 }
2672
2673 next->id = j;
2674 next->group_id = group->id;
2675 j++;
2676 }
2677 }
2678 }
2679
2680 /* Finds uses of the induction variables that are interesting. */
2681
2682 static void
2683 find_interesting_uses (struct ivopts_data *data)
2684 {
2685 basic_block bb;
2686 gimple_stmt_iterator bsi;
2687 basic_block *body = get_loop_body (data->current_loop);
2688 unsigned i;
2689 edge e;
2690
2691 for (i = 0; i < data->current_loop->num_nodes; i++)
2692 {
2693 edge_iterator ei;
2694 bb = body[i];
2695
2696 FOR_EACH_EDGE (e, ei, bb->succs)
2697 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2698 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2699 find_interesting_uses_outside (data, e);
2700
2701 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2702 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2703 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2704 if (!is_gimple_debug (gsi_stmt (bsi)))
2705 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2706 }
2707 free (body);
2708
2709 split_address_groups (data);
2710
2711 if (dump_file && (dump_flags & TDF_DETAILS))
2712 {
2713 fprintf (dump_file, "\n<IV Groups>:\n");
2714 dump_groups (dump_file, data);
2715 fprintf (dump_file, "\n");
2716 }
2717 }
2718
2719 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2720 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2721 we are at the top-level of the processed address. */
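/* For instance, for EXPR p + 16 the core p is returned and *OFFSET is set
   to 16; for EXPR &a[i + 2] with 4-byte array elements, &a[i] is returned
   and *OFFSET is set to 8. */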
2722
2723 static tree
2724 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2725 poly_int64 *offset)
2726 {
2727 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2728 enum tree_code code;
2729 tree type, orig_type = TREE_TYPE (expr);
2730 poly_int64 off0, off1;
2731 HOST_WIDE_INT st;
2732 tree orig_expr = expr;
2733
2734 STRIP_NOPS (expr);
2735
2736 type = TREE_TYPE (expr);
2737 code = TREE_CODE (expr);
2738 *offset = 0;
2739
2740 switch (code)
2741 {
2742 case POINTER_PLUS_EXPR:
2743 case PLUS_EXPR:
2744 case MINUS_EXPR:
2745 op0 = TREE_OPERAND (expr, 0);
2746 op1 = TREE_OPERAND (expr, 1);
2747
2748 op0 = strip_offset_1 (op0, false, false, &off0);
2749 op1 = strip_offset_1 (op1, false, false, &off1);
2750
2751 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2752 if (op0 == TREE_OPERAND (expr, 0)
2753 && op1 == TREE_OPERAND (expr, 1))
2754 return orig_expr;
2755
2756 if (integer_zerop (op1))
2757 expr = op0;
2758 else if (integer_zerop (op0))
2759 {
2760 if (code == MINUS_EXPR)
2761 expr = fold_build1 (NEGATE_EXPR, type, op1);
2762 else
2763 expr = op1;
2764 }
2765 else
2766 expr = fold_build2 (code, type, op0, op1);
2767
2768 return fold_convert (orig_type, expr);
2769
2770 case MULT_EXPR:
2771 op1 = TREE_OPERAND (expr, 1);
2772 if (!cst_and_fits_in_hwi (op1))
2773 return orig_expr;
2774
2775 op0 = TREE_OPERAND (expr, 0);
2776 op0 = strip_offset_1 (op0, false, false, &off0);
2777 if (op0 == TREE_OPERAND (expr, 0))
2778 return orig_expr;
2779
2780 *offset = off0 * int_cst_value (op1);
2781 if (integer_zerop (op0))
2782 expr = op0;
2783 else
2784 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2785
2786 return fold_convert (orig_type, expr);
2787
2788 case ARRAY_REF:
2789 case ARRAY_RANGE_REF:
2790 if (!inside_addr)
2791 return orig_expr;
2792
2793 step = array_ref_element_size (expr);
2794 if (!cst_and_fits_in_hwi (step))
2795 break;
2796
2797 st = int_cst_value (step);
2798 op1 = TREE_OPERAND (expr, 1);
2799 op1 = strip_offset_1 (op1, false, false, &off1);
2800 *offset = off1 * st;
2801
2802 if (top_compref
2803 && integer_zerop (op1))
2804 {
2805 /* Strip the component reference completely. */
2806 op0 = TREE_OPERAND (expr, 0);
2807 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2808 *offset += off0;
2809 return op0;
2810 }
2811 break;
2812
2813 case COMPONENT_REF:
2814 {
2815 tree field;
2816
2817 if (!inside_addr)
2818 return orig_expr;
2819
2820 tmp = component_ref_field_offset (expr);
2821 field = TREE_OPERAND (expr, 1);
2822 if (top_compref
2823 && cst_and_fits_in_hwi (tmp)
2824 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2825 {
2826 HOST_WIDE_INT boffset, abs_off;
2827
2828 /* Strip the component reference completely. */
2829 op0 = TREE_OPERAND (expr, 0);
2830 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2831 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2832 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2833 if (boffset < 0)
2834 abs_off = -abs_off;
2835
2836 *offset = off0 + int_cst_value (tmp) + abs_off;
2837 return op0;
2838 }
2839 }
2840 break;
2841
2842 case ADDR_EXPR:
2843 op0 = TREE_OPERAND (expr, 0);
2844 op0 = strip_offset_1 (op0, true, true, &off0);
2845 *offset += off0;
2846
2847 if (op0 == TREE_OPERAND (expr, 0))
2848 return orig_expr;
2849
2850 expr = build_fold_addr_expr (op0);
2851 return fold_convert (orig_type, expr);
2852
2853 case MEM_REF:
2854 /* ??? Offset operand? */
2855 inside_addr = false;
2856 break;
2857
2858 default:
2859 if (ptrdiff_tree_p (expr, offset) && maybe_ne (*offset, 0))
2860 return build_int_cst (orig_type, 0);
2861 return orig_expr;
2862 }
2863
2864 /* Default handling of expressions for which we want to recurse into
2865 the first operand. */
2866 op0 = TREE_OPERAND (expr, 0);
2867 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2868 *offset += off0;
2869
2870 if (op0 == TREE_OPERAND (expr, 0)
2871 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2872 return orig_expr;
2873
2874 expr = copy_node (expr);
2875 TREE_OPERAND (expr, 0) = op0;
2876 if (op1)
2877 TREE_OPERAND (expr, 1) = op1;
2878
2879 /* Inside address, we might strip the top level component references,
2880 thus changing type of the expression. Handling of ADDR_EXPR
2881 will fix that. */
2882 expr = fold_convert (orig_type, expr);
2883
2884 return expr;
2885 }
2886
2887 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2888
2889 tree
2890 strip_offset (tree expr, poly_uint64_pod *offset)
2891 {
2892 poly_int64 off;
2893 tree core = strip_offset_1 (expr, false, false, &off);
2894 *offset = off;
2895 return core;
2896 }
2897
2898 /* Returns variant of TYPE that can be used as base for different uses.
2899 We return unsigned type with the same precision, which avoids problems
2900 with overflows. */
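/* E.g., for 'int' or 'unsigned int' this returns 'unsigned int'; for a
   pointer type it returns an unsigned integer type of the same precision. */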
2901
2902 static tree
2903 generic_type_for (tree type)
2904 {
2905 if (POINTER_TYPE_P (type))
2906 return unsigned_type_for (type);
2907
2908 if (TYPE_UNSIGNED (type))
2909 return type;
2910
2911 return unsigned_type_for (type);
2912 }
2913
2914 /* Private data for walk_tree. */
2915
2916 struct walk_tree_data
2917 {
2918 bitmap *inv_vars;
2919 struct ivopts_data *idata;
2920 };
2921
2922 /* Callback function for walk_tree; it records invariants and symbol
2923 references in *EXPR_P. DATA is the structure storing result info. */
2924
2925 static tree
2926 find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2927 {
2928 tree op = *expr_p;
2929 struct version_info *info;
2930 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
2931
2932 if (TREE_CODE (op) != SSA_NAME)
2933 return NULL_TREE;
2934
2935 info = name_info (wdata->idata, op);
2936 /* Because we expand simple operations when finding IVs, a loop-invariant
2937 variable that isn't referred to by the original loop could be used now.
2938 Record such invariant variables here. */
2939 if (!info->iv)
2940 {
2941 struct ivopts_data *idata = wdata->idata;
2942 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2943
2944 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2945 {
2946 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2947 record_invariant (idata, op, false);
2948 }
2949 }
2950 if (!info->inv_id || info->has_nonlin_use)
2951 return NULL_TREE;
2952
2953 if (!*wdata->inv_vars)
2954 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2955 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
2956
2957 return NULL_TREE;
2958 }
2959
2960 /* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we should
2961 store them. */
2962
2963 static inline void
2964 find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2965 {
2966 struct walk_tree_data wdata;
2967
2968 if (!inv_vars)
2969 return;
2970
2971 wdata.idata = data;
2972 wdata.inv_vars = inv_vars;
2973 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2974 }
2975
2976 /* Get entry from invariant expr hash table for INV_EXPR. New entry
2977 will be recorded if it doesn't exist yet. Given the two exprs below:
2978 inv_expr + cst1, inv_expr + cst2
2979 it's hard to decide whether the constant part should be stripped or
2980 not. We choose not to strip it, based on the following facts:
2981 1) We would need to count the ADD cost for the constant part if it's
2982 stripped, which isn't always trivial where this function is called.
2983 2) Stripping the constant away may conflict with the subsequent loop
2984 invariant hoisting pass.
2985 3) Not stripping the constant away results in more invariant exprs,
2986 which usually leads to decisions preferring lower register pressure. */
2987
2988 static iv_inv_expr_ent *
2989 get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2990 {
2991 STRIP_NOPS (inv_expr);
2992
2993 if (poly_int_tree_p (inv_expr)
2994 || TREE_CODE (inv_expr) == SSA_NAME)
2995 return NULL;
2996
2997 /* Don't strip constant part away as we used to. */
2998
2999 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3000 struct iv_inv_expr_ent ent;
3001 ent.expr = inv_expr;
3002 ent.hash = iterative_hash_expr (inv_expr, 0);
3003 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3004
3005 if (!*slot)
3006 {
3007 *slot = XNEW (struct iv_inv_expr_ent);
3008 (*slot)->expr = inv_expr;
3009 (*slot)->hash = ent.hash;
3010 (*slot)->id = ++data->max_inv_expr_id;
3011 }
3012
3013 return *slot;
3014 }
3015
3016 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3017 position to POS. If USE is not NULL, the candidate is set as related to
3018 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3019 replacement of the final value of the iv by a direct computation. */
3020
3021 static struct iv_cand *
3022 add_candidate_1 (struct ivopts_data *data,
3023 tree base, tree step, bool important, enum iv_position pos,
3024 struct iv_use *use, gimple *incremented_at,
3025 struct iv *orig_iv = NULL)
3026 {
3027 unsigned i;
3028 struct iv_cand *cand = NULL;
3029 tree type, orig_type;
3030
3031 gcc_assert (base && step);
3032
3033 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3034 live, but the ivopts code may replace a real pointer with one
3035 pointing before or after the memory block that is then adjusted
3036 into the memory block during the loop. FIXME: It would likely be
3037 better to actually force the pointer live and still use ivopts;
3038 for example, it would be enough to write the pointer into memory
3039 and keep it there until after the loop. */
3040 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3041 return NULL;
3042
3043 /* For non-original variables, make sure their values are computed in a type
3044 that does not invoke undefined behavior on overflows (since in general,
3045 we cannot prove that these induction variables are non-wrapping). */
3046 if (pos != IP_ORIGINAL)
3047 {
3048 orig_type = TREE_TYPE (base);
3049 type = generic_type_for (orig_type);
3050 if (type != orig_type)
3051 {
3052 base = fold_convert (type, base);
3053 step = fold_convert (type, step);
3054 }
3055 }
3056
3057 for (i = 0; i < data->vcands.length (); i++)
3058 {
3059 cand = data->vcands[i];
3060
3061 if (cand->pos != pos)
3062 continue;
3063
3064 if (cand->incremented_at != incremented_at
3065 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3066 && cand->ainc_use != use))
3067 continue;
3068
3069 if (operand_equal_p (base, cand->iv->base, 0)
3070 && operand_equal_p (step, cand->iv->step, 0)
3071 && (TYPE_PRECISION (TREE_TYPE (base))
3072 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
3073 break;
3074 }
3075
3076 if (i == data->vcands.length ())
3077 {
3078 cand = XCNEW (struct iv_cand);
3079 cand->id = i;
3080 cand->iv = alloc_iv (data, base, step);
3081 cand->pos = pos;
3082 if (pos != IP_ORIGINAL)
3083 {
3084 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3085 cand->var_after = cand->var_before;
3086 }
3087 cand->important = important;
3088 cand->incremented_at = incremented_at;
3089 data->vcands.safe_push (cand);
3090
3091 if (!poly_int_tree_p (step))
3092 {
3093 find_inv_vars (data, &step, &cand->inv_vars);
3094
3095 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3096 /* Share bitmap between inv_vars and inv_exprs for cand. */
3097 if (inv_expr != NULL)
3098 {
3099 cand->inv_exprs = cand->inv_vars;
3100 cand->inv_vars = NULL;
3101 if (cand->inv_exprs)
3102 bitmap_clear (cand->inv_exprs);
3103 else
3104 cand->inv_exprs = BITMAP_ALLOC (NULL);
3105
3106 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3107 }
3108 }
3109
3110 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3111 cand->ainc_use = use;
3112 else
3113 cand->ainc_use = NULL;
3114
3115 cand->orig_iv = orig_iv;
3116 if (dump_file && (dump_flags & TDF_DETAILS))
3117 dump_cand (dump_file, cand);
3118 }
3119
3120 cand->important |= important;
3121
3122 /* Relate candidate to the group for which it is added. */
3123 if (use)
3124 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
3125
3126 return cand;
3127 }
3128
3129 /* Returns true if incrementing the induction variable at the end of the LOOP
3130 is allowed.
3131
3132 The purpose is to avoid splitting the latch edge with a biv increment, thus
3133 creating a jump, possibly confusing other optimization passes and leaving
3134 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3135 available (so we do not have a better alternative), or if the latch edge
3136 is already nonempty. */
3137
3138 static bool
3139 allow_ip_end_pos_p (struct loop *loop)
3140 {
3141 if (!ip_normal_pos (loop))
3142 return true;
3143
3144 if (!empty_block_p (ip_end_pos (loop)))
3145 return true;
3146
3147 return false;
3148 }
3149
3150 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3151 Important field is set to IMPORTANT. */
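/* For example, on a target with post-increment addressing, a loop like

     for (i = 0; i < n; i++)
       sum += *p++;

   where the step of p equals the access size can give rise to an
   IP_AFTER_USE candidate whose increment is placed right after the memory
   access, so the access and the step combine into one post-increment
   address. */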
3152
3153 static void
3154 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3155 bool important, struct iv_use *use)
3156 {
3157 basic_block use_bb = gimple_bb (use->stmt);
3158 machine_mode mem_mode;
3159 unsigned HOST_WIDE_INT cstepi;
3160
3161 /* If we insert the increment in any position other than the standard
3162 ones, we must ensure that it is incremented once per iteration.
3163 It must not be in an inner nested loop, or one side of an if
3164 statement. */
3165 if (use_bb->loop_father != data->current_loop
3166 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
3167 || stmt_can_throw_internal (cfun, use->stmt)
3168 || !cst_and_fits_in_hwi (step))
3169 return;
3170
3171 cstepi = int_cst_value (step);
3172
3173 mem_mode = TYPE_MODE (use->mem_type);
3174 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3175 || USE_STORE_PRE_INCREMENT (mem_mode))
3176 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3177 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3178 || USE_STORE_PRE_DECREMENT (mem_mode))
3179 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3180 {
3181 enum tree_code code = MINUS_EXPR;
3182 tree new_base;
3183 tree new_step = step;
3184
3185 if (POINTER_TYPE_P (TREE_TYPE (base)))
3186 {
3187 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3188 code = POINTER_PLUS_EXPR;
3189 }
3190 else
3191 new_step = fold_convert (TREE_TYPE (base), new_step);
3192 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3193 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3194 use->stmt);
3195 }
3196 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3197 || USE_STORE_POST_INCREMENT (mem_mode))
3198 && known_eq (GET_MODE_SIZE (mem_mode), cstepi))
3199 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3200 || USE_STORE_POST_DECREMENT (mem_mode))
3201 && known_eq (GET_MODE_SIZE (mem_mode), -cstepi)))
3202 {
3203 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3204 use->stmt);
3205 }
3206 }
3207
3208 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3209 position to POS. If USE is not NULL, the candidate is set as related to
3210 it. The candidate computation is scheduled before exit condition and at
3211 the end of loop. */
3212
3213 static void
3214 add_candidate (struct ivopts_data *data,
3215 tree base, tree step, bool important, struct iv_use *use,
3216 struct iv *orig_iv = NULL)
3217 {
3218 if (ip_normal_pos (data->current_loop))
3219 add_candidate_1 (data, base, step, important,
3220 IP_NORMAL, use, NULL, orig_iv);
3221 if (ip_end_pos (data->current_loop)
3222 && allow_ip_end_pos_p (data->current_loop))
3223 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
3224 }
3225
3226 /* Adds standard iv candidates. */
3227
3228 static void
3229 add_standard_iv_candidates (struct ivopts_data *data)
3230 {
3231 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3232
3233 /* The same for the 'long' type if it is still fast enough. */
3234 if (TYPE_PRECISION
3235 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
3236 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3237 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3238 build_int_cst (long_integer_type_node, 1), true, NULL);
3239
3240 /* The same for the 'long long' type if it is still fast enough. */
3241 if (TYPE_PRECISION
3242 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
3243 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3244 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3245 build_int_cst (long_long_integer_type_node, 1), true, NULL);
3246 }
3247
3248
3249 /* Adds candidates based on the old induction variable IV. */
3250
3251 static void
3252 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
3253 {
3254 gimple *phi;
3255 tree def;
3256 struct iv_cand *cand;
3257
3258 /* Check if this biv is used in address type use. */
3259 if (iv->no_overflow && iv->have_address_use
3260 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3261 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3262 {
3263 tree base = fold_convert (sizetype, iv->base);
3264 tree step = fold_convert (sizetype, iv->step);
3265
3266 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3267 add_candidate (data, base, step, true, NULL, iv);
3268 /* Add iv cand of the original type only if it has nonlinear use. */
3269 if (iv->nonlin_use)
3270 add_candidate (data, iv->base, iv->step, true, NULL);
3271 }
3272 else
3273 add_candidate (data, iv->base, iv->step, true, NULL);
3274
3275 /* The same, but with initial value zero. */
3276 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3277 add_candidate (data, size_int (0), iv->step, true, NULL);
3278 else
3279 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3280 iv->step, true, NULL);
3281
3282 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3283 if (gimple_code (phi) == GIMPLE_PHI)
3284 {
3285 /* Additionally record the possibility of leaving the original iv
3286 untouched. */
3287 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3288 /* Don't add candidate if it's from another PHI node because
3289 it's an affine iv appearing in the form of PEELED_CHREC. */
3290 phi = SSA_NAME_DEF_STMT (def);
3291 if (gimple_code (phi) != GIMPLE_PHI)
3292 {
3293 cand = add_candidate_1 (data,
3294 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3295 SSA_NAME_DEF_STMT (def));
3296 if (cand)
3297 {
3298 cand->var_before = iv->ssa_name;
3299 cand->var_after = def;
3300 }
3301 }
3302 else
3303 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3304 }
3305 }
3306
3307 /* Adds candidates based on the old induction variables. */
3308
3309 static void
3310 add_iv_candidate_for_bivs (struct ivopts_data *data)
3311 {
3312 unsigned i;
3313 struct iv *iv;
3314 bitmap_iterator bi;
3315
3316 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3317 {
3318 iv = ver_info (data, i)->iv;
3319 if (iv && iv->biv_p && !integer_zerop (iv->step))
3320 add_iv_candidate_for_biv (data, iv);
3321 }
3322 }
3323
3324 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3325
3326 static void
3327 record_common_cand (struct ivopts_data *data, tree base,
3328 tree step, struct iv_use *use)
3329 {
3330 struct iv_common_cand ent;
3331 struct iv_common_cand **slot;
3332
3333 ent.base = base;
3334 ent.step = step;
3335 ent.hash = iterative_hash_expr (base, 0);
3336 ent.hash = iterative_hash_expr (step, ent.hash);
3337
3338 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3339 if (*slot == NULL)
3340 {
3341 *slot = new iv_common_cand ();
3342 (*slot)->base = base;
3343 (*slot)->step = step;
3344 (*slot)->uses.create (8);
3345 (*slot)->hash = ent.hash;
3346 data->iv_common_cands.safe_push ((*slot));
3347 }
3348
3349 gcc_assert (use != NULL);
3350 (*slot)->uses.safe_push (use);
3351 return;
3352 }
3353
3354 /* Comparison function used to sort common candidates. */
3355
3356 static int
3357 common_cand_cmp (const void *p1, const void *p2)
3358 {
3359 unsigned n1, n2;
3360 const struct iv_common_cand *const *const ccand1
3361 = (const struct iv_common_cand *const *)p1;
3362 const struct iv_common_cand *const *const ccand2
3363 = (const struct iv_common_cand *const *)p2;
3364
3365 n1 = (*ccand1)->uses.length ();
3366 n2 = (*ccand2)->uses.length ();
3367 return n2 - n1;
3368 }
3369
3370 /* Adds IV candidates based on the common candidates recorded. */
3371
3372 static void
3373 add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3374 {
3375 unsigned i, j;
3376 struct iv_cand *cand_1, *cand_2;
3377
3378 data->iv_common_cands.qsort (common_cand_cmp);
3379 for (i = 0; i < data->iv_common_cands.length (); i++)
3380 {
3381 struct iv_common_cand *ptr = data->iv_common_cands[i];
3382
3383 /* Only add IV candidate if it's derived from multiple uses. */
3384 if (ptr->uses.length () <= 1)
3385 break;
3386
3387 cand_1 = NULL;
3388 cand_2 = NULL;
3389 if (ip_normal_pos (data->current_loop))
3390 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3391 false, IP_NORMAL, NULL, NULL);
3392
3393 if (ip_end_pos (data->current_loop)
3394 && allow_ip_end_pos_p (data->current_loop))
3395 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3396 false, IP_END, NULL, NULL);
3397
3398 /* Bind deriving uses and the new candidates. */
3399 for (j = 0; j < ptr->uses.length (); j++)
3400 {
3401 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
3402 if (cand_1)
3403 bitmap_set_bit (group->related_cands, cand_1->id);
3404 if (cand_2)
3405 bitmap_set_bit (group->related_cands, cand_2->id);
3406 }
3407 }
3408
3409 /* Release data since it is useless from this point. */
3410 data->iv_common_cand_tab->empty ();
3411 data->iv_common_cands.truncate (0);
3412 }
3413
3414 /* Adds candidates based on the value of USE's iv. */
3415
3416 static void
3417 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3418 {
3419 poly_uint64 offset;
3420 tree base;
3421 tree basetype;
3422 struct iv *iv = use->iv;
3423
3424 add_candidate (data, iv->base, iv->step, false, use);
3425
3426 /* Record common candidate for use in case it can be shared by others. */
3427 record_common_cand (data, iv->base, iv->step, use);
3428
3429 /* Record common candidate with initial value zero. */
3430 basetype = TREE_TYPE (iv->base);
3431 if (POINTER_TYPE_P (basetype))
3432 basetype = sizetype;
3433 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3434
3435 /* Compare the cost of an address with an unscaled index with the cost of
3436 an address with a scaled index and add candidate if useful. */
3437 poly_int64 step;
3438 if (use != NULL
3439 && poly_int_tree_p (iv->step, &step)
3440 && address_p (use->type))
3441 {
3442 poly_int64 new_step;
3443 unsigned int fact = preferred_mem_scale_factor
3444 (use->iv->base,
3445 TYPE_MODE (use->mem_type),
3446 optimize_loop_for_speed_p (data->current_loop));
3447
3448 if (fact != 1
3449 && multiple_p (step, fact, &new_step))
3450 add_candidate (data, size_int (0),
3451 wide_int_to_tree (sizetype, new_step),
3452 true, NULL);
3453 }
3454
3455 /* Record common candidate with the constant offset stripped from the base.
3456 As for the use itself, we also add a candidate directly for it. */
3457 base = strip_offset (iv->base, &offset);
3458 if (maybe_ne (offset, 0U) || base != iv->base)
3459 {
3460 record_common_cand (data, base, iv->step, use);
3461 add_candidate (data, base, iv->step, false, use);
3462 }
3463
3464 /* Record common candidate with base_object removed in base. */
3465 base = iv->base;
3466 STRIP_NOPS (base);
3467 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
3468 {
3469 tree step = iv->step;
3470
3471 STRIP_NOPS (step);
3472 base = TREE_OPERAND (base, 1);
3473 step = fold_convert (sizetype, step);
3474 record_common_cand (data, base, step, use);
3475 /* Also record common candidate with offset stripped. */
3476 base = strip_offset (base, &offset);
3477 if (maybe_ne (offset, 0U))
3478 record_common_cand (data, base, step, use);
3479 }
3480
3481 /* Finally, add auto-increment candidates. Make such variables
3482 important since other iv uses with the same base object may be based
3483 on them. */
3484 if (use != NULL && address_p (use->type))
3485 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3486 }
3487
3488 /* Adds candidates based on the uses. */
3489
3490 static void
3491 add_iv_candidate_for_groups (struct ivopts_data *data)
3492 {
3493 unsigned i;
3494
3495 /* Only add candidate for the first use in group. */
3496 for (i = 0; i < data->vgroups.length (); i++)
3497 {
3498 struct iv_group *group = data->vgroups[i];
3499
3500 gcc_assert (group->vuses[0] != NULL);
3501 add_iv_candidate_for_use (data, group->vuses[0]);
3502 }
3503 add_iv_candidate_derived_from_uses (data);
3504 }
3505
3506 /* Record important candidates and add them to related_cands bitmaps. */
3507
3508 static void
3509 record_important_candidates (struct ivopts_data *data)
3510 {
3511 unsigned i;
3512 struct iv_group *group;
3513
3514 for (i = 0; i < data->vcands.length (); i++)
3515 {
3516 struct iv_cand *cand = data->vcands[i];
3517
3518 if (cand->important)
3519 bitmap_set_bit (data->important_candidates, i);
3520 }
3521
3522 data->consider_all_candidates = (data->vcands.length ()
3523 <= CONSIDER_ALL_CANDIDATES_BOUND);
3524
3525 /* Add important candidates to groups' related_cands bitmaps. */
3526 for (i = 0; i < data->vgroups.length (); i++)
3527 {
3528 group = data->vgroups[i];
3529 bitmap_ior_into (group->related_cands, data->important_candidates);
3530 }
3531 }
3532
3533 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3534 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3535 we allocate a simple list for every use. */
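/* E.g., when not considering all candidates, a group with 5 related
   candidates gets a cost map of 8 entries, so that get_group_iv_cost can
   use a bitwise AND as the modulo when probing. */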
3536
3537 static void
3538 alloc_use_cost_map (struct ivopts_data *data)
3539 {
3540 unsigned i, size, s;
3541
3542 for (i = 0; i < data->vgroups.length (); i++)
3543 {
3544 struct iv_group *group = data->vgroups[i];
3545
3546 if (data->consider_all_candidates)
3547 size = data->vcands.length ();
3548 else
3549 {
3550 s = bitmap_count_bits (group->related_cands);
3551
3552 /* Round up to a power of two, so that computing the modulo by it is fast. */
3553 size = s ? (1 << ceil_log2 (s)) : 1;
3554 }
3555
3556 group->n_map_members = size;
3557 group->cost_map = XCNEWVEC (struct cost_pair, size);
3558 }
3559 }
3560
3561 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3562 on invariants INV_VARS and that the value used in expressing it is
3563 VALUE, and in case of iv elimination the comparison operator is COMP. */
3564
3565 static void
3566 set_group_iv_cost (struct ivopts_data *data,
3567 struct iv_group *group, struct iv_cand *cand,
3568 comp_cost cost, bitmap inv_vars, tree value,
3569 enum tree_code comp, bitmap inv_exprs)
3570 {
3571 unsigned i, s;
3572
3573 if (cost.infinite_cost_p ())
3574 {
3575 BITMAP_FREE (inv_vars);
3576 BITMAP_FREE (inv_exprs);
3577 return;
3578 }
3579
3580 if (data->consider_all_candidates)
3581 {
3582 group->cost_map[cand->id].cand = cand;
3583 group->cost_map[cand->id].cost = cost;
3584 group->cost_map[cand->id].inv_vars = inv_vars;
3585 group->cost_map[cand->id].inv_exprs = inv_exprs;
3586 group->cost_map[cand->id].value = value;
3587 group->cost_map[cand->id].comp = comp;
3588 return;
3589 }
3590
3591 /* n_map_members is a power of two, so this computes modulo. */
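  /* E.g., with n_map_members == 8, a candidate with id 13 starts probing at
     slot 13 & 7 == 5 and scans forward (wrapping around) for a free slot. */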
3592 s = cand->id & (group->n_map_members - 1);
3593 for (i = s; i < group->n_map_members; i++)
3594 if (!group->cost_map[i].cand)
3595 goto found;
3596 for (i = 0; i < s; i++)
3597 if (!group->cost_map[i].cand)
3598 goto found;
3599
3600 gcc_unreachable ();
3601
3602 found:
3603 group->cost_map[i].cand = cand;
3604 group->cost_map[i].cost = cost;
3605 group->cost_map[i].inv_vars = inv_vars;
3606 group->cost_map[i].inv_exprs = inv_exprs;
3607 group->cost_map[i].value = value;
3608 group->cost_map[i].comp = comp;
3609 }
3610
3611 /* Gets cost of (GROUP, CAND) pair. */
3612
3613 static struct cost_pair *
3614 get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3615 struct iv_cand *cand)
3616 {
3617 unsigned i, s;
3618 struct cost_pair *ret;
3619
3620 if (!cand)
3621 return NULL;
3622
3623 if (data->consider_all_candidates)
3624 {
3625 ret = group->cost_map + cand->id;
3626 if (!ret->cand)
3627 return NULL;
3628
3629 return ret;
3630 }
3631
3632 /* n_map_members is a power of two, so this computes modulo. */
3633 s = cand->id & (group->n_map_members - 1);
3634 for (i = s; i < group->n_map_members; i++)
3635 if (group->cost_map[i].cand == cand)
3636 return group->cost_map + i;
3637 else if (group->cost_map[i].cand == NULL)
3638 return NULL;
3639 for (i = 0; i < s; i++)
3640 if (group->cost_map[i].cand == cand)
3641 return group->cost_map + i;
3642 else if (group->cost_map[i].cand == NULL)
3643 return NULL;
3644
3645 return NULL;
3646 }
3647
3648 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3649 static rtx
3650 produce_memory_decl_rtl (tree obj, int *regno)
3651 {
3652 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3653 machine_mode address_mode = targetm.addr_space.address_mode (as);
3654 rtx x;
3655
3656 gcc_assert (obj);
3657 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3658 {
3659 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3660 x = gen_rtx_SYMBOL_REF (address_mode, name);
3661 SET_SYMBOL_REF_DECL (x, obj);
3662 x = gen_rtx_MEM (DECL_MODE (obj), x);
3663 set_mem_addr_space (x, as);
3664 targetm.encode_section_info (obj, x, true);
3665 }
3666 else
3667 {
3668 x = gen_raw_REG (address_mode, (*regno)++);
3669 x = gen_rtx_MEM (DECL_MODE (obj), x);
3670 set_mem_addr_space (x, as);
3671 }
3672
3673 return x;
3674 }
3675
3676 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3677 walk_tree. DATA contains the actual fake register number. */
3678
3679 static tree
3680 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3681 {
3682 tree obj = NULL_TREE;
3683 rtx x = NULL_RTX;
3684 int *regno = (int *) data;
3685
3686 switch (TREE_CODE (*expr_p))
3687 {
3688 case ADDR_EXPR:
3689 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3690 handled_component_p (*expr_p);
3691 expr_p = &TREE_OPERAND (*expr_p, 0))
3692 continue;
3693 obj = *expr_p;
3694 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3695 x = produce_memory_decl_rtl (obj, regno);
3696 break;
3697
3698 case SSA_NAME:
3699 *ws = 0;
3700 obj = SSA_NAME_VAR (*expr_p);
3701 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3702 if (!obj)
3703 return NULL_TREE;
3704 if (!DECL_RTL_SET_P (obj))
3705 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3706 break;
3707
3708 case VAR_DECL:
3709 case PARM_DECL:
3710 case RESULT_DECL:
3711 *ws = 0;
3712 obj = *expr_p;
3713
3714 if (DECL_RTL_SET_P (obj))
3715 break;
3716
3717 if (DECL_MODE (obj) == BLKmode)
3718 x = produce_memory_decl_rtl (obj, regno);
3719 else
3720 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3721
3722 break;
3723
3724 default:
3725 break;
3726 }
3727
3728 if (x)
3729 {
3730 decl_rtl_to_reset.safe_push (obj);
3731 SET_DECL_RTL (obj, x);
3732 }
3733
3734 return NULL_TREE;
3735 }
3736
3737 /* Predict whether the given loop will be transformed in the RTL
3738 doloop_optimize pass. Attempt to duplicate some doloop_optimize checks.
3739 This is only for target-independent checks; see targetm.predict_doloop_p
3740 for the target-dependent ones.
3741
3742 Note that according to some initial investigation, some checks, like the
3743 costly niter check and invalid stmt scanning, don't gain much for general
3744 cases, so keep this as simple as possible for now.
3745
3746 Some RTL-specific checks seem impossible to do in gimple; if any new
3747 checks or easy checks _are_ missing here, please add them. */
3748
3749 static bool ATTRIBUTE_UNUSED
3750 generic_predict_doloop_p (struct ivopts_data *data)
3751 {
3752 struct loop *loop = data->current_loop;
3753
3754 /* Call target hook for target dependent checks. */
3755 if (!targetm.predict_doloop_p (loop))
3756 {
3757 if (dump_file && (dump_flags & TDF_DETAILS))
3758 fprintf (dump_file, "Predict doloop failure due to"
3759 " target specific checks.\n");
3760 return false;
3761 }
3762
3763 /* Similar to doloop_optimize, check the iteration description to know
3764 whether it's suitable or not. Keep it as simple as possible; feel free
3765 to extend it if you find any cases where multiple exits matter. */
3766 edge exit = single_dom_exit (loop);
3767 struct tree_niter_desc *niter_desc;
3768 if (!exit || !(niter_desc = niter_for_exit (data, exit)))
3769 {
3770 if (dump_file && (dump_flags & TDF_DETAILS))
3771 fprintf (dump_file, "Predict doloop failure due to"
3772 " unexpected niters.\n");
3773 return false;
3774 }
3775
3776 /* Similar to doloop_optimize, check whether the iteration count is too
3777 small for the transformation to be profitable. */
3778 HOST_WIDE_INT est_niter = get_estimated_loop_iterations_int (loop);
3779 if (est_niter == -1)
3780 est_niter = get_likely_max_loop_iterations_int (loop);
3781 if (est_niter >= 0 && est_niter < 3)
3782 {
3783 if (dump_file && (dump_flags & TDF_DETAILS))
3784 fprintf (dump_file,
3785 "Predict doloop failure due to"
3786 " too few iterations (%u).\n",
3787 (unsigned int) est_niter);
3788 return false;
3789 }
3790
3791 return true;
3792 }
3793
3794 /* Determines cost of the computation of EXPR. */
3795
3796 static unsigned
3797 computation_cost (tree expr, bool speed)
3798 {
3799 rtx_insn *seq;
3800 rtx rslt;
3801 tree type = TREE_TYPE (expr);
3802 unsigned cost;
3803 /* Avoid using hard regs in ways which may be unsupported. */
3804 int regno = LAST_VIRTUAL_REGISTER + 1;
3805 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3806 enum node_frequency real_frequency = node->frequency;
3807
3808 node->frequency = NODE_FREQUENCY_NORMAL;
3809 crtl->maybe_hot_insn_p = speed;
3810 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3811 start_sequence ();
3812 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3813 seq = get_insns ();
3814 end_sequence ();
3815 default_rtl_profile ();
3816 node->frequency = real_frequency;
3817
3818 cost = seq_cost (seq, speed);
3819 if (MEM_P (rslt))
3820 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3821 TYPE_ADDR_SPACE (type), speed);
3822 else if (!REG_P (rslt))
3823 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3824
3825 return cost;
3826 }
3827
3828 /* Returns variable containing the value of candidate CAND at statement AT. */
3829
3830 static tree
3831 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3832 {
3833 if (stmt_after_increment (loop, cand, stmt))
3834 return cand->var_after;
3835 else
3836 return cand->var_before;
3837 }
3838
3839 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3840 same precision that is at least as wide as the precision of TYPE, stores
3841 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3842 type of A and B. */
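/* A small illustration with hypothetical types: if *A is (unsigned int) x
and *B is (unsigned int) y, where x and y both have type unsigned long
(same precision, at least as wide as unsigned int), then x is stored to *A,
y to *B, and unsigned long is returned; otherwise *A and *B are left
untouched and the type of *A is returned. */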
3843
3844 static tree
3845 determine_common_wider_type (tree *a, tree *b)
3846 {
3847 tree wider_type = NULL;
3848 tree suba, subb;
3849 tree atype = TREE_TYPE (*a);
3850
3851 if (CONVERT_EXPR_P (*a))
3852 {
3853 suba = TREE_OPERAND (*a, 0);
3854 wider_type = TREE_TYPE (suba);
3855 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3856 return atype;
3857 }
3858 else
3859 return atype;
3860
3861 if (CONVERT_EXPR_P (*b))
3862 {
3863 subb = TREE_OPERAND (*b, 0);
3864 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3865 return atype;
3866 }
3867 else
3868 return atype;
3869
3870 *a = suba;
3871 *b = subb;
3872 return wider_type;
3873 }
3874
3875 /* Determines the expression by which USE is expressed from induction
3876 variable CAND at statement AT in LOOP. The expression is stored in two
3877 parts in a decomposed form: the invariant part is stored in AFF_INV, the
3878 variant part in AFF_VAR. Store the ratio of USE.step over CAND.step in
3879 PRAT if it's non-null. Returns false if USE cannot be expressed using CAND. */
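/* A minimal sketch with made-up values: for a use with base 16 and step 8
and a candidate with base 0 and step 4, the ratio is 8 / 4 = 2, so the use
is rebuilt as the invariant part 16 - 2 * 0 (AFF_INV) plus the variant part
2 * var (AFF_VAR), where var is the candidate's value at statement AT. */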
3880
3881 static bool
3882 get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
3883 struct iv_cand *cand, struct aff_tree *aff_inv,
3884 struct aff_tree *aff_var, widest_int *prat = NULL)
3885 {
3886 tree ubase = use->iv->base, ustep = use->iv->step;
3887 tree cbase = cand->iv->base, cstep = cand->iv->step;
3888 tree common_type, uutype, var, cstep_common;
3889 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3890 aff_tree aff_cbase;
3891 widest_int rat;
3892
3893 /* We must have enough precision to express the values of USE. */
3894 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3895 return false;
3896
3897 var = var_at_stmt (loop, cand, at);
3898 uutype = unsigned_type_for (utype);
3899
3900 /* If the conversion is not noop, perform it. */
3901 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3902 {
3903 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3904 && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
3905 {
3906 tree inner_base, inner_step, inner_type;
3907 inner_base = TREE_OPERAND (cbase, 0);
3908 if (CONVERT_EXPR_P (cstep))
3909 inner_step = TREE_OPERAND (cstep, 0);
3910 else
3911 inner_step = cstep;
3912
3913 inner_type = TREE_TYPE (inner_base);
3914 /* If the candidate is added from a biv whose type is smaller than
3915 ctype, we know both the candidate and the biv won't overflow.
3916 In this case, it's safe to skip the conversion in the candidate.
3917 As an example, (unsigned short)((unsigned long)A) equals
3918 (unsigned short)A, if A has a type no larger than short. */
3919 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3920 {
3921 cbase = inner_base;
3922 cstep = inner_step;
3923 }
3924 }
3925 cbase = fold_convert (uutype, cbase);
3926 cstep = fold_convert (uutype, cstep);
3927 var = fold_convert (uutype, var);
3928 }
3929
3930 /* Ratio is 1 when computing the value of biv cand by itself.
3931 We can't rely on constant_multiple_of in this case because the
3932 use is created after the original biv is selected. The call
3933 could fail because of inconsistent fold behavior. See PR68021
3934 for more information. */
3935 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
3936 {
3937 gcc_assert (is_gimple_assign (use->stmt));
3938 gcc_assert (use->iv->ssa_name == cand->var_after);
3939 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
3940 rat = 1;
3941 }
3942 else if (!constant_multiple_of (ustep, cstep, &rat))
3943 return false;
3944
3945 if (prat)
3946 *prat = rat;
3947
3948 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3949 type, we achieve better folding by computing their difference in this
3950 wider type, and cast the result to UUTYPE. We do not need to worry about
3951 overflows, as all the arithmetics will in the end be performed in UUTYPE
3952 anyway. */
3953 common_type = determine_common_wider_type (&ubase, &cbase);
3954
3955 /* use = ubase - ratio * cbase + ratio * var. */
3956 tree_to_aff_combination (ubase, common_type, aff_inv);
3957 tree_to_aff_combination (cbase, common_type, &aff_cbase);
3958 tree_to_aff_combination (var, uutype, aff_var);
3959
3960 /* We need to shift the value if we are after the increment. */
3961 if (stmt_after_increment (loop, cand, at))
3962 {
3963 aff_tree cstep_aff;
3964
3965 if (common_type != uutype)
3966 cstep_common = fold_convert (common_type, cstep);
3967 else
3968 cstep_common = cstep;
3969
3970 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3971 aff_combination_add (&aff_cbase, &cstep_aff);
3972 }
3973
3974 aff_combination_scale (&aff_cbase, -rat);
3975 aff_combination_add (aff_inv, &aff_cbase);
3976 if (common_type != uutype)
3977 aff_combination_convert (aff_inv, uutype);
3978
3979 aff_combination_scale (aff_var, rat);
3980 return true;
3981 }
3982
3983 /* Determines the expression by which USE is expressed from induction
3984 variable CAND at statement AT in LOOP. The expression is stored in a
3985 decomposed form in AFF. Returns false if USE cannot be expressed using CAND. */
3986
3987 static bool
3988 get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
3989 struct iv_cand *cand, struct aff_tree *aff)
3990 {
3991 aff_tree aff_var;
3992
3993 if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
3994 return false;
3995
3996 aff_combination_add (aff, &aff_var);
3997 return true;
3998 }
3999
4000 /* Return the type of USE. */
4001
4002 static tree
4003 get_use_type (struct iv_use *use)
4004 {
4005 tree base_type = TREE_TYPE (use->iv->base);
4006 tree type;
4007
4008 if (use->type == USE_REF_ADDRESS)
4009 {
4010 /* The base_type may be a void pointer. Create a pointer type based on
4011 the mem_ref instead. */
4012 type = build_pointer_type (TREE_TYPE (*use->op_p));
4013 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
4014 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
4015 }
4016 else
4017 type = base_type;
4018
4019 return type;
4020 }
4021
4022 /* Determines the expression by which USE is expressed from induction
4023 variable CAND at statement AT in LOOP. The computation is unshared. */
4024
4025 static tree
4026 get_computation_at (struct loop *loop, gimple *at,
4027 struct iv_use *use, struct iv_cand *cand)
4028 {
4029 aff_tree aff;
4030 tree type = get_use_type (use);
4031
4032 if (!get_computation_aff (loop, at, use, cand, &aff))
4033 return NULL_TREE;
4034 unshare_aff_combination (&aff);
4035 return fold_convert (type, aff_combination_to_tree (&aff));
4036 }
4037
4038 /* Adjust the cost COST for being in loop setup rather than loop body.
4039 If we're optimizing for space, the loop setup overhead is constant;
4040 if we're optimizing for speed, amortize it over the per-iteration cost.
4041 If ROUND_UP_P is true, the result is rounded up rather than truncated
4042 toward zero when optimizing for speed. */
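/* For instance, with illustrative numbers only: a setup cost of 10 and an
average of 4 iterations when optimizing for speed give 10 / 4 = 2 when
truncating, or (10 + 3) / 4 = 3 when ROUND_UP_P is true; when optimizing
for size the cost stays 10. */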
4043 static int64_t
4044 adjust_setup_cost (struct ivopts_data *data, int64_t cost,
4045 bool round_up_p = false)
4046 {
4047 if (cost == INFTY)
4048 return cost;
4049 else if (optimize_loop_for_speed_p (data->current_loop))
4050 {
4051 int64_t niters = (int64_t) avg_loop_niter (data->current_loop);
4052 return (cost + (round_up_p ? niters - 1 : 0)) / niters;
4053 }
4054 else
4055 return cost;
4056 }
4057
4058 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4059 EXPR operand holding the shift. COST0 and COST1 are the costs for
4060 calculating the operands of EXPR. Returns true if successful, and returns
4061 the cost in COST. */
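/* A hypothetical example: for EXPR = a + b * 8, MULT is b * 8, so m = 3 and
the result is the cheaper of an add-plus-shift sequence (as_cost) and a
single shift-and-add instruction (sa_cost), plus the cost of the other
operand of EXPR and, if needed, of forcing b into a variable. */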
4062
4063 static bool
4064 get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
4065 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4066 {
4067 comp_cost res;
4068 tree op1 = TREE_OPERAND (expr, 1);
4069 tree cst = TREE_OPERAND (mult, 1);
4070 tree multop = TREE_OPERAND (mult, 0);
4071 int m = exact_log2 (int_cst_value (cst));
4072 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4073 int as_cost, sa_cost;
4074 bool mult_in_op1;
4075
4076 if (!(m >= 0 && m < maxm))
4077 return false;
4078
4079 STRIP_NOPS (op1);
4080 mult_in_op1 = operand_equal_p (op1, mult, 0);
4081
4082 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4083
4084 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4085 use that in preference to a shift insn followed by an add insn. */
4086 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4087 ? shiftadd_cost (speed, mode, m)
4088 : (mult_in_op1
4089 ? shiftsub1_cost (speed, mode, m)
4090 : shiftsub0_cost (speed, mode, m)));
4091
4092 res = comp_cost (MIN (as_cost, sa_cost), 0);
4093 res += (mult_in_op1 ? cost0 : cost1);
4094
4095 STRIP_NOPS (multop);
4096 if (!is_gimple_val (multop))
4097 res += force_expr_to_var_cost (multop, speed);
4098
4099 *cost = res;
4100 return true;
4101 }
4102
4103 /* Estimates cost of forcing expression EXPR into a variable. */
4104
4105 static comp_cost
4106 force_expr_to_var_cost (tree expr, bool speed)
4107 {
4108 static bool costs_initialized = false;
4109 static unsigned integer_cost [2];
4110 static unsigned symbol_cost [2];
4111 static unsigned address_cost [2];
4112 tree op0, op1;
4113 comp_cost cost0, cost1, cost;
4114 machine_mode mode;
4115 scalar_int_mode int_mode;
4116
4117 if (!costs_initialized)
4118 {
4119 tree type = build_pointer_type (integer_type_node);
4120 tree var, addr;
4121 rtx x;
4122 int i;
4123
4124 var = create_tmp_var_raw (integer_type_node, "test_var");
4125 TREE_STATIC (var) = 1;
4126 x = produce_memory_decl_rtl (var, NULL);
4127 SET_DECL_RTL (var, x);
4128
4129 addr = build1 (ADDR_EXPR, type, var);
4130
4131
4132 for (i = 0; i < 2; i++)
4133 {
4134 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4135 2000), i);
4136
4137 symbol_cost[i] = computation_cost (addr, i) + 1;
4138
4139 address_cost[i]
4140 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4141 if (dump_file && (dump_flags & TDF_DETAILS))
4142 {
4143 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4144 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4145 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4146 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4147 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4148 fprintf (dump_file, "\n");
4149 }
4150 }
4151
4152 costs_initialized = true;
4153 }
4154
4155 STRIP_NOPS (expr);
4156
4157 if (SSA_VAR_P (expr))
4158 return no_cost;
4159
4160 if (is_gimple_min_invariant (expr))
4161 {
4162 if (poly_int_tree_p (expr))
4163 return comp_cost (integer_cost [speed], 0);
4164
4165 if (TREE_CODE (expr) == ADDR_EXPR)
4166 {
4167 tree obj = TREE_OPERAND (expr, 0);
4168
4169 if (VAR_P (obj)
4170 || TREE_CODE (obj) == PARM_DECL
4171 || TREE_CODE (obj) == RESULT_DECL)
4172 return comp_cost (symbol_cost [speed], 0);
4173 }
4174
4175 return comp_cost (address_cost [speed], 0);
4176 }
4177
4178 switch (TREE_CODE (expr))
4179 {
4180 case POINTER_PLUS_EXPR:
4181 case PLUS_EXPR:
4182 case MINUS_EXPR:
4183 case MULT_EXPR:
4184 case TRUNC_DIV_EXPR:
4185 case BIT_AND_EXPR:
4186 case BIT_IOR_EXPR:
4187 case LSHIFT_EXPR:
4188 case RSHIFT_EXPR:
4189 op0 = TREE_OPERAND (expr, 0);
4190 op1 = TREE_OPERAND (expr, 1);
4191 STRIP_NOPS (op0);
4192 STRIP_NOPS (op1);
4193 break;
4194
4195 CASE_CONVERT:
4196 case NEGATE_EXPR:
4197 case BIT_NOT_EXPR:
4198 op0 = TREE_OPERAND (expr, 0);
4199 STRIP_NOPS (op0);
4200 op1 = NULL_TREE;
4201 break;
4202
4203 default:
4204 /* Just an arbitrary value, FIXME. */
4205 return comp_cost (target_spill_cost[speed], 0);
4206 }
4207
4208 if (op0 == NULL_TREE
4209 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4210 cost0 = no_cost;
4211 else
4212 cost0 = force_expr_to_var_cost (op0, speed);
4213
4214 if (op1 == NULL_TREE
4215 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4216 cost1 = no_cost;
4217 else
4218 cost1 = force_expr_to_var_cost (op1, speed);
4219
4220 mode = TYPE_MODE (TREE_TYPE (expr));
4221 switch (TREE_CODE (expr))
4222 {
4223 case POINTER_PLUS_EXPR:
4224 case PLUS_EXPR:
4225 case MINUS_EXPR:
4226 case NEGATE_EXPR:
4227 cost = comp_cost (add_cost (speed, mode), 0);
4228 if (TREE_CODE (expr) != NEGATE_EXPR)
4229 {
4230 tree mult = NULL_TREE;
4231 comp_cost sa_cost;
4232 if (TREE_CODE (op1) == MULT_EXPR)
4233 mult = op1;
4234 else if (TREE_CODE (op0) == MULT_EXPR)
4235 mult = op0;
4236
4237 if (mult != NULL_TREE
4238 && is_a <scalar_int_mode> (mode, &int_mode)
4239 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4240 && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
4241 speed, &sa_cost))
4242 return sa_cost;
4243 }
4244 break;
4245
4246 CASE_CONVERT:
4247 {
4248 tree inner_mode, outer_mode;
4249 outer_mode = TREE_TYPE (expr);
4250 inner_mode = TREE_TYPE (op0);
4251 cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
4252 TYPE_MODE (inner_mode), speed), 0);
4253 }
4254 break;
4255
4256 case MULT_EXPR:
4257 if (cst_and_fits_in_hwi (op0))
4258 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
4259 mode, speed), 0);
4260 else if (cst_and_fits_in_hwi (op1))
4261 cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
4262 mode, speed), 0);
4263 else
4264 return comp_cost (target_spill_cost [speed], 0);
4265 break;
4266
4267 case TRUNC_DIV_EXPR:
4268 /* Division by a power of two is usually cheap, so we allow it. Forbid
4269 anything else. */
4270 if (integer_pow2p (TREE_OPERAND (expr, 1)))
4271 cost = comp_cost (add_cost (speed, mode), 0);
4272 else
4273 cost = comp_cost (target_spill_cost[speed], 0);
4274 break;
4275
4276 case BIT_AND_EXPR:
4277 case BIT_IOR_EXPR:
4278 case BIT_NOT_EXPR:
4279 case LSHIFT_EXPR:
4280 case RSHIFT_EXPR:
4281 cost = comp_cost (add_cost (speed, mode), 0);
4282 break;
4283
4284 default:
4285 gcc_unreachable ();
4286 }
4287
4288 cost += cost0;
4289 cost += cost1;
4290 return cost;
4291 }
4292
4293 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4294 invariants the computation depends on. */
4295
4296 static comp_cost
4297 force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
4298 {
4299 if (!expr)
4300 return no_cost;
4301
4302 find_inv_vars (data, &expr, inv_vars);
4303 return force_expr_to_var_cost (expr, data->speed);
4304 }
4305
4306 /* Returns cost of auto-modifying address expression in shape base + offset.
4307 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4308 address expression. The address expression has ADDR_MODE in addr space
4309 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4310 speed or size. */
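/* The selection logic below, sketched for a hypothetical 4-byte access
(msize == 4): AINC_STEP 4 with AINC_OFFSET 0 matches post-increment,
AINC_STEP 4 with AINC_OFFSET 4 matches pre-increment, AINC_STEP -4 with
AINC_OFFSET 0 matches post-decrement, and AINC_STEP -4 with AINC_OFFSET -4
matches pre-decrement; any other combination yields infinite_cost. */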
4311
4312 enum ainc_type
4313 {
4314 AINC_PRE_INC, /* Pre increment. */
4315 AINC_PRE_DEC, /* Pre decrement. */
4316 AINC_POST_INC, /* Post increment. */
4317 AINC_POST_DEC, /* Post decrement. */
4318 AINC_NONE /* Also the number of auto increment types. */
4319 };
4320
4321 struct ainc_cost_data
4322 {
4323 int64_t costs[AINC_NONE];
4324 };
4325
4326 static comp_cost
4327 get_address_cost_ainc (poly_int64 ainc_step, poly_int64 ainc_offset,
4328 machine_mode addr_mode, machine_mode mem_mode,
4329 addr_space_t as, bool speed)
4330 {
4331 if (!USE_LOAD_PRE_DECREMENT (mem_mode)
4332 && !USE_STORE_PRE_DECREMENT (mem_mode)
4333 && !USE_LOAD_POST_DECREMENT (mem_mode)
4334 && !USE_STORE_POST_DECREMENT (mem_mode)
4335 && !USE_LOAD_PRE_INCREMENT (mem_mode)
4336 && !USE_STORE_PRE_INCREMENT (mem_mode)
4337 && !USE_LOAD_POST_INCREMENT (mem_mode)
4338 && !USE_STORE_POST_INCREMENT (mem_mode))
4339 return infinite_cost;
4340
4341 static vec<ainc_cost_data *> ainc_cost_data_list;
4342 unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
4343 if (idx >= ainc_cost_data_list.length ())
4344 {
4345 unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;
4346
4347 gcc_assert (nsize > idx);
4348 ainc_cost_data_list.safe_grow_cleared (nsize);
4349 }
4350
4351 ainc_cost_data *data = ainc_cost_data_list[idx];
4352 if (data == NULL)
4353 {
4354 rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
4355
4356 data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
4357 data->costs[AINC_PRE_DEC] = INFTY;
4358 data->costs[AINC_POST_DEC] = INFTY;
4359 data->costs[AINC_PRE_INC] = INFTY;
4360 data->costs[AINC_POST_INC] = INFTY;
4361 if (USE_LOAD_PRE_DECREMENT (mem_mode)
4362 || USE_STORE_PRE_DECREMENT (mem_mode))
4363 {
4364 rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);
4365
4366 if (memory_address_addr_space_p (mem_mode, addr, as))
4367 data->costs[AINC_PRE_DEC]
4368 = address_cost (addr, mem_mode, as, speed);
4369 }
4370 if (USE_LOAD_POST_DECREMENT (mem_mode)
4371 || USE_STORE_POST_DECREMENT (mem_mode))
4372 {
4373 rtx addr = gen_rtx_POST_DEC (addr_mode, reg);
4374
4375 if (memory_address_addr_space_p (mem_mode, addr, as))
4376 data->costs[AINC_POST_DEC]
4377 = address_cost (addr, mem_mode, as, speed);
4378 }
4379 if (USE_LOAD_PRE_INCREMENT (mem_mode)
4380 || USE_STORE_PRE_INCREMENT (mem_mode))
4381 {
4382 rtx addr = gen_rtx_PRE_INC (addr_mode, reg);
4383
4384 if (memory_address_addr_space_p (mem_mode, addr, as))
4385 data->costs[AINC_PRE_INC]
4386 = address_cost (addr, mem_mode, as, speed);
4387 }
4388 if (USE_LOAD_POST_INCREMENT (mem_mode)
4389 || USE_STORE_POST_INCREMENT (mem_mode))
4390 {
4391 rtx addr = gen_rtx_POST_INC (addr_mode, reg);
4392
4393 if (memory_address_addr_space_p (mem_mode, addr, as))
4394 data->costs[AINC_POST_INC]
4395 = address_cost (addr, mem_mode, as, speed);
4396 }
4397 ainc_cost_data_list[idx] = data;
4398 }
4399
4400 poly_int64 msize = GET_MODE_SIZE (mem_mode);
4401 if (known_eq (ainc_offset, 0) && known_eq (msize, ainc_step))
4402 return comp_cost (data->costs[AINC_POST_INC], 0);
4403 if (known_eq (ainc_offset, 0) && known_eq (msize, -ainc_step))
4404 return comp_cost (data->costs[AINC_POST_DEC], 0);
4405 if (known_eq (ainc_offset, msize) && known_eq (msize, ainc_step))
4406 return comp_cost (data->costs[AINC_PRE_INC], 0);
4407 if (known_eq (ainc_offset, -msize) && known_eq (msize, -ainc_step))
4408 return comp_cost (data->costs[AINC_PRE_DEC], 0);
4409
4410 return infinite_cost;
4411 }
4412
4413 /* Return the cost of computing USE's address expression by using CAND.
4414 AFF_INV and AFF_VAR represent the invariant and variant parts of the
4415 address expression, respectively. If AFF_INV is simple, store the loop
4416 invariant variables it depends on in INV_VARS; if AFF_INV is complicated,
4417 handle it as a new invariant expression and record it in INV_EXPR.
4418 RATIO is the ratio between the steps of USE and CAND. If CAN_AUTOINC is
4419 non-NULL, store a boolean value in it indicating whether this is an
4420 auto-increment address. */
4421
4422 static comp_cost
4423 get_address_cost (struct ivopts_data *data, struct iv_use *use,
4424 struct iv_cand *cand, aff_tree *aff_inv,
4425 aff_tree *aff_var, HOST_WIDE_INT ratio,
4426 bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
4427 bool *can_autoinc, bool speed)
4428 {
4429 rtx addr;
4430 bool simple_inv = true;
4431 tree comp_inv = NULL_TREE, type = aff_var->type;
4432 comp_cost var_cost = no_cost, cost = no_cost;
4433 struct mem_address parts = {NULL_TREE, integer_one_node,
4434 NULL_TREE, NULL_TREE, NULL_TREE};
4435 machine_mode addr_mode = TYPE_MODE (type);
4436 machine_mode mem_mode = TYPE_MODE (use->mem_type);
4437 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
4438 /* Only true if ratio != 1. */
4439 bool ok_with_ratio_p = false;
4440 bool ok_without_ratio_p = false;
4441
4442 if (!aff_combination_const_p (aff_inv))
4443 {
4444 parts.index = integer_one_node;
4445 /* Addressing mode "base + index". */
4446 ok_without_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4447 if (ratio != 1)
4448 {
4449 parts.step = wide_int_to_tree (type, ratio);
4450 /* Addressing mode "base + index << scale". */
4451 ok_with_ratio_p = valid_mem_ref_p (mem_mode, as, &parts);
4452 if (!ok_with_ratio_p)
4453 parts.step = NULL_TREE;
4454 }
4455 if (ok_with_ratio_p || ok_without_ratio_p)
4456 {
4457 if (maybe_ne (aff_inv->offset, 0))
4458 {
4459 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4460 /* Addressing mode "base + index [<< scale] + offset". */
4461 if (!valid_mem_ref_p (mem_mode, as, &parts))
4462 parts.offset = NULL_TREE;
4463 else
4464 aff_inv->offset = 0;
4465 }
4466
4467 move_fixed_address_to_symbol (&parts, aff_inv);
4468 /* Base is fixed address and is moved to symbol part. */
4469 if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
4470 parts.base = NULL_TREE;
4471
4472 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4473 if (parts.symbol != NULL_TREE
4474 && !valid_mem_ref_p (mem_mode, as, &parts))
4475 {
4476 aff_combination_add_elt (aff_inv, parts.symbol, 1);
4477 parts.symbol = NULL_TREE;
4478 /* Reset SIMPLE_INV since symbol address needs to be computed
4479 outside of address expression in this case. */
4480 simple_inv = false;
4481 /* Symbol part is moved back to base part, it can't be NULL. */
4482 parts.base = integer_one_node;
4483 }
4484 }
4485 else
4486 parts.index = NULL_TREE;
4487 }
4488 else
4489 {
4490 poly_int64 ainc_step;
4491 if (can_autoinc
4492 && ratio == 1
4493 && ptrdiff_tree_p (cand->iv->step, &ainc_step))
4494 {
4495 poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
4496
4497 if (stmt_after_increment (data->current_loop, cand, use->stmt))
4498 ainc_offset += ainc_step;
4499 cost = get_address_cost_ainc (ainc_step, ainc_offset,
4500 addr_mode, mem_mode, as, speed);
4501 if (!cost.infinite_cost_p ())
4502 {
4503 *can_autoinc = true;
4504 return cost;
4505 }
4506 cost = no_cost;
4507 }
4508 if (!aff_combination_zero_p (aff_inv))
4509 {
4510 parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
4511 /* Addressing mode "base + offset". */
4512 if (!valid_mem_ref_p (mem_mode, as, &parts))
4513 parts.offset = NULL_TREE;
4514 else
4515 aff_inv->offset = 0;
4516 }
4517 }
4518
4519 if (simple_inv)
4520 simple_inv = (aff_inv == NULL
4521 || aff_combination_const_p (aff_inv)
4522 || aff_combination_singleton_var_p (aff_inv));
4523 if (!aff_combination_zero_p (aff_inv))
4524 comp_inv = aff_combination_to_tree (aff_inv);
4525 if (comp_inv != NULL_TREE)
4526 cost = force_var_cost (data, comp_inv, inv_vars);
4527 if (ratio != 1 && parts.step == NULL_TREE)
4528 var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
4529 if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
4530 var_cost += add_cost (speed, addr_mode);
4531
4532 if (comp_inv && inv_expr && !simple_inv)
4533 {
4534 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4535 /* Clear depends on. */
4536 if (*inv_expr != NULL && inv_vars && *inv_vars)
4537 bitmap_clear (*inv_vars);
4538
4539 /* The cost of a small invariant expression adjusted against loop niters
4540 is usually zero, which makes it difficult to distinguish from a
4541 candidate based on loop invariant variables. Moreover, the generated
4542 invariant expression may not be hoisted out of the loop by a following
4543 pass. We penalize the cost by rounding up in order to neutralize such
4544 effects. */
4545 cost.cost = adjust_setup_cost (data, cost.cost, true);
4546 cost.scratch = cost.cost;
4547 }
4548
4549 cost += var_cost;
4550 addr = addr_for_mem_ref (&parts, as, false);
4551 gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
4552 cost += address_cost (addr, mem_mode, as, speed);
4553
4554 if (parts.symbol != NULL_TREE)
4555 cost.complexity += 1;
4556 /* Don't increase the complexity of adding a scaled index if it's
4557 the only kind of index that the target allows. */
4558 if (parts.step != NULL_TREE && ok_without_ratio_p)
4559 cost.complexity += 1;
4560 if (parts.base != NULL_TREE && parts.index != NULL_TREE)
4561 cost.complexity += 1;
4562 if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
4563 cost.complexity += 1;
4564
4565 return cost;
4566 }
4567
4568 /* Scale (multiply) the computed COST (except the scratch part, which should
4569 be hoisted out of the loop) by header->frequency / AT->frequency, which
4570 makes the expected cost more accurate. */
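/* For example, with made-up numbers: cost.cost == 10, cost.scratch == 2 and
a basic-block scale factor of 3 give 2 + (10 - 2) * 3 == 26; the scratch
(setup) part is deliberately left unscaled. */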
4571
4572 static comp_cost
4573 get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
4574 {
4575 if (data->speed
4576 && data->current_loop->header->count.to_frequency (cfun) > 0)
4577 {
4578 basic_block bb = gimple_bb (at);
4579 gcc_assert (cost.scratch <= cost.cost);
4580 int scale_factor = (int)(intptr_t) bb->aux;
4581 if (scale_factor == 1)
4582 return cost;
4583
4584 int64_t scaled_cost
4585 = cost.scratch + (cost.cost - cost.scratch) * scale_factor;
4586
4587 if (dump_file && (dump_flags & TDF_DETAILS))
4588 fprintf (dump_file, "Scaling cost based on bb prob by %2.2f: "
4589 "%" PRId64 " (scratch: %" PRId64 ") -> %" PRId64 "\n",
4590 1.0f * scale_factor, cost.cost, cost.scratch, scaled_cost);
4591
4592 cost.cost = scaled_cost;
4593 }
4594
4595 return cost;
4596 }
4597
4598 /* Determines the cost of the computation by which USE is expressed
4599 from induction variable CAND. If ADDRESS_P is true, we just need
4600 to create an address from it, otherwise we want to get it into a
4601 register. A set of invariants we depend on is stored in INV_VARS.
4602 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4603 addressing is likely. If INV_EXPR is nonnull, record invariant
4604 expr entry in it. */
4605
4606 static comp_cost
4607 get_computation_cost (struct ivopts_data *data, struct iv_use *use,
4608 struct iv_cand *cand, bool address_p, bitmap *inv_vars,
4609 bool *can_autoinc, iv_inv_expr_ent **inv_expr)
4610 {
4611 gimple *at = use->stmt;
4612 tree ubase = use->iv->base, cbase = cand->iv->base;
4613 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
4614 tree comp_inv = NULL_TREE;
4615 HOST_WIDE_INT ratio, aratio;
4616 comp_cost cost;
4617 widest_int rat;
4618 aff_tree aff_inv, aff_var;
4619 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4620
4621 if (inv_vars)
4622 *inv_vars = NULL;
4623 if (can_autoinc)
4624 *can_autoinc = false;
4625 if (inv_expr)
4626 *inv_expr = NULL;
4627
4628 /* Check if we have enough precision to express the values of use. */
4629 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4630 return infinite_cost;
4631
4632 if (address_p
4633 || (use->iv->base_object
4634 && cand->iv->base_object
4635 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4636 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4637 {
4638 /* Do not try to express address of an object with computation based
4639 on address of a different object. This may cause problems in rtl
4640 level alias analysis (that does not expect this to be happening,
4641 as this is illegal in C), and would be unlikely to be useful
4642 anyway. */
4643 if (use->iv->base_object
4644 && cand->iv->base_object
4645 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4646 return infinite_cost;
4647 }
4648
4649 if (!get_computation_aff_1 (data->current_loop, at, use,
4650 cand, &aff_inv, &aff_var, &rat)
4651 || !wi::fits_shwi_p (rat))
4652 return infinite_cost;
4653
4654 ratio = rat.to_shwi ();
4655 if (address_p)
4656 {
4657 cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
4658 inv_vars, inv_expr, can_autoinc, speed);
4659 return get_scaled_computation_cost_at (data, at, cost);
4660 }
4661
4662 bool simple_inv = (aff_combination_const_p (&aff_inv)
4663 || aff_combination_singleton_var_p (&aff_inv));
4664 tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
4665 aff_combination_convert (&aff_inv, signed_type);
4666 if (!aff_combination_zero_p (&aff_inv))
4667 comp_inv = aff_combination_to_tree (&aff_inv);
4668
4669 cost = force_var_cost (data, comp_inv, inv_vars);
4670 if (comp_inv && inv_expr && !simple_inv)
4671 {
4672 *inv_expr = get_loop_invariant_expr (data, comp_inv);
4673 /* Clear depends on. */
4674 if (*inv_expr != NULL && inv_vars && *inv_vars)
4675 bitmap_clear (*inv_vars);
4676
4677 cost.cost = adjust_setup_cost (data, cost.cost);
4678 /* Record setup cost in scratch field. */
4679 cost.scratch = cost.cost;
4680 }
4681 /* The cost of a constant integer can be covered when adding the invariant
4682 part to the variant part. */
4683 else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
4684 cost = no_cost;
4685
4686 /* Need type narrowing to represent use with cand. */
4687 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4688 {
4689 machine_mode outer_mode = TYPE_MODE (utype);
4690 machine_mode inner_mode = TYPE_MODE (ctype);
4691 cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
4692 }
4693
4694 /* Turn a + i * (-c) into a - i * c. */
4695 if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
4696 aratio = -ratio;
4697 else
4698 aratio = ratio;
4699
4700 if (ratio != 1)
4701 cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);
4702
4703 /* TODO: We may also need to check if we can compute a + i * 4 in one
4704 instruction. */
4705 /* Need to add up the invariant and variant parts. */
4706 if (comp_inv && !integer_zerop (comp_inv))
4707 cost += add_cost (speed, TYPE_MODE (utype));
4708
4709 return get_scaled_computation_cost_at (data, at, cost);
4710 }
4711
4712 /* Determines cost of computing the use in GROUP with CAND in a generic
4713 expression. */
4714
4715 static bool
4716 determine_group_iv_cost_generic (struct ivopts_data *data,
4717 struct iv_group *group, struct iv_cand *cand)
4718 {
4719 comp_cost cost;
4720 iv_inv_expr_ent *inv_expr = NULL;
4721 bitmap inv_vars = NULL, inv_exprs = NULL;
4722 struct iv_use *use = group->vuses[0];
4723
4724 /* The simple case first -- if we need to express value of the preserved
4725 original biv, the cost is 0. This also prevents us from counting the
4726 cost of increment twice -- once at this use and once in the cost of
4727 the candidate. */
4728 if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
4729 cost = no_cost;
4730 else
4731 cost = get_computation_cost (data, use, cand, false,
4732 &inv_vars, NULL, &inv_expr);
4733
4734 if (inv_expr)
4735 {
4736 inv_exprs = BITMAP_ALLOC (NULL);
4737 bitmap_set_bit (inv_exprs, inv_expr->id);
4738 }
4739 set_group_iv_cost (data, group, cand, cost, inv_vars,
4740 NULL_TREE, ERROR_MARK, inv_exprs);
4741 return !cost.infinite_cost_p ();
4742 }
4743
4744 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4745
4746 static bool
4747 determine_group_iv_cost_address (struct ivopts_data *data,
4748 struct iv_group *group, struct iv_cand *cand)
4749 {
4750 unsigned i;
4751 bitmap inv_vars = NULL, inv_exprs = NULL;
4752 bool can_autoinc;
4753 iv_inv_expr_ent *inv_expr = NULL;
4754 struct iv_use *use = group->vuses[0];
4755 comp_cost sum_cost = no_cost, cost;
4756
4757 cost = get_computation_cost (data, use, cand, true,
4758 &inv_vars, &can_autoinc, &inv_expr);
4759
4760 if (inv_expr)
4761 {
4762 inv_exprs = BITMAP_ALLOC (NULL);
4763 bitmap_set_bit (inv_exprs, inv_expr->id);
4764 }
4765 sum_cost = cost;
4766 if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
4767 {
4768 if (can_autoinc)
4769 sum_cost -= cand->cost_step;
4770 /* If we generated the candidate solely for exploiting autoincrement
4771 opportunities, and it turns out it can't be used, set the cost to
4772 infinity to make sure we ignore it. */
4773 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4774 sum_cost = infinite_cost;
4775 }
4776
4777 /* Uses in a group can share setup code, so only add setup cost once. */
4778 cost -= cost.scratch;
4779 /* Compute and add costs for the remaining uses of this group. */
4780 for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
4781 {
4782 struct iv_use *next = group->vuses[i];
4783
4784 /* TODO: We could skip computing cost for sub iv_use when it has the
4785 same cost as the first iv_use, but the cost really depends on the
4786 offset and where the iv_use is. */
4787 cost = get_computation_cost (data, next, cand, true,
4788 NULL, &can_autoinc, &inv_expr);
4789 if (inv_expr)
4790 {
4791 if (!inv_exprs)
4792 inv_exprs = BITMAP_ALLOC (NULL);
4793
4794 bitmap_set_bit (inv_exprs, inv_expr->id);
4795 }
4796 sum_cost += cost;
4797 }
4798 set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
4799 NULL_TREE, ERROR_MARK, inv_exprs);
4800
4801 return !sum_cost.infinite_cost_p ();
4802 }
4803
4804 /* Computes value of candidate CAND at position AT in iteration NITER, and
4805 stores it to VAL. */
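/* In affine terms this computes VAL = base + NITER * step, plus one more
step if AT is after the candidate's increment. E.g., with a hypothetical
base 0 and step 4, the value after 10 iterations is 40, or 44 when AT
follows the increment. */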
4806
4807 static void
4808 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4809 aff_tree *val)
4810 {
4811 aff_tree step, delta, nit;
4812 struct iv *iv = cand->iv;
4813 tree type = TREE_TYPE (iv->base);
4814 tree steptype;
4815 if (POINTER_TYPE_P (type))
4816 steptype = sizetype;
4817 else
4818 steptype = unsigned_type_for (type);
4819
4820 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4821 aff_combination_convert (&step, steptype);
4822 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4823 aff_combination_convert (&nit, steptype);
4824 aff_combination_mult (&nit, &step, &delta);
4825 if (stmt_after_increment (loop, cand, at))
4826 aff_combination_add (&delta, &step);
4827
4828 tree_to_aff_combination (iv->base, type, val);
4829 if (!POINTER_TYPE_P (type))
4830 aff_combination_convert (val, steptype);
4831 aff_combination_add (val, &delta);
4832 }
4833
4834 /* Returns period of induction variable iv. */
4835
4836 static tree
4837 iv_period (struct iv *iv)
4838 {
4839 tree step = iv->step, period, type;
4840 tree pow2div;
4841
4842 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4843
4844 type = unsigned_type_for (TREE_TYPE (step));
4845 /* The period of the iv is lcm (step, type_range) / step - 1,
4846 i.e., N * type_range / step - 1. Since the type range is a power
4847 of two, N == step >> num_of_ending_zeros_binary (step),
4848 so the final result is
4849
4850 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
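/* A worked example with hypothetical values: for a 32-bit unsigned iv with
step 12 (two trailing zero bits), the period is (2^32 >> 2) - 1 ==
0x3fffffff, the number of increments the iv can take before its value
wraps back to the initial one. */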
4853 pow2div = num_ending_zeros (step);
4854
4855 period = build_low_bits_mask (type,
4856 (TYPE_PRECISION (type)
4857 - tree_to_uhwi (pow2div)));
4858
4859 return period;
4860 }
4861
4862 /* Returns the comparison operator used when eliminating the iv USE. */
4863
4864 static enum tree_code
4865 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4866 {
4867 struct loop *loop = data->current_loop;
4868 basic_block ex_bb;
4869 edge exit;
4870
4871 ex_bb = gimple_bb (use->stmt);
4872 exit = EDGE_SUCC (ex_bb, 0);
4873 if (flow_bb_inside_loop_p (loop, exit->dest))
4874 exit = EDGE_SUCC (ex_bb, 1);
4875
4876 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4877 }
4878
4879 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4880 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4881 calculation is performed in a non-wrapping type.
4882
4883 TODO: More generally, we could test for the situation that
4884 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4885 This would require knowing the sign of OFFSET. */
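/* For instance, with hypothetical names: if BASE is defined as p_0 = q + n
in a non-wrapping type and OFFSET expands to the same n, then
BASE - OFFSET is just q and cannot overflow, so true is returned. The
code below uses affine expansion of the operands to prove this equality. */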
4886
4887 static bool
4888 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4889 {
4890 enum tree_code code;
4891 tree e1, e2;
4892 aff_tree aff_e1, aff_e2, aff_offset;
4893
4894 if (!nowrap_type_p (TREE_TYPE (base)))
4895 return false;
4896
4897 base = expand_simple_operations (base);
4898
4899 if (TREE_CODE (base) == SSA_NAME)
4900 {
4901 gimple *stmt = SSA_NAME_DEF_STMT (base);
4902
4903 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4904 return false;
4905
4906 code = gimple_assign_rhs_code (stmt);
4907 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4908 return false;
4909
4910 e1 = gimple_assign_rhs1 (stmt);
4911 e2 = gimple_assign_rhs2 (stmt);
4912 }
4913 else
4914 {
4915 code = TREE_CODE (base);
4916 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4917 return false;
4918 e1 = TREE_OPERAND (base, 0);
4919 e2 = TREE_OPERAND (base, 1);
4920 }
4921
4922 /* Use affine expansion as deeper inspection to prove the equality. */
4923 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4924 &aff_e2, &data->name_expansion_cache);
4925 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4926 &aff_offset, &data->name_expansion_cache);
4927 aff_combination_scale (&aff_offset, -1);
4928 switch (code)
4929 {
4930 case PLUS_EXPR:
4931 aff_combination_add (&aff_e2, &aff_offset);
4932 if (aff_combination_zero_p (&aff_e2))
4933 return true;
4934
4935 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4936 &aff_e1, &data->name_expansion_cache);
4937 aff_combination_add (&aff_e1, &aff_offset);
4938 return aff_combination_zero_p (&aff_e1);
4939
4940 case POINTER_PLUS_EXPR:
4941 aff_combination_add (&aff_e2, &aff_offset);
4942 return aff_combination_zero_p (&aff_e2);
4943
4944 default:
4945 return false;
4946 }
4947 }
4948
4949 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4950 comparison with CAND. NITER describes the number of iterations of
4951 the loops. If successful, the comparison in COMP_P is altered accordingly.
4952
4953 We aim to handle the following situation:
4954
4955 sometype *base, *p;
4956 int a, b, i;
4957
4958 i = a;
4959 p = p_0 = base + a;
4960
4961 do
4962 {
4963 bla (*p);
4964 p++;
4965 i++;
4966 }
4967 while (i < b);
4968
4969 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4970 We aim to optimize this to
4971
4972 p = p_0 = base + a;
4973 do
4974 {
4975 bla (*p);
4976 p++;
4977 }
4978 while (p < p_0 - a + b);
4979
4980 This preserves the correctness, since the pointer arithmetics does not
4981 overflow. More precisely:
4982
4983 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4984 overflow in computing it or the values of p.
4985 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4986 overflow. To prove this, we use the fact that p_0 = base + a. */
4987
4988 static bool
4989 iv_elimination_compare_lt (struct ivopts_data *data,
4990 struct iv_cand *cand, enum tree_code *comp_p,
4991 struct tree_niter_desc *niter)
4992 {
4993 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4994 struct aff_tree nit, tmpa, tmpb;
4995 enum tree_code comp;
4996 HOST_WIDE_INT step;
4997
4998 /* We need to know that the candidate induction variable does not overflow.
4999 While more complex analysis may be used to prove this, for now just
5000 check that the variable appears in the original program and that it
5001 is computed in a type that guarantees no overflows. */
5002 cand_type = TREE_TYPE (cand->iv->base);
5003 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5004 return false;
5005
5006 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5007 the calculation of the BOUND could overflow, making the comparison
5008 invalid. */
5009 if (!data->loop_single_exit_p)
5010 return false;
5011
5012 /* We need to be able to decide whether candidate is increasing or decreasing
5013 in order to choose the right comparison operator. */
5014 if (!cst_and_fits_in_hwi (cand->iv->step))
5015 return false;
5016 step = int_cst_value (cand->iv->step);
5017
5018 /* Check that the number of iterations matches the expected pattern:
5019 a + 1 > b ? 0 : b - a - 1. */
5020 mbz = niter->may_be_zero;
5021 if (TREE_CODE (mbz) == GT_EXPR)
5022 {
5023 /* Handle a + 1 > b. */
5024 tree op0 = TREE_OPERAND (mbz, 0);
5025 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5026 {
5027 a = TREE_OPERAND (op0, 0);
5028 b = TREE_OPERAND (mbz, 1);
5029 }
5030 else
5031 return false;
5032 }
5033 else if (TREE_CODE (mbz) == LT_EXPR)
5034 {
5035 tree op1 = TREE_OPERAND (mbz, 1);
5036
5037 /* Handle b < a + 1. */
5038 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5039 {
5040 a = TREE_OPERAND (op1, 0);
5041 b = TREE_OPERAND (mbz, 0);
5042 }
5043 else
5044 return false;
5045 }
5046 else
5047 return false;
5048
5049 /* Expected number of iterations is B - A - 1. Check that it matches
5050 the actual number, i.e., that B - A - NITER = 1. */
5051 tree_to_aff_combination (niter->niter, nit_type, &nit);
5052 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5053 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5054 aff_combination_scale (&nit, -1);
5055 aff_combination_scale (&tmpa, -1);
5056 aff_combination_add (&tmpb, &tmpa);
5057 aff_combination_add (&tmpb, &nit);
5058 if (tmpb.n != 0 || maybe_ne (tmpb.offset, 1))
5059 return false;
5060
5061 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5062 overflow. */
5063 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5064 cand->iv->step,
5065 fold_convert (TREE_TYPE (cand->iv->step), a));
5066 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5067 return false;
5068
5069 /* Determine the new comparison operator. */
5070 comp = step < 0 ? GT_EXPR : LT_EXPR;
5071 if (*comp_p == NE_EXPR)
5072 *comp_p = comp;
5073 else if (*comp_p == EQ_EXPR)
5074 *comp_p = invert_tree_comparison (comp, false);
5075 else
5076 gcc_unreachable ();
5077
5078 return true;
5079 }
5080
5081 /* Check whether it is possible to express the condition in USE by comparison
5082 of candidate CAND. If so, store the value compared with to BOUND, and the
5083 comparison operator to COMP. */
5084
5085 static bool
5086 may_eliminate_iv (struct ivopts_data *data,
5087 struct iv_use *use, struct iv_cand *cand, tree *bound,
5088 enum tree_code *comp)
5089 {
5090 basic_block ex_bb;
5091 edge exit;
5092 tree period;
5093 struct loop *loop = data->current_loop;
5094 aff_tree bnd;
5095 struct tree_niter_desc *desc = NULL;
5096
5097 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5098 return false;
5099
5100 /* For now this works only for exits that dominate the loop latch.
5101 TODO: extend to other conditions inside the loop body. */
5102 ex_bb = gimple_bb (use->stmt);
5103 if (use->stmt != last_stmt (ex_bb)
5104 || gimple_code (use->stmt) != GIMPLE_COND
5105 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5106 return false;
5107
5108 exit = EDGE_SUCC (ex_bb, 0);
5109 if (flow_bb_inside_loop_p (loop, exit->dest))
5110 exit = EDGE_SUCC (ex_bb, 1);
5111 if (flow_bb_inside_loop_p (loop, exit->dest))
5112 return false;
5113
5114 desc = niter_for_exit (data, exit);
5115 if (!desc)
5116 return false;
5117
5118 /* Determine whether we can use the variable to test the exit condition.
5119 This is the case iff the period of the induction variable is greater
5120 than the number of iterations for which the exit condition is true. */
5121 period = iv_period (cand->iv);
5122
5123 /* If the number of iterations is constant, compare against it directly. */
5124 if (TREE_CODE (desc->niter) == INTEGER_CST)
5125 {
5126 /* See cand_value_at. */
5127 if (stmt_after_increment (loop, cand, use->stmt))
5128 {
5129 if (!tree_int_cst_lt (desc->niter, period))
5130 return false;
5131 }
5132 else
5133 {
5134 if (tree_int_cst_lt (period, desc->niter))
5135 return false;
5136 }
5137 }
5138
5139 /* If not, and if this is the only possible exit of the loop, see whether
5140 we can get a conservative estimate on the number of iterations of the
5141 entire loop and compare against that instead. */
5142 else
5143 {
5144 widest_int period_value, max_niter;
5145
5146 max_niter = desc->max;
5147 if (stmt_after_increment (loop, cand, use->stmt))
5148 max_niter += 1;
5149 period_value = wi::to_widest (period);
5150 if (wi::gtu_p (max_niter, period_value))
5151 {
5152 /* See if we can take advantage of inferred loop bound
5153 information. */
5154 if (data->loop_single_exit_p)
5155 {
5156 if (!max_loop_iterations (loop, &max_niter))
5157 return false;
5158 /* The loop bound is already adjusted by adding 1. */
5159 if (wi::gtu_p (max_niter, period_value))
5160 return false;
5161 }
5162 else
5163 return false;
5164 }
5165 }
5166
5167 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5168
5169 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5170 aff_combination_to_tree (&bnd));
5171 *comp = iv_elimination_compare (data, use);
5172
5173 /* It is unlikely that computing the number of iterations using division
5174 would be more profitable than keeping the original induction variable. */
5175 if (expression_expensive_p (*bound))
5176 return false;
5177
5178 /* Sometimes, it is possible to handle the situation that the number of
5179 iterations may be zero (unless additional assumptions hold) by using <
5180 instead of != in the exit condition.
5181
5182 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5183 base the exit condition on it. However, that is often too
5184 expensive. */
5185 if (!integer_zerop (desc->may_be_zero))
5186 return iv_elimination_compare_lt (data, cand, comp, desc);
5187
5188 return true;
5189 }
5190
5191 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5192 be copied if it is used in the loop body and DATA->body_includes_call. */
5193
5194 static int
5195 parm_decl_cost (struct ivopts_data *data, tree bound)
5196 {
5197 tree sbound = bound;
5198 STRIP_NOPS (sbound);
5199
5200 if (TREE_CODE (sbound) == SSA_NAME
5201 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5202 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5203 && data->body_includes_call)
5204 return COSTS_N_INSNS (1);
5205
5206 return 0;
5207 }
5208
5209 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5210
5211 static bool
5212 determine_group_iv_cost_cond (struct ivopts_data *data,
5213 struct iv_group *group, struct iv_cand *cand)
5214 {
5215 tree bound = NULL_TREE;
5216 struct iv *cmp_iv;
5217 bitmap inv_exprs = NULL;
5218 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
5219 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5220 enum comp_iv_rewrite rewrite_type;
5221 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
5222 tree *control_var, *bound_cst;
5223 enum tree_code comp = ERROR_MARK;
5224 struct iv_use *use = group->vuses[0];
5225
5226 /* Extract condition operands. */
5227 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5228 &bound_cst, NULL, &cmp_iv);
5229 gcc_assert (rewrite_type != COMP_IV_NA);
5230
5231 /* Try iv elimination. */
5232 if (rewrite_type == COMP_IV_ELIM
5233 && may_eliminate_iv (data, use, cand, &bound, &comp))
5234 {
5235 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
5236 if (elim_cost.cost == 0)
5237 elim_cost.cost = parm_decl_cost (data, bound);
5238 else if (TREE_CODE (bound) == INTEGER_CST)
5239 elim_cost.cost = 0;
5240 /* If we replace a loop condition 'i < n' with 'p < base + n',
5241 inv_vars_elim will have 'base' and 'n' set, which implies that both
5242 'base' and 'n' will be live during the loop. More likely,
5243 'base + n' will be loop invariant, resulting in only one live value
5244 during the loop. So in that case we clear inv_vars_elim and set
5245 inv_expr_elim instead. */
5246 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
5247 {
5248 inv_expr_elim = get_loop_invariant_expr (data, bound);
5249 bitmap_clear (inv_vars_elim);
5250 }
5251 /* The bound is a loop invariant, so it will be only computed
5252 once. */
5253 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5254 }
5255
5256 /* When the condition is a comparison of the candidate IV against
5257 zero, prefer this IV.
5258
5259 TODO: The constant that we're subtracting from the cost should
5260 be target-dependent. This information should be added to the
5261 target costs for each backend. */
5262 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
5263 && integer_zerop (*bound_cst)
5264 && (operand_equal_p (*control_var, cand->var_after, 0)
5265 || operand_equal_p (*control_var, cand->var_before, 0)))
5266 elim_cost -= 1;
5267
5268 express_cost = get_computation_cost (data, use, cand, false,
5269 &inv_vars_express, NULL,
5270 &inv_expr_express);
5271 if (cmp_iv != NULL)
5272 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
5273
5274 /* Count the cost of the original bound as well. */
5275 bound_cost = force_var_cost (data, *bound_cst, NULL);
5276 if (bound_cost.cost == 0)
5277 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5278 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5279 bound_cost.cost = 0;
5280 express_cost += bound_cost;
5281
5282 /* Choose the better approach, preferring the eliminated IV. */
5283 if (elim_cost <= express_cost)
5284 {
5285 cost = elim_cost;
5286 inv_vars = inv_vars_elim;
5287 inv_vars_elim = NULL;
5288 inv_expr = inv_expr_elim;
5289 }
5290 else
5291 {
5292 cost = express_cost;
5293 inv_vars = inv_vars_express;
5294 inv_vars_express = NULL;
5295 bound = NULL_TREE;
5296 comp = ERROR_MARK;
5297 inv_expr = inv_expr_express;
5298 }
5299
5300 if (inv_expr)
5301 {
5302 inv_exprs = BITMAP_ALLOC (NULL);
5303 bitmap_set_bit (inv_exprs, inv_expr->id);
5304 }
5305 set_group_iv_cost (data, group, cand, cost,
5306 inv_vars, bound, comp, inv_exprs);
5307
5308 if (inv_vars_elim)
5309 BITMAP_FREE (inv_vars_elim);
5310 if (inv_vars_express)
5311 BITMAP_FREE (inv_vars_express);
5312
5313 return !cost.infinite_cost_p ();
5314 }
5315
5316 /* Determines cost of computing uses in GROUP with CAND. Returns false
5317 if the uses in GROUP cannot be represented with CAND. */
5318
5319 static bool
5320 determine_group_iv_cost (struct ivopts_data *data,
5321 struct iv_group *group, struct iv_cand *cand)
5322 {
5323 switch (group->type)
5324 {
5325 case USE_NONLINEAR_EXPR:
5326 return determine_group_iv_cost_generic (data, group, cand);
5327
5328 case USE_REF_ADDRESS:
5329 case USE_PTR_ADDRESS:
5330 return determine_group_iv_cost_address (data, group, cand);
5331
5332 case USE_COMPARE:
5333 return determine_group_iv_cost_cond (data, group, cand);
5334
5335 default:
5336 gcc_unreachable ();
5337 }
5338 }
5339
5340 /* Return true if get_computation_cost indicates that autoincrement is
5341 a possibility for the pair of USE and CAND, false otherwise. */
5342
5343 static bool
5344 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5345 struct iv_cand *cand)
5346 {
5347 if (!address_p (use->type))
5348 return false;
5349
5350 bool can_autoinc = false;
5351 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5352 return can_autoinc;
5353 }
5354
5355 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5356 use that allows autoincrement, and set their AINC_USE if possible. */
5357
5358 static void
5359 set_autoinc_for_original_candidates (struct ivopts_data *data)
5360 {
5361 unsigned i, j;
5362
5363 for (i = 0; i < data->vcands.length (); i++)
5364 {
5365 struct iv_cand *cand = data->vcands[i];
5366 struct iv_use *closest_before = NULL;
5367 struct iv_use *closest_after = NULL;
5368 if (cand->pos != IP_ORIGINAL)
5369 continue;
5370
5371 for (j = 0; j < data->vgroups.length (); j++)
5372 {
5373 struct iv_group *group = data->vgroups[j];
5374 struct iv_use *use = group->vuses[0];
5375 unsigned uid = gimple_uid (use->stmt);
5376
5377 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5378 continue;
5379
5380 if (uid < gimple_uid (cand->incremented_at)
5381 && (closest_before == NULL
5382 || uid > gimple_uid (closest_before->stmt)))
5383 closest_before = use;
5384
5385 if (uid > gimple_uid (cand->incremented_at)
5386 && (closest_after == NULL
5387 || uid < gimple_uid (closest_after->stmt)))
5388 closest_after = use;
5389 }
5390
5391 if (closest_before != NULL
5392 && autoinc_possible_for_pair (data, closest_before, cand))
5393 cand->ainc_use = closest_before;
5394 else if (closest_after != NULL
5395 && autoinc_possible_for_pair (data, closest_after, cand))
5396 cand->ainc_use = closest_after;
5397 }
5398 }
5399
5400 /* Relate compare use with all candidates. */
5401
5402 static void
5403 relate_compare_use_with_all_cands (struct ivopts_data *data)
5404 {
5405 unsigned i, count = data->vcands.length ();
5406 for (i = 0; i < data->vgroups.length (); i++)
5407 {
5408 struct iv_group *group = data->vgroups[i];
5409
5410 if (group->type == USE_COMPARE)
5411 bitmap_set_range (group->related_cands, 0, count);
5412 }
5413 }
5414
5415 /* Finds the candidates for the induction variables. */
5416
5417 static void
5418 find_iv_candidates (struct ivopts_data *data)
5419 {
5420 /* Add commonly used ivs. */
5421 add_standard_iv_candidates (data);
5422
5423 /* Add old induction variables. */
5424 add_iv_candidate_for_bivs (data);
5425
5426 /* Add induction variables derived from uses. */
5427 add_iv_candidate_for_groups (data);
5428
5429 set_autoinc_for_original_candidates (data);
5430
5431 /* Record the important candidates. */
5432 record_important_candidates (data);
5433
5434 /* Relate compare iv_use with all candidates. */
5435 if (!data->consider_all_candidates)
5436 relate_compare_use_with_all_cands (data);
5437
5438 if (dump_file && (dump_flags & TDF_DETAILS))
5439 {
5440 unsigned i;
5441
5442 fprintf (dump_file, "\n<Important Candidates>:\t");
5443 for (i = 0; i < data->vcands.length (); i++)
5444 if (data->vcands[i]->important)
5445 fprintf (dump_file, " %d,", data->vcands[i]->id);
5446 fprintf (dump_file, "\n");
5447
5448 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5449 for (i = 0; i < data->vgroups.length (); i++)
5450 {
5451 struct iv_group *group = data->vgroups[i];
5452
5453 if (group->related_cands)
5454 {
5455 fprintf (dump_file, " Group %d:\t", group->id);
5456 dump_bitmap (dump_file, group->related_cands);
5457 }
5458 }
5459 fprintf (dump_file, "\n");
5460 }
5461 }
5462
5463 /* Determines costs of computing use of iv with an iv candidate. */
5464
5465 static void
5466 determine_group_iv_costs (struct ivopts_data *data)
5467 {
5468 unsigned i, j;
5469 struct iv_cand *cand;
5470 struct iv_group *group;
5471 bitmap to_clear = BITMAP_ALLOC (NULL);
5472
5473 alloc_use_cost_map (data);
5474
5475 for (i = 0; i < data->vgroups.length (); i++)
5476 {
5477 group = data->vgroups[i];
5478
5479 if (data->consider_all_candidates)
5480 {
5481 for (j = 0; j < data->vcands.length (); j++)
5482 {
5483 cand = data->vcands[j];
5484 determine_group_iv_cost (data, group, cand);
5485 }
5486 }
5487 else
5488 {
5489 bitmap_iterator bi;
5490
5491 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
5492 {
5493 cand = data->vcands[j];
5494 if (!determine_group_iv_cost (data, group, cand))
5495 bitmap_set_bit (to_clear, j);
5496 }
5497
5498 /* Remove the candidates for which the cost is infinite from
5499 the list of related candidates. */
5500 bitmap_and_compl_into (group->related_cands, to_clear);
5501 bitmap_clear (to_clear);
5502 }
5503 }
5504
5505 BITMAP_FREE (to_clear);
5506
5507 if (dump_file && (dump_flags & TDF_DETAILS))
5508 {
5509 bitmap_iterator bi;
5510
5511 /* Dump invariant variables. */
5512 fprintf (dump_file, "\n<Invariant Vars>:\n");
5513 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5514 {
5515 struct version_info *info = ver_info (data, i);
5516 if (info->inv_id)
5517 {
5518 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5519 print_generic_expr (dump_file, info->name, TDF_SLIM);
5520 fprintf (dump_file, "%s\n",
5521 info->has_nonlin_use ? "" : "\t(eliminable)");
5522 }
5523 }
5524
5525 /* Dump invariant expressions. */
5526 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5527 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5528
5529 for (hash_table<iv_inv_expr_hasher>::iterator it
5530 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5531 ++it)
5532 list.safe_push (*it);
5533
5534 list.qsort (sort_iv_inv_expr_ent);
5535
5536 for (i = 0; i < list.length (); ++i)
5537 {
5538 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
5539 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5540 fprintf (dump_file, "\n");
5541 }
5542
5543 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
5544
5545 for (i = 0; i < data->vgroups.length (); i++)
5546 {
5547 group = data->vgroups[i];
5548
5549 fprintf (dump_file, "Group %d:\n", i);
5550 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5551 for (j = 0; j < group->n_map_members; j++)
5552 {
5553 if (!group->cost_map[j].cand
5554 || group->cost_map[j].cost.infinite_cost_p ())
5555 continue;
5556
5557 fprintf (dump_file, " %d\t%" PRId64 "\t%d\t",
5558 group->cost_map[j].cand->id,
5559 group->cost_map[j].cost.cost,
5560 group->cost_map[j].cost.complexity);
5561 if (!group->cost_map[j].inv_exprs
5562 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5563 fprintf (dump_file, "NIL;\t");
5564 else
5565 bitmap_print (dump_file,
5566 group->cost_map[j].inv_exprs, "", ";\t");
5567 if (!group->cost_map[j].inv_vars
5568 || bitmap_empty_p (group->cost_map[j].inv_vars))
5569 fprintf (dump_file, "NIL;\n");
5570 else
5571 bitmap_print (dump_file,
5572 group->cost_map[j].inv_vars, "", "\n");
5573 }
5574
5575 fprintf (dump_file, "\n");
5576 }
5577 fprintf (dump_file, "\n");
5578 }
5579 }
5580
5581 /* Determines cost of the candidate CAND. */
5582
5583 static void
5584 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5585 {
5586 comp_cost cost_base;
5587 int64_t cost, cost_step;
5588 tree base;
5589
5590 gcc_assert (cand->iv != NULL);
5591
5592 /* There are two costs associated with the candidate -- its increment
5593 and its initialization. The second is almost negligible for any loop
5594 that rolls enough, so we give it only a small weight. */
5595
5596 base = cand->iv->base;
5597 cost_base = force_var_cost (data, base, NULL);
5598 /* It will be exceptional that the iv register happens to be initialized with
5599 the proper value at no cost. In general, there will at least be a regcopy
5600 or a const set. */
5601 if (cost_base.cost == 0)
5602 cost_base.cost = COSTS_N_INSNS (1);
5603 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5604
5605 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5606
5607 /* Prefer the original ivs unless we may gain something by replacing them.
5608 The reason is to make debugging simpler; so this is not relevant for
5609 artificial ivs created by other optimization passes. */
5610 if (cand->pos != IP_ORIGINAL
5611 || !SSA_NAME_VAR (cand->var_before)
5612 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5613 cost++;
5614
5615 /* Prefer not to insert statements into latch unless there are some
5616 already (so that we do not create unnecessary jumps). */
5617 if (cand->pos == IP_END
5618 && empty_block_p (ip_end_pos (data->current_loop)))
5619 cost++;
5620
5621 cand->cost = cost;
5622 cand->cost_step = cost_step;
5623 }
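/* Editor's worked example (illustrative only, with assumed target costs):
   for a candidate whose base is a constant, force_var_cost above may return
   0, which is then bumped to COSTS_N_INSNS (1); with an assumed add_cost of
   4 and adjust_setup_cost scaling the setup part down for a loop that rolls
   many times, the total is dominated by cost_step, plus 1 if the candidate
   is not an original user variable.  The concrete numbers depend entirely on
   the target hooks.  */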
5624
5625 /* Determines costs of computation of the candidates. */
5626
5627 static void
5628 determine_iv_costs (struct ivopts_data *data)
5629 {
5630 unsigned i;
5631
5632 if (dump_file && (dump_flags & TDF_DETAILS))
5633 {
5634 fprintf (dump_file, "<Candidate Costs>:\n");
5635 fprintf (dump_file, " cand\tcost\n");
5636 }
5637
5638 for (i = 0; i < data->vcands.length (); i++)
5639 {
5640 struct iv_cand *cand = data->vcands[i];
5641
5642 determine_iv_cost (data, cand);
5643
5644 if (dump_file && (dump_flags & TDF_DETAILS))
5645 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5646 }
5647
5648 if (dump_file && (dump_flags & TDF_DETAILS))
5649 fprintf (dump_file, "\n");
5650 }
5651
5652 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5653 induction variables. Note N_INVS includes both invariant variables and
5654 invariant expressions. */
5655
5656 static unsigned
5657 ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5658 unsigned n_cands)
5659 {
5660 unsigned cost;
5661 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5662 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5663 bool speed = data->speed;
5664
5665 /* If there is a call in the loop body, the call-clobbered registers
5666 are not available for loop invariants. */
5667 if (data->body_includes_call)
5668 available_regs = available_regs - target_clobbered_regs;
5669
5670 /* If we have enough registers. */
5671 if (regs_needed + target_res_regs < available_regs)
5672 cost = n_new;
5673 /* If close to running out of registers, try to preserve them. */
5674 else if (regs_needed <= available_regs)
5675 cost = target_reg_cost [speed] * regs_needed;
5676 /* If the needed registers exceed those available but the number of
5677 candidates does not, we penalize extra registers using target_spill_cost. */
5678 else if (n_cands <= available_regs)
5679 cost = target_reg_cost [speed] * available_regs
5680 + target_spill_cost [speed] * (regs_needed - available_regs);
5681 /* If even the number of candidates exceeds the available registers, we
5682 penalize the extra candidate registers using target_spill_cost * 2, because
5683 spilling an induction variable is more expensive than spilling an invariant. */
5684 else
5685 cost = target_reg_cost [speed] * available_regs
5686 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5687 + target_spill_cost [speed] * (regs_needed - n_cands);
5688
5689 /* Finally, add the number of candidates, so that we prefer eliminating
5690 induction variables if possible. */
5691 return cost + n_cands;
5692 }
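/* Editor's worked example (assumed numbers, not target-specific facts):
   with available_regs = 10 after subtracting clobbered registers,
   target_res_regs = 3, regs_used = 4, n_invs = 2 and n_cands = 3, we get
   n_new = 5 and regs_needed = 9.  The first test fails (9 + 3 >= 10), the
   second holds (9 <= 10), so cost = target_reg_cost[speed] * 9 and the
   function returns that plus n_cands = 3.  Only once regs_needed exceeds
   available_regs do the spill terms apply.  */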
5693
5694 /* For each size of the induction variable set determine the penalty. */
5695
5696 static void
5697 determine_set_costs (struct ivopts_data *data)
5698 {
5699 unsigned j, n;
5700 gphi *phi;
5701 gphi_iterator psi;
5702 tree op;
5703 struct loop *loop = data->current_loop;
5704 bitmap_iterator bi;
5705
5706 if (dump_file && (dump_flags & TDF_DETAILS))
5707 {
5708 fprintf (dump_file, "<Global Costs>:\n");
5709 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5710 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5711 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5712 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5713 }
5714
5715 n = 0;
5716 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5717 {
5718 phi = psi.phi ();
5719 op = PHI_RESULT (phi);
5720
5721 if (virtual_operand_p (op))
5722 continue;
5723
5724 if (get_iv (data, op))
5725 continue;
5726
5727 if (!POINTER_TYPE_P (TREE_TYPE (op))
5728 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5729 continue;
5730
5731 n++;
5732 }
5733
5734 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5735 {
5736 struct version_info *info = ver_info (data, j);
5737
5738 if (info->inv_id && info->has_nonlin_use)
5739 n++;
5740 }
5741
5742 data->regs_used = n;
5743 if (dump_file && (dump_flags & TDF_DETAILS))
5744 fprintf (dump_file, " regs_used %d\n", n);
5745
5746 if (dump_file && (dump_flags & TDF_DETAILS))
5747 {
5748 fprintf (dump_file, " cost for size:\n");
5749 fprintf (dump_file, " ivs\tcost\n");
5750 for (j = 0; j <= 2 * target_avail_regs; j++)
5751 fprintf (dump_file, " %d\t%d\n", j,
5752 ivopts_estimate_reg_pressure (data, 0, j));
5753 fprintf (dump_file, "\n");
5754 }
5755 }
5756
5757 /* Returns true if A is a cheaper cost pair than B. */
5758
5759 static bool
5760 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5761 {
5762 if (!a)
5763 return false;
5764
5765 if (!b)
5766 return true;
5767
5768 if (a->cost < b->cost)
5769 return true;
5770
5771 if (b->cost < a->cost)
5772 return false;
5773
5774 /* In case the costs are the same, prefer the cheaper candidate. */
5775 if (a->cand->cost < b->cand->cost)
5776 return true;
5777
5778 return false;
5779 }
5780
5781 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5782 for more expensive, equal and cheaper respectively. */
5783
5784 static int
5785 compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5786 {
5787 if (cheaper_cost_pair (a, b))
5788 return -1;
5789 if (cheaper_cost_pair (b, a))
5790 return 1;
5791
5792 return 0;
5793 }
5794
5795 /* Returns the cost pair by which GROUP is expressed in IVS. */
5796
5797 static struct cost_pair *
5798 iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
5799 {
5800 return ivs->cand_for_group[group->id];
5801 }
5802
5803 /* Computes the cost field of IVS structure. */
5804
5805 static void
5806 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5807 {
5808 comp_cost cost = ivs->cand_use_cost;
5809
5810 cost += ivs->cand_cost;
5811 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
5812 ivs->cost = cost;
5813 }
5814
5815 /* Remove uses of the invariants in set INVS by decreasing the counters in
5816 N_INV_USES and updating IVS. */
5817
5818 static void
5819 iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5820 {
5821 bitmap_iterator bi;
5822 unsigned iid;
5823
5824 if (!invs)
5825 return;
5826
5827 gcc_assert (n_inv_uses != NULL);
5828 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5829 {
5830 n_inv_uses[iid]--;
5831 if (n_inv_uses[iid] == 0)
5832 ivs->n_invs--;
5833 }
5834 }
5835
5836 /* Set GROUP not to be expressed by any candidate in IVS. */
5837
5838 static void
5839 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5840 struct iv_group *group)
5841 {
5842 unsigned gid = group->id, cid;
5843 struct cost_pair *cp;
5844
5845 cp = ivs->cand_for_group[gid];
5846 if (!cp)
5847 return;
5848 cid = cp->cand->id;
5849
5850 ivs->bad_groups++;
5851 ivs->cand_for_group[gid] = NULL;
5852 ivs->n_cand_uses[cid]--;
5853
5854 if (ivs->n_cand_uses[cid] == 0)
5855 {
5856 bitmap_clear_bit (ivs->cands, cid);
5857 ivs->n_cands--;
5858 ivs->cand_cost -= cp->cand->cost;
5859 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5860 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5861 }
5862
5863 ivs->cand_use_cost -= cp->cost;
5864 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5865 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5866 iv_ca_recount_cost (data, ivs);
5867 }
5868
5869 /* Record uses of the invariants in set INVS by increasing the counters in
5870 N_INV_USES and updating IVS. */
5871
5872 static void
5873 iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
5874 {
5875 bitmap_iterator bi;
5876 unsigned iid;
5877
5878 if (!invs)
5879 return;
5880
5881 gcc_assert (n_inv_uses != NULL);
5882 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5883 {
5884 n_inv_uses[iid]++;
5885 if (n_inv_uses[iid] == 1)
5886 ivs->n_invs++;
5887 }
5888 }
5889
5890 /* Set cost pair for GROUP in set IVS to CP. */
5891
5892 static void
5893 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5894 struct iv_group *group, struct cost_pair *cp)
5895 {
5896 unsigned gid = group->id, cid;
5897
5898 if (ivs->cand_for_group[gid] == cp)
5899 return;
5900
5901 if (ivs->cand_for_group[gid])
5902 iv_ca_set_no_cp (data, ivs, group);
5903
5904 if (cp)
5905 {
5906 cid = cp->cand->id;
5907
5908 ivs->bad_groups--;
5909 ivs->cand_for_group[gid] = cp;
5910 ivs->n_cand_uses[cid]++;
5911 if (ivs->n_cand_uses[cid] == 1)
5912 {
5913 bitmap_set_bit (ivs->cands, cid);
5914 ivs->n_cands++;
5915 ivs->cand_cost += cp->cand->cost;
5916 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
5917 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
5918 }
5919
5920 ivs->cand_use_cost += cp->cost;
5921 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5922 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
5923 iv_ca_recount_cost (data, ivs);
5924 }
5925 }
5926
5927 /* Extend set IVS by expressing GROUP by some of the candidates already in
5928 it, if possible. Consider all important candidates if the candidates in
5929 set IVS don't give any result. */
5930
5931 static void
5932 iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5933 struct iv_group *group)
5934 {
5935 struct cost_pair *best_cp = NULL, *cp;
5936 bitmap_iterator bi;
5937 unsigned i;
5938 struct iv_cand *cand;
5939
5940 gcc_assert (ivs->upto >= group->id);
5941 ivs->upto++;
5942 ivs->bad_groups++;
5943
5944 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5945 {
5946 cand = data->vcands[i];
5947 cp = get_group_iv_cost (data, group, cand);
5948 if (cheaper_cost_pair (cp, best_cp))
5949 best_cp = cp;
5950 }
5951
5952 if (best_cp == NULL)
5953 {
5954 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5955 {
5956 cand = data->vcands[i];
5957 cp = get_group_iv_cost (data, group, cand);
5958 if (cheaper_cost_pair (cp, best_cp))
5959 best_cp = cp;
5960 }
5961 }
5962
5963 iv_ca_set_cp (data, ivs, group, best_cp);
5964 }
5965
5966 /* Get cost for assignment IVS. */
5967
5968 static comp_cost
5969 iv_ca_cost (struct iv_ca *ivs)
5970 {
5971 /* This was a conditional expression but it triggered a bug in
5972 Sun C 5.5. */
5973 if (ivs->bad_groups)
5974 return infinite_cost;
5975 else
5976 return ivs->cost;
5977 }
5978
5979 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5980 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5981 respectively. */
5982
5983 static int
5984 iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5985 struct iv_group *group, struct cost_pair *old_cp,
5986 struct cost_pair *new_cp)
5987 {
5988 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5989 unsigned old_n_invs = ivs->n_invs;
5990 iv_ca_set_cp (data, ivs, group, new_cp);
5991 unsigned new_n_invs = ivs->n_invs;
5992 iv_ca_set_cp (data, ivs, group, old_cp);
5993
5994 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
5995 }
5996
5997 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5998 it before NEXT. */
5999
6000 static struct iv_ca_delta *
6001 iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
6002 struct cost_pair *new_cp, struct iv_ca_delta *next)
6003 {
6004 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6005
6006 change->group = group;
6007 change->old_cp = old_cp;
6008 change->new_cp = new_cp;
6009 change->next = next;
6010
6011 return change;
6012 }
6013
6014 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6015 are rewritten. */
6016
6017 static struct iv_ca_delta *
6018 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6019 {
6020 struct iv_ca_delta *last;
6021
6022 if (!l2)
6023 return l1;
6024
6025 if (!l1)
6026 return l2;
6027
6028 for (last = l1; last->next; last = last->next)
6029 continue;
6030 last->next = l2;
6031
6032 return l1;
6033 }
6034
6035 /* Reverse the list of changes DELTA, forming the inverse to it. */
6036
6037 static struct iv_ca_delta *
6038 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6039 {
6040 struct iv_ca_delta *act, *next, *prev = NULL;
6041
6042 for (act = delta; act; act = next)
6043 {
6044 next = act->next;
6045 act->next = prev;
6046 prev = act;
6047
6048 std::swap (act->old_cp, act->new_cp);
6049 }
6050
6051 return prev;
6052 }
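/* Editor's example: reversing the hypothetical delta list
     { G1: cp_a -> cp_b } -> { G2: cp_c -> cp_d }
   yields
     { G2: cp_d -> cp_c } -> { G1: cp_b -> cp_a };
   both the list order and each old_cp/new_cp pair are flipped, so committing
   the reversed list undoes the original changes.  */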
6053
6054 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6055 reverted instead. */
6056
6057 static void
6058 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6059 struct iv_ca_delta *delta, bool forward)
6060 {
6061 struct cost_pair *from, *to;
6062 struct iv_ca_delta *act;
6063
6064 if (!forward)
6065 delta = iv_ca_delta_reverse (delta);
6066
6067 for (act = delta; act; act = act->next)
6068 {
6069 from = act->old_cp;
6070 to = act->new_cp;
6071 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
6072 iv_ca_set_cp (data, ivs, act->group, to);
6073 }
6074
6075 if (!forward)
6076 iv_ca_delta_reverse (delta);
6077 }
6078
6079 /* Returns true if CAND is used in IVS. */
6080
6081 static bool
6082 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6083 {
6084 return ivs->n_cand_uses[cand->id] > 0;
6085 }
6086
6087 /* Returns number of induction variable candidates in the set IVS. */
6088
6089 static unsigned
6090 iv_ca_n_cands (struct iv_ca *ivs)
6091 {
6092 return ivs->n_cands;
6093 }
6094
6095 /* Free the list of changes DELTA. */
6096
6097 static void
6098 iv_ca_delta_free (struct iv_ca_delta **delta)
6099 {
6100 struct iv_ca_delta *act, *next;
6101
6102 for (act = *delta; act; act = next)
6103 {
6104 next = act->next;
6105 free (act);
6106 }
6107
6108 *delta = NULL;
6109 }
6110
6111 /* Allocates a new iv candidate assignment. */
6112
6113 static struct iv_ca *
6114 iv_ca_new (struct ivopts_data *data)
6115 {
6116 struct iv_ca *nw = XNEW (struct iv_ca);
6117
6118 nw->upto = 0;
6119 nw->bad_groups = 0;
6120 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6121 data->vgroups.length ());
6122 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
6123 nw->cands = BITMAP_ALLOC (NULL);
6124 nw->n_cands = 0;
6125 nw->n_invs = 0;
6126 nw->cand_use_cost = no_cost;
6127 nw->cand_cost = 0;
6128 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6129 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
6130 nw->cost = no_cost;
6131
6132 return nw;
6133 }
6134
6135 /* Free memory occupied by the set IVS. */
6136
6137 static void
6138 iv_ca_free (struct iv_ca **ivs)
6139 {
6140 free ((*ivs)->cand_for_group);
6141 free ((*ivs)->n_cand_uses);
6142 BITMAP_FREE ((*ivs)->cands);
6143 free ((*ivs)->n_inv_var_uses);
6144 free ((*ivs)->n_inv_expr_uses);
6145 free (*ivs);
6146 *ivs = NULL;
6147 }
6148
6149 /* Dumps IVS to FILE. */
6150
6151 static void
6152 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6153 {
6154 unsigned i;
6155 comp_cost cost = iv_ca_cost (ivs);
6156
6157 fprintf (file, " cost: %" PRId64 " (complexity %d)\n", cost.cost,
6158 cost.complexity);
6159 fprintf (file, " cand_cost: %" PRId64 "\n cand_group_cost: "
6160 "%" PRId64 " (complexity %d)\n", ivs->cand_cost,
6161 ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6162 bitmap_print (file, ivs->cands, " candidates: ","\n");
6163
6164 for (i = 0; i < ivs->upto; i++)
6165 {
6166 struct iv_group *group = data->vgroups[i];
6167 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
6168 if (cp)
6169 fprintf (file, " group:%d --> iv_cand:%d, cost=("
6170 "%" PRId64 ",%d)\n", group->id, cp->cand->id,
6171 cp->cost.cost, cp->cost.complexity);
6172 else
6173 fprintf (file, " group:%d --> ??\n", group->id);
6174 }
6175
6176 const char *pref = "";
6177 fprintf (file, " invariant variables: ");
6178 for (i = 1; i <= data->max_inv_var_id; i++)
6179 if (ivs->n_inv_var_uses[i])
6180 {
6181 fprintf (file, "%s%d", pref, i);
6182 pref = ", ";
6183 }
6184
6185 pref = "";
6186 fprintf (file, "\n invariant expressions: ");
6187 for (i = 1; i <= data->max_inv_expr_id; i++)
6188 if (ivs->n_inv_expr_uses[i])
6189 {
6190 fprintf (file, "%s%d", pref, i);
6191 pref = ", ";
6192 }
6193
6194 fprintf (file, "\n\n");
6195 }
6196
6197 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
6198 the new set, and store the differences in DELTA. The number of induction
6199 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it
6200 is true, the function tries to find a solution with minimal iv candidates. */
6201
6202 static comp_cost
6203 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6204 struct iv_cand *cand, struct iv_ca_delta **delta,
6205 unsigned *n_ivs, bool min_ncand)
6206 {
6207 unsigned i;
6208 comp_cost cost;
6209 struct iv_group *group;
6210 struct cost_pair *old_cp, *new_cp;
6211
6212 *delta = NULL;
6213 for (i = 0; i < ivs->upto; i++)
6214 {
6215 group = data->vgroups[i];
6216 old_cp = iv_ca_cand_for_group (ivs, group);
6217
6218 if (old_cp
6219 && old_cp->cand == cand)
6220 continue;
6221
6222 new_cp = get_group_iv_cost (data, group, cand);
6223 if (!new_cp)
6224 continue;
6225
6226 if (!min_ncand)
6227 {
6228 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6229 /* Skip if new_cp depends on more invariants. */
6230 if (cmp_invs > 0)
6231 continue;
6232
6233 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6234 /* Skip if new_cp is not cheaper. */
6235 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6236 continue;
6237 }
6238
6239 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6240 }
6241
6242 iv_ca_delta_commit (data, ivs, *delta, true);
6243 cost = iv_ca_cost (ivs);
6244 if (n_ivs)
6245 *n_ivs = iv_ca_n_cands (ivs);
6246 iv_ca_delta_commit (data, ivs, *delta, false);
6247
6248 return cost;
6249 }
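/* Editor's note on MIN_NCAND: when it is set, the cost and invariant
   filtering above is skipped, so every group that CAND can express at all is
   moved onto it.  This deliberately biases the search towards fewer distinct
   candidates even when an individual move is locally more expensive.  */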
6250
6251 /* Try narrowing set IVS by removing CAND. Return the cost of
6252 the new set and store the differences in DELTA. START is
6253 the candidate with which we start narrowing. */
6254
6255 static comp_cost
6256 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6257 struct iv_cand *cand, struct iv_cand *start,
6258 struct iv_ca_delta **delta)
6259 {
6260 unsigned i, ci;
6261 struct iv_group *group;
6262 struct cost_pair *old_cp, *new_cp, *cp;
6263 bitmap_iterator bi;
6264 struct iv_cand *cnd;
6265 comp_cost cost, best_cost, acost;
6266
6267 *delta = NULL;
6268 for (i = 0; i < data->vgroups.length (); i++)
6269 {
6270 group = data->vgroups[i];
6271
6272 old_cp = iv_ca_cand_for_group (ivs, group);
6273 if (old_cp->cand != cand)
6274 continue;
6275
6276 best_cost = iv_ca_cost (ivs);
6277 /* Start narrowing with START. */
6278 new_cp = get_group_iv_cost (data, group, start);
6279
6280 if (data->consider_all_candidates)
6281 {
6282 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6283 {
6284 if (ci == cand->id || (start && ci == start->id))
6285 continue;
6286
6287 cnd = data->vcands[ci];
6288
6289 cp = get_group_iv_cost (data, group, cnd);
6290 if (!cp)
6291 continue;
6292
6293 iv_ca_set_cp (data, ivs, group, cp);
6294 acost = iv_ca_cost (ivs);
6295
6296 if (acost < best_cost)
6297 {
6298 best_cost = acost;
6299 new_cp = cp;
6300 }
6301 }
6302 }
6303 else
6304 {
6305 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
6306 {
6307 if (ci == cand->id || (start && ci == start->id))
6308 continue;
6309
6310 cnd = data->vcands[ci];
6311
6312 cp = get_group_iv_cost (data, group, cnd);
6313 if (!cp)
6314 continue;
6315
6316 iv_ca_set_cp (data, ivs, group, cp);
6317 acost = iv_ca_cost (ivs);
6318
6319 if (acost < best_cost)
6320 {
6321 best_cost = acost;
6322 new_cp = cp;
6323 }
6324 }
6325 }
6326 /* Restore to old cp for use. */
6327 iv_ca_set_cp (data, ivs, group, old_cp);
6328
6329 if (!new_cp)
6330 {
6331 iv_ca_delta_free (delta);
6332 return infinite_cost;
6333 }
6334
6335 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
6336 }
6337
6338 iv_ca_delta_commit (data, ivs, *delta, true);
6339 cost = iv_ca_cost (ivs);
6340 iv_ca_delta_commit (data, ivs, *delta, false);
6341
6342 return cost;
6343 }
6344
6345 /* Try optimizing the set of candidates IVS by removing candidates other
6346 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6347 differences in DELTA. */
6348
6349 static comp_cost
6350 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6351 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6352 {
6353 bitmap_iterator bi;
6354 struct iv_ca_delta *act_delta, *best_delta;
6355 unsigned i;
6356 comp_cost best_cost, acost;
6357 struct iv_cand *cand;
6358
6359 best_delta = NULL;
6360 best_cost = iv_ca_cost (ivs);
6361
6362 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6363 {
6364 cand = data->vcands[i];
6365
6366 if (cand == except_cand)
6367 continue;
6368
6369 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6370
6371 if (acost < best_cost)
6372 {
6373 best_cost = acost;
6374 iv_ca_delta_free (&best_delta);
6375 best_delta = act_delta;
6376 }
6377 else
6378 iv_ca_delta_free (&act_delta);
6379 }
6380
6381 if (!best_delta)
6382 {
6383 *delta = NULL;
6384 return best_cost;
6385 }
6386
6387 /* Recurse to possibly remove other unnecessary ivs. */
6388 iv_ca_delta_commit (data, ivs, best_delta, true);
6389 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6390 iv_ca_delta_commit (data, ivs, best_delta, false);
6391 *delta = iv_ca_delta_join (best_delta, *delta);
6392 return best_cost;
6393 }
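/* Editor's note on the recursion above: removing one candidate can make
   another removable.  E.g. (hypothetical names) if group G1 migrates from
   candidate C1 to C2 once C1 is removed, C3 may then cover all remaining
   uses of C2, so the recursive call can drop C2 as well.  */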
6394
6395 /* Check whether the candidate with index CAND_IDX is different from OLD_CAND
6396 and has a cheaper local cost for GROUP than BEST_CP. If so, return a
6397 pointer to its cost_pair; otherwise just return BEST_CP. */
6398
6399 static struct cost_pair*
6400 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6401 unsigned int cand_idx, struct iv_cand *old_cand,
6402 struct cost_pair *best_cp)
6403 {
6404 struct iv_cand *cand;
6405 struct cost_pair *cp;
6406
6407 gcc_assert (old_cand != NULL && best_cp != NULL);
6408 if (cand_idx == old_cand->id)
6409 return best_cp;
6410
6411 cand = data->vcands[cand_idx];
6412 cp = get_group_iv_cost (data, group, cand);
6413 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6414 return cp;
6415
6416 return best_cp;
6417 }
6418
6419 /* Try breaking the locally optimal fixed-point for IVS by replacing candidates
6420 that are used by more than one iv use. For each such candidate, this
6421 function tries to represent the iv uses under that candidate using other
6422 candidates with lower local cost, then tries to prune the new set.
6423 If the new set has a lower cost, it returns the new cost after recording
6424 the candidate replacements in list DELTA. */
6425
6426 static comp_cost
6427 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6428 struct iv_ca_delta **delta)
6429 {
6430 bitmap_iterator bi, bj;
6431 unsigned int i, j, k;
6432 struct iv_cand *cand;
6433 comp_cost orig_cost, acost;
6434 struct iv_ca_delta *act_delta, *tmp_delta;
6435 struct cost_pair *old_cp, *best_cp = NULL;
6436
6437 *delta = NULL;
6438 orig_cost = iv_ca_cost (ivs);
6439
6440 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6441 {
6442 if (ivs->n_cand_uses[i] == 1
6443 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6444 continue;
6445
6446 cand = data->vcands[i];
6447
6448 act_delta = NULL;
6449 /* Represent uses under current candidate using other ones with
6450 lower local cost. */
6451 for (j = 0; j < ivs->upto; j++)
6452 {
6453 struct iv_group *group = data->vgroups[j];
6454 old_cp = iv_ca_cand_for_group (ivs, group);
6455
6456 if (old_cp->cand != cand)
6457 continue;
6458
6459 best_cp = old_cp;
6460 if (data->consider_all_candidates)
6461 for (k = 0; k < data->vcands.length (); k++)
6462 best_cp = cheaper_cost_with_cand (data, group, k,
6463 old_cp->cand, best_cp);
6464 else
6465 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6466 best_cp = cheaper_cost_with_cand (data, group, k,
6467 old_cp->cand, best_cp);
6468
6469 if (best_cp == old_cp)
6470 continue;
6471
6472 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6473 }
6474 /* No need for further pruning. */
6475 if (!act_delta)
6476 continue;
6477
6478 /* Prune the new candidate set. */
6479 iv_ca_delta_commit (data, ivs, act_delta, true);
6480 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6481 iv_ca_delta_commit (data, ivs, act_delta, false);
6482 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6483
6484 if (acost < orig_cost)
6485 {
6486 *delta = act_delta;
6487 return acost;
6488 }
6489 else
6490 iv_ca_delta_free (&act_delta);
6491 }
6492
6493 return orig_cost;
6494 }
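/* Editor's sketch of the replacement step above: if candidate C currently
   represents groups G1 and G2, but G1 is locally cheaper under C1 and G2
   under C2, the loop records a delta moving G1 to C1 and G2 to C2; the
   subsequent pruning then gets a chance to remove C entirely.  All names
   here are hypothetical.  */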
6495
6496 /* Tries to extend the set IVS in the best possible way in order to
6497 express GROUP. If ORIGINALP is true, prefer candidates from
6498 the original set of IVs, otherwise favor important candidates not
6499 based on any memory object. */
6500
6501 static bool
6502 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6503 struct iv_group *group, bool originalp)
6504 {
6505 comp_cost best_cost, act_cost;
6506 unsigned i;
6507 bitmap_iterator bi;
6508 struct iv_cand *cand;
6509 struct iv_ca_delta *best_delta = NULL, *act_delta;
6510 struct cost_pair *cp;
6511
6512 iv_ca_add_group (data, ivs, group);
6513 best_cost = iv_ca_cost (ivs);
6514 cp = iv_ca_cand_for_group (ivs, group);
6515 if (cp)
6516 {
6517 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6518 iv_ca_set_no_cp (data, ivs, group);
6519 }
6520
6521 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6522 first try important candidates not based on any memory object. Only if
6523 this fails, try the specific ones. Rationale -- in loops with many
6524 variables the best choice often is to use just one generic biv. If we
6525 added here many ivs specific to the uses, the optimization algorithm later
6526 would be likely to get stuck in a local minimum, thus causing us to create
6527 too many ivs. The approach from few ivs to more seems more likely to be
6528 successful -- starting from few ivs, replacing an expensive use by a
6529 specific iv should always be a win. */
6530 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
6531 {
6532 cand = data->vcands[i];
6533
6534 if (originalp && cand->pos != IP_ORIGINAL)
6535 continue;
6536
6537 if (!originalp && cand->iv->base_object != NULL_TREE)
6538 continue;
6539
6540 if (iv_ca_cand_used_p (ivs, cand))
6541 continue;
6542
6543 cp = get_group_iv_cost (data, group, cand);
6544 if (!cp)
6545 continue;
6546
6547 iv_ca_set_cp (data, ivs, group, cp);
6548 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6549 true);
6550 iv_ca_set_no_cp (data, ivs, group);
6551 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
6552
6553 if (act_cost < best_cost)
6554 {
6555 best_cost = act_cost;
6556
6557 iv_ca_delta_free (&best_delta);
6558 best_delta = act_delta;
6559 }
6560 else
6561 iv_ca_delta_free (&act_delta);
6562 }
6563
6564 if (best_cost.infinite_cost_p ())
6565 {
6566 for (i = 0; i < group->n_map_members; i++)
6567 {
6568 cp = group->cost_map + i;
6569 cand = cp->cand;
6570 if (!cand)
6571 continue;
6572
6573 /* Already tried this. */
6574 if (cand->important)
6575 {
6576 if (originalp && cand->pos == IP_ORIGINAL)
6577 continue;
6578 if (!originalp && cand->iv->base_object == NULL_TREE)
6579 continue;
6580 }
6581
6582 if (iv_ca_cand_used_p (ivs, cand))
6583 continue;
6584
6585 act_delta = NULL;
6586 iv_ca_set_cp (data, ivs, group, cp);
6587 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6588 iv_ca_set_no_cp (data, ivs, group);
6589 act_delta = iv_ca_delta_add (group,
6590 iv_ca_cand_for_group (ivs, group),
6591 cp, act_delta);
6592
6593 if (act_cost < best_cost)
6594 {
6595 best_cost = act_cost;
6596
6597 if (best_delta)
6598 iv_ca_delta_free (&best_delta);
6599 best_delta = act_delta;
6600 }
6601 else
6602 iv_ca_delta_free (&act_delta);
6603 }
6604 }
6605
6606 iv_ca_delta_commit (data, ivs, best_delta, true);
6607 iv_ca_delta_free (&best_delta);
6608
6609 return !best_cost.infinite_cost_p ();
6610 }
6611
6612 /* Finds an initial assignment of candidates to uses. */
6613
6614 static struct iv_ca *
6615 get_initial_solution (struct ivopts_data *data, bool originalp)
6616 {
6617 unsigned i;
6618 struct iv_ca *ivs = iv_ca_new (data);
6619
6620 for (i = 0; i < data->vgroups.length (); i++)
6621 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
6622 {
6623 iv_ca_free (&ivs);
6624 return NULL;
6625 }
6626
6627 return ivs;
6628 }
6629
6630 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6631 points to a bool variable; if it is true, this function tries to break
6632 the locally optimal fixed-point by replacing candidates in IVS. */
6633
6634 static bool
6635 try_improve_iv_set (struct ivopts_data *data,
6636 struct iv_ca *ivs, bool *try_replace_p)
6637 {
6638 unsigned i, n_ivs;
6639 comp_cost acost, best_cost = iv_ca_cost (ivs);
6640 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6641 struct iv_cand *cand;
6642
6643 /* Try extending the set of induction variables by one. */
6644 for (i = 0; i < data->vcands.length (); i++)
6645 {
6646 cand = data->vcands[i];
6647
6648 if (iv_ca_cand_used_p (ivs, cand))
6649 continue;
6650
6651 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6652 if (!act_delta)
6653 continue;
6654
6655 /* If we successfully added the candidate and the set is small enough,
6656 try optimizing it by removing other candidates. */
6657 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6658 {
6659 iv_ca_delta_commit (data, ivs, act_delta, true);
6660 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6661 iv_ca_delta_commit (data, ivs, act_delta, false);
6662 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6663 }
6664
6665 if (acost < best_cost)
6666 {
6667 best_cost = acost;
6668 iv_ca_delta_free (&best_delta);
6669 best_delta = act_delta;
6670 }
6671 else
6672 iv_ca_delta_free (&act_delta);
6673 }
6674
6675 if (!best_delta)
6676 {
6677 /* Try removing the candidates from the set instead. */
6678 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6679
6680 if (!best_delta && *try_replace_p)
6681 {
6682 *try_replace_p = false;
6683 /* So far the candidate-selecting algorithm tends to choose fewer IVs
6684 so that it can handle loops with many variables for which the best
6685 choice is often to use only one general biv. One weakness is that
6686 it can't handle the opposite case, in which different candidates
6687 should be chosen with respect to different uses. To solve the
6688 problem, we replace candidates in the manner described in the
6689 comments of iv_ca_replace, thus giving the general algorithm a
6690 chance to break the locally optimal fixed-point in these cases. */
6691 best_cost = iv_ca_replace (data, ivs, &best_delta);
6692 }
6693
6694 if (!best_delta)
6695 return false;
6696 }
6697
6698 iv_ca_delta_commit (data, ivs, best_delta, true);
6699 iv_ca_delta_free (&best_delta);
6700 return best_cost == iv_ca_cost (ivs);
6701 }
6702
6703 /* Attempts to find the optimal set of induction variables. We use a simple
6704 greedy heuristic -- we try to replace at most one candidate in the selected
6705 solution and remove the unused ivs while this improves the cost. */
6706
6707 static struct iv_ca *
6708 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6709 {
6710 struct iv_ca *set;
6711 bool try_replace_p = true;
6712
6713 /* Get the initial solution. */
6714 set = get_initial_solution (data, originalp);
6715 if (!set)
6716 {
6717 if (dump_file && (dump_flags & TDF_DETAILS))
6718 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6719 return NULL;
6720 }
6721
6722 if (dump_file && (dump_flags & TDF_DETAILS))
6723 {
6724 fprintf (dump_file, "Initial set of candidates:\n");
6725 iv_ca_dump (data, dump_file, set);
6726 }
6727
6728 while (try_improve_iv_set (data, set, &try_replace_p))
6729 {
6730 if (dump_file && (dump_flags & TDF_DETAILS))
6731 {
6732 fprintf (dump_file, "Improved to:\n");
6733 iv_ca_dump (data, dump_file, set);
6734 }
6735 }
6736
6737 /* If the set has infinite_cost, it can't be optimal. */
6738 if (iv_ca_cost (set).infinite_cost_p ())
6739 {
6740 if (dump_file && (dump_flags & TDF_DETAILS))
6741 fprintf (dump_file,
6742 "Overflow to infinite cost in try_improve_iv_set.\n");
6743 iv_ca_free (&set);
6744 }
6745 return set;
6746 }
6747
6748 static struct iv_ca *
6749 find_optimal_iv_set (struct ivopts_data *data)
6750 {
6751 unsigned i;
6752 comp_cost cost, origcost;
6753 struct iv_ca *set, *origset;
6754
6755 /* Determine the cost based on a strategy that starts with original IVs,
6756 and try again using a strategy that prefers candidates not based
6757 on any IVs. */
6758 origset = find_optimal_iv_set_1 (data, true);
6759 set = find_optimal_iv_set_1 (data, false);
6760
6761 if (!origset && !set)
6762 return NULL;
6763
6764 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6765 cost = set ? iv_ca_cost (set) : infinite_cost;
6766
6767 if (dump_file && (dump_flags & TDF_DETAILS))
6768 {
6769 fprintf (dump_file, "Original cost %" PRId64 " (complexity %d)\n\n",
6770 origcost.cost, origcost.complexity);
6771 fprintf (dump_file, "Final cost %" PRId64 " (complexity %d)\n\n",
6772 cost.cost, cost.complexity);
6773 }
6774
6775 /* Choose the one with the best cost. */
6776 if (origcost <= cost)
6777 {
6778 if (set)
6779 iv_ca_free (&set);
6780 set = origset;
6781 }
6782 else if (origset)
6783 iv_ca_free (&origset);
6784
6785 for (i = 0; i < data->vgroups.length (); i++)
6786 {
6787 struct iv_group *group = data->vgroups[i];
6788 group->selected = iv_ca_cand_for_group (set, group)->cand;
6789 }
6790
6791 return set;
6792 }
6793
6794 /* Creates a new induction variable corresponding to CAND. */
6795
6796 static void
6797 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6798 {
6799 gimple_stmt_iterator incr_pos;
6800 tree base;
6801 struct iv_use *use;
6802 struct iv_group *group;
6803 bool after = false;
6804
6805 gcc_assert (cand->iv != NULL);
6806
6807 switch (cand->pos)
6808 {
6809 case IP_NORMAL:
6810 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6811 break;
6812
6813 case IP_END:
6814 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6815 after = true;
6816 break;
6817
6818 case IP_AFTER_USE:
6819 after = true;
6820 /* fall through */
6821 case IP_BEFORE_USE:
6822 incr_pos = gsi_for_stmt (cand->incremented_at);
6823 break;
6824
6825 case IP_ORIGINAL:
6826 /* Mark that the iv is preserved. */
6827 name_info (data, cand->var_before)->preserve_biv = true;
6828 name_info (data, cand->var_after)->preserve_biv = true;
6829
6830 /* Rewrite the increment so that it uses var_before directly. */
6831 use = find_interesting_uses_op (data, cand->var_after);
6832 group = data->vgroups[use->group_id];
6833 group->selected = cand;
6834 return;
6835 }
6836
6837 gimple_add_tmp_var (cand->var_before);
6838
6839 base = unshare_expr (cand->iv->base);
6840
6841 create_iv (base, unshare_expr (cand->iv->step),
6842 cand->var_before, data->current_loop,
6843 &incr_pos, after, &cand->var_before, &cand->var_after);
6844 }
6845
6846 /* Creates new induction variables described in SET. */
6847
6848 static void
6849 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6850 {
6851 unsigned i;
6852 struct iv_cand *cand;
6853 bitmap_iterator bi;
6854
6855 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6856 {
6857 cand = data->vcands[i];
6858 create_new_iv (data, cand);
6859 }
6860
6861 if (dump_file && (dump_flags & TDF_DETAILS))
6862 {
6863 fprintf (dump_file, "Selected IV set for loop %d",
6864 data->current_loop->num);
6865 if (data->loop_loc != UNKNOWN_LOCATION)
6866 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6867 LOCATION_LINE (data->loop_loc));
6868 fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
6869 avg_loop_niter (data->current_loop));
6870 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6871 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6872 {
6873 cand = data->vcands[i];
6874 dump_cand (dump_file, cand);
6875 }
6876 fprintf (dump_file, "\n");
6877 }
6878 }
6879
6880 /* Rewrites USE (definition of iv used in a nonlinear expression)
6881 using candidate CAND. */
6882
6883 static void
6884 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6885 struct iv_use *use, struct iv_cand *cand)
6886 {
6887 gassign *ass;
6888 gimple_stmt_iterator bsi;
6889 tree comp, type = get_use_type (use), tgt;
6890
6891 /* An important special case -- if we are asked to express the value of
6892 the original iv by itself, just exit; there is no need to
6893 introduce a new computation (that might also need casting the
6894 variable to unsigned and back). */
6895 if (cand->pos == IP_ORIGINAL
6896 && cand->incremented_at == use->stmt)
6897 {
6898 tree op = NULL_TREE;
6899 enum tree_code stmt_code;
6900
6901 gcc_assert (is_gimple_assign (use->stmt));
6902 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6903
6904 /* Check whether we may leave the computation unchanged.
6905 This is the case only if it does not rely on other
6906 computations in the loop -- otherwise, the computation
6907 we rely upon may be removed in remove_unused_ivs,
6908 thus leading to ICE. */
6909 stmt_code = gimple_assign_rhs_code (use->stmt);
6910 if (stmt_code == PLUS_EXPR
6911 || stmt_code == MINUS_EXPR
6912 || stmt_code == POINTER_PLUS_EXPR)
6913 {
6914 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6915 op = gimple_assign_rhs2 (use->stmt);
6916 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6917 op = gimple_assign_rhs1 (use->stmt);
6918 }
6919
6920 if (op != NULL_TREE)
6921 {
6922 if (expr_invariant_in_loop_p (data->current_loop, op))
6923 return;
6924 if (TREE_CODE (op) == SSA_NAME)
6925 {
6926 struct iv *iv = get_iv (data, op);
6927 if (iv != NULL && integer_zerop (iv->step))
6928 return;
6929 }
6930 }
6931 }
6932
6933 switch (gimple_code (use->stmt))
6934 {
6935 case GIMPLE_PHI:
6936 tgt = PHI_RESULT (use->stmt);
6937
6938 /* If we should keep the biv, do not replace it. */
6939 if (name_info (data, tgt)->preserve_biv)
6940 return;
6941
6942 bsi = gsi_after_labels (gimple_bb (use->stmt));
6943 break;
6944
6945 case GIMPLE_ASSIGN:
6946 tgt = gimple_assign_lhs (use->stmt);
6947 bsi = gsi_for_stmt (use->stmt);
6948 break;
6949
6950 default:
6951 gcc_unreachable ();
6952 }
6953
6954 aff_tree aff_inv, aff_var;
6955 if (!get_computation_aff_1 (data->current_loop, use->stmt,
6956 use, cand, &aff_inv, &aff_var))
6957 gcc_unreachable ();
6958
6959 unshare_aff_combination (&aff_inv);
6960 unshare_aff_combination (&aff_var);
6961 /* Prefer a CSE opportunity over a loop-invariant computation by adding the
6962 offset last, so that iv_uses with different offsets can be CSEed; see the example after this function. */
6963 poly_widest_int offset = aff_inv.offset;
6964 aff_inv.offset = 0;
6965
6966 gimple_seq stmt_list = NULL, seq = NULL;
6967 tree comp_op1 = aff_combination_to_tree (&aff_inv);
6968 tree comp_op2 = aff_combination_to_tree (&aff_var);
6969 gcc_assert (comp_op1 && comp_op2);
6970
6971 comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
6972 gimple_seq_add_seq (&stmt_list, seq);
6973 comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
6974 gimple_seq_add_seq (&stmt_list, seq);
6975
6976 if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
6977 std::swap (comp_op1, comp_op2);
6978
6979 if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
6980 {
6981 comp = fold_build_pointer_plus (comp_op1,
6982 fold_convert (sizetype, comp_op2));
6983 comp = fold_build_pointer_plus (comp,
6984 wide_int_to_tree (sizetype, offset));
6985 }
6986 else
6987 {
6988 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
6989 fold_convert (TREE_TYPE (comp_op1), comp_op2));
6990 comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
6991 wide_int_to_tree (TREE_TYPE (comp_op1), offset));
6992 }
6993
6994 comp = fold_convert (type, comp);
6995 if (!valid_gimple_rhs_p (comp)
6996 || (gimple_code (use->stmt) != GIMPLE_PHI
6997 /* We can't allow re-allocating the stmt as it might be pointed
6998 to still. */
6999 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
7000 >= gimple_num_ops (gsi_stmt (bsi)))))
7001 {
7002 comp = force_gimple_operand (comp, &seq, true, NULL);
7003 gimple_seq_add_seq (&stmt_list, seq);
7004 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
7005 {
7006 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
7007 /* As this isn't a plain copy we have to reset alignment
7008 information. */
7009 if (SSA_NAME_PTR_INFO (comp))
7010 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
7011 }
7012 }
7013
7014 gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
7015 if (gimple_code (use->stmt) == GIMPLE_PHI)
7016 {
7017 ass = gimple_build_assign (tgt, comp);
7018 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
7019
7020 bsi = gsi_for_stmt (use->stmt);
7021 remove_phi_node (&bsi, false);
7022 }
7023 else
7024 {
7025 gimple_assign_set_rhs_from_tree (&bsi, comp);
7026 use->stmt = gsi_stmt (bsi);
7027 }
7028 }
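/* Editor's example for the "offset last" choice in the function above
   (illustrative expressions only):

     p1 = base + i * 4 + 8;
     p2 = base + i * 4 + 16;

   Building "base + i * 4" first and adding the constant offsets 8 and 16
   afterwards lets that common subexpression be shared between the two uses,
   which would not happen if each offset were folded into the invariant part
   up front.  */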
7029
7030 /* Performs a peephole optimization to reorder the iv update statement with
7031 a mem ref to enable instruction combining in later phases. The mem ref uses
7032 the iv value before the update, so the reordering transformation requires
7033 adjustment of the offset. CAND is the selected IV_CAND.
7034
7035 Example:
7036
7037 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7038 iv2 = iv1 + 1;
7039
7040 if (t < val) (1)
7041 goto L;
7042 goto Head;
7043
7044
7045 directly propagating t over to (1) will introduce an overlapping live range,
7046 thus increasing register pressure. This peephole transforms it into:
7047
7048
7049 iv2 = iv1 + 1;
7050 t = MEM_REF (base, iv2, 8, 8);
7051 if (t < val)
7052 goto L;
7053 goto Head;
7054 */
7055
7056 static void
7057 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7058 {
7059 tree var_after;
7060 gimple *iv_update, *stmt;
7061 basic_block bb;
7062 gimple_stmt_iterator gsi, gsi_iv;
7063
7064 if (cand->pos != IP_NORMAL)
7065 return;
7066
7067 var_after = cand->var_after;
7068 iv_update = SSA_NAME_DEF_STMT (var_after);
7069
7070 bb = gimple_bb (iv_update);
7071 gsi = gsi_last_nondebug_bb (bb);
7072 stmt = gsi_stmt (gsi);
7073
7074 /* Only handle conditional statement for now. */
7075 if (gimple_code (stmt) != GIMPLE_COND)
7076 return;
7077
7078 gsi_prev_nondebug (&gsi);
7079 stmt = gsi_stmt (gsi);
7080 if (stmt != iv_update)
7081 return;
7082
7083 gsi_prev_nondebug (&gsi);
7084 if (gsi_end_p (gsi))
7085 return;
7086
7087 stmt = gsi_stmt (gsi);
7088 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7089 return;
7090
7091 if (stmt != use->stmt)
7092 return;
7093
7094 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7095 return;
7096
7097 if (dump_file && (dump_flags & TDF_DETAILS))
7098 {
7099 fprintf (dump_file, "Reordering \n");
7100 print_gimple_stmt (dump_file, iv_update, 0);
7101 print_gimple_stmt (dump_file, use->stmt, 0);
7102 fprintf (dump_file, "\n");
7103 }
7104
7105 gsi = gsi_for_stmt (use->stmt);
7106 gsi_iv = gsi_for_stmt (iv_update);
7107 gsi_move_before (&gsi_iv, &gsi);
7108
7109 cand->pos = IP_BEFORE_USE;
7110 cand->incremented_at = use->stmt;
7111 }
7112
7113 /* Return the alias pointer type that should be used for a MEM_REF
7114 associated with USE, which has type USE_PTR_ADDRESS. */
7115
7116 static tree
7117 get_alias_ptr_type_for_ptr_address (iv_use *use)
7118 {
7119 gcall *call = as_a <gcall *> (use->stmt);
7120 switch (gimple_call_internal_fn (call))
7121 {
7122 case IFN_MASK_LOAD:
7123 case IFN_MASK_STORE:
7124 case IFN_MASK_LOAD_LANES:
7125 case IFN_MASK_STORE_LANES:
7126 /* The second argument contains the correct alias type. */
7127 gcc_assert (use->op_p == gimple_call_arg_ptr (call, 0));
7128 return TREE_TYPE (gimple_call_arg (call, 1));
7129
7130 default:
7131 gcc_unreachable ();
7132 }
7133 }
7134
7135
7136 /* Rewrites USE (address that is an iv) using candidate CAND. */
7137
7138 static void
7139 rewrite_use_address (struct ivopts_data *data,
7140 struct iv_use *use, struct iv_cand *cand)
7141 {
7142 aff_tree aff;
7143 bool ok;
7144
7145 adjust_iv_update_pos (cand, use);
7146 ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
7147 gcc_assert (ok);
7148 unshare_aff_combination (&aff);
7149
7150 /* To avoid undefined overflow problems, all IV candidates use unsigned
7151 integer types. The drawback is that this makes it impossible for
7152 create_mem_ref to distinguish an IV that is based on a memory object
7153 from one that represents simply an offset.
7154
7155 To work around this problem, we pass a hint to create_mem_ref that
7156 indicates which variable (if any) in aff is an IV based on a memory
7157 object. Note that we only consider the candidate. If this is not
7158 based on an object, the base of the reference is in some subexpression
7159 of the use -- but these will use pointer types, so they are recognized
7160 by the create_mem_ref heuristics anyway. */
7161 tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
7162 tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
7163 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7164 tree type = use->mem_type;
7165 tree alias_ptr_type;
7166 if (use->type == USE_PTR_ADDRESS)
7167 alias_ptr_type = get_alias_ptr_type_for_ptr_address (use);
7168 else
7169 {
7170 gcc_assert (type == TREE_TYPE (*use->op_p));
7171 unsigned int align = get_object_alignment (*use->op_p);
7172 if (align != TYPE_ALIGN (type))
7173 type = build_aligned_type (type, align);
7174 alias_ptr_type = reference_alias_ptr_type (*use->op_p);
7175 }
7176 tree ref = create_mem_ref (&bsi, type, &aff, alias_ptr_type,
7177 iv, base_hint, data->speed);
7178
7179 if (use->type == USE_PTR_ADDRESS)
7180 {
7181 ref = fold_build1 (ADDR_EXPR, build_pointer_type (use->mem_type), ref);
7182 ref = fold_convert (get_use_type (use), ref);
7183 ref = force_gimple_operand_gsi (&bsi, ref, true, NULL_TREE,
7184 true, GSI_SAME_STMT);
7185 }
7186 else
7187 copy_ref_info (ref, *use->op_p);
7188
7189 *use->op_p = ref;
7190 }
7191
7192 /* Rewrites USE (a condition in which one of the arguments is an iv) using
7193 candidate CAND. */
7194
7195 static void
7196 rewrite_use_compare (struct ivopts_data *data,
7197 struct iv_use *use, struct iv_cand *cand)
7198 {
7199 tree comp, op, bound;
7200 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7201 enum tree_code compare;
7202 struct iv_group *group = data->vgroups[use->group_id];
7203 struct cost_pair *cp = get_group_iv_cost (data, group, cand);
7204
7205 bound = cp->value;
7206 if (bound)
7207 {
7208 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7209 tree var_type = TREE_TYPE (var);
7210 gimple_seq stmts;
7211
7212 if (dump_file && (dump_flags & TDF_DETAILS))
7213 {
7214 fprintf (dump_file, "Replacing exit test: ");
7215 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7216 }
7217 compare = cp->comp;
7218 bound = unshare_expr (fold_convert (var_type, bound));
7219 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7220 if (stmts)
7221 gsi_insert_seq_on_edge_immediate (
7222 loop_preheader_edge (data->current_loop),
7223 stmts);
7224
7225 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7226 gimple_cond_set_lhs (cond_stmt, var);
7227 gimple_cond_set_code (cond_stmt, compare);
7228 gimple_cond_set_rhs (cond_stmt, op);
7229 return;
7230 }
7231
7232 /* The induction variable elimination failed; just express the original
7233 giv. */
7234 comp = get_computation_at (data->current_loop, use->stmt, use, cand);
7235 gcc_assert (comp != NULL_TREE);
7236 gcc_assert (use->op_p != NULL);
7237 *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
7238 SSA_NAME_VAR (*use->op_p),
7239 true, GSI_SAME_STMT);
7240 }
7241
7242 /* Rewrite the groups using the selected induction variables. */
7243
7244 static void
7245 rewrite_groups (struct ivopts_data *data)
7246 {
7247 unsigned i, j;
7248
7249 for (i = 0; i < data->vgroups.length (); i++)
7250 {
7251 struct iv_group *group = data->vgroups[i];
7252 struct iv_cand *cand = group->selected;
7253
7254 gcc_assert (cand);
7255
7256 if (group->type == USE_NONLINEAR_EXPR)
7257 {
7258 for (j = 0; j < group->vuses.length (); j++)
7259 {
7260 rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
7261 update_stmt (group->vuses[j]->stmt);
7262 }
7263 }
7264 else if (address_p (group->type))
7265 {
7266 for (j = 0; j < group->vuses.length (); j++)
7267 {
7268 rewrite_use_address (data, group->vuses[j], cand);
7269 update_stmt (group->vuses[j]->stmt);
7270 }
7271 }
7272 else
7273 {
7274 gcc_assert (group->type == USE_COMPARE);
7275
7276 for (j = 0; j < group->vuses.length (); j++)
7277 {
7278 rewrite_use_compare (data, group->vuses[j], cand);
7279 update_stmt (group->vuses[j]->stmt);
7280 }
7281 }
7282 }
7283 }
7284
7285 /* Removes the ivs that are not used after rewriting. */
7286
7287 static void
7288 remove_unused_ivs (struct ivopts_data *data, bitmap toremove)
7289 {
7290 unsigned j;
7291 bitmap_iterator bi;
7292
7293 /* Figure out an order in which to release SSA DEFs so that we don't
7294 release something that we'd have to propagate into a debug stmt
7295 afterwards. */
7296 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7297 {
7298 struct version_info *info;
7299
7300 info = ver_info (data, j);
7301 if (info->iv
7302 && !integer_zerop (info->iv->step)
7303 && !info->inv_id
7304 && !info->iv->nonlin_use
7305 && !info->preserve_biv)
7306 {
7307 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7308
7309 tree def = info->iv->ssa_name;
7310
7311 if (MAY_HAVE_DEBUG_BIND_STMTS && SSA_NAME_DEF_STMT (def))
7312 {
7313 imm_use_iterator imm_iter;
7314 use_operand_p use_p;
7315 gimple *stmt;
7316 int count = 0;
7317
7318 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7319 {
7320 if (!gimple_debug_bind_p (stmt))
7321 continue;
7322
7323 /* We just want to determine whether to do nothing
7324 (count == 0), to substitute the computed
7325 expression into a single use of the SSA DEF by
7326 itself (count == 1), or to use a debug temp
7327 because the SSA DEF is used multiple times or as
7328 part of a larger expression (count > 1). */
7329 count++;
7330 if (gimple_debug_bind_get_value (stmt) != def)
7331 count++;
7332
7333 if (count > 1)
7334 BREAK_FROM_IMM_USE_STMT (imm_iter);
7335 }
7336
7337 if (!count)
7338 continue;
7339
7340 struct iv_use dummy_use;
7341 struct iv_cand *best_cand = NULL, *cand;
7342 unsigned i, best_pref = 0, cand_pref;
7343
7344 memset (&dummy_use, 0, sizeof (dummy_use));
7345 dummy_use.iv = info->iv;
7346 for (i = 0; i < data->vgroups.length () && i < 64; i++)
7347 {
7348 cand = data->vgroups[i]->selected;
7349 if (cand == best_cand)
7350 continue;
7351 cand_pref = operand_equal_p (cand->iv->step,
7352 info->iv->step, 0)
7353 ? 4 : 0;
7354 cand_pref
7355 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7356 == TYPE_MODE (TREE_TYPE (info->iv->base))
7357 ? 2 : 0;
7358 cand_pref
7359 += TREE_CODE (cand->iv->base) == INTEGER_CST
7360 ? 1 : 0;
7361 if (best_cand == NULL || best_pref < cand_pref)
7362 {
7363 best_cand = cand;
7364 best_pref = cand_pref;
7365 }
7366 }
7367
7368 if (!best_cand)
7369 continue;
7370
7371 tree comp = get_computation_at (data->current_loop,
7372 SSA_NAME_DEF_STMT (def),
7373 &dummy_use, best_cand);
7374 if (!comp)
7375 continue;
7376
7377 if (count > 1)
7378 {
7379 tree vexpr = make_node (DEBUG_EXPR_DECL);
7380 DECL_ARTIFICIAL (vexpr) = 1;
7381 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7382 if (SSA_NAME_VAR (def))
7383 SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
7384 else
7385 SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
7386 gdebug *def_temp
7387 = gimple_build_debug_bind (vexpr, comp, NULL);
7388 gimple_stmt_iterator gsi;
7389
7390 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7391 gsi = gsi_after_labels (gimple_bb
7392 (SSA_NAME_DEF_STMT (def)));
7393 else
7394 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7395
7396 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7397 comp = vexpr;
7398 }
7399
7400 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7401 {
7402 if (!gimple_debug_bind_p (stmt))
7403 continue;
7404
7405 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7406 SET_USE (use_p, comp);
7407
7408 update_stmt (stmt);
7409 }
7410 }
7411 }
7412 }
7413 }
7414 }
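/* Editor's note on the candidate preference scoring in the function above:
   the weights favor, in order, a candidate with the same step (4), the same
   mode (2) and a constant base (1).  For instance, a candidate matching step
   and mode but with a non-constant base scores 6 and beats one that only
   matches the mode (score 2).  */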
7415 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7416 for hash_map::traverse. */
7417
7418 bool
7419 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7420 {
7421 free (value);
7422 return true;
7423 }
7424
7425 /* Frees data allocated by the optimization of a single loop. */
7426
7427 static void
7428 free_loop_data (struct ivopts_data *data)
7429 {
7430 unsigned i, j;
7431 bitmap_iterator bi;
7432 tree obj;
7433
7434 if (data->niters)
7435 {
7436 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7437 delete data->niters;
7438 data->niters = NULL;
7439 }
7440
7441 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7442 {
7443 struct version_info *info;
7444
7445 info = ver_info (data, i);
7446 info->iv = NULL;
7447 info->has_nonlin_use = false;
7448 info->preserve_biv = false;
7449 info->inv_id = 0;
7450 }
7451 bitmap_clear (data->relevant);
7452 bitmap_clear (data->important_candidates);
7453
7454 for (i = 0; i < data->vgroups.length (); i++)
7455 {
7456 struct iv_group *group = data->vgroups[i];
7457
7458 for (j = 0; j < group->vuses.length (); j++)
7459 free (group->vuses[j]);
7460 group->vuses.release ();
7461
7462 BITMAP_FREE (group->related_cands);
7463 for (j = 0; j < group->n_map_members; j++)
7464 {
7465 if (group->cost_map[j].inv_vars)
7466 BITMAP_FREE (group->cost_map[j].inv_vars);
7467 if (group->cost_map[j].inv_exprs)
7468 BITMAP_FREE (group->cost_map[j].inv_exprs);
7469 }
7470
7471 free (group->cost_map);
7472 free (group);
7473 }
7474 data->vgroups.truncate (0);
7475
7476 for (i = 0; i < data->vcands.length (); i++)
7477 {
7478 struct iv_cand *cand = data->vcands[i];
7479
7480 if (cand->inv_vars)
7481 BITMAP_FREE (cand->inv_vars);
7482 if (cand->inv_exprs)
7483 BITMAP_FREE (cand->inv_exprs);
7484 free (cand);
7485 }
7486 data->vcands.truncate (0);
7487
7488 if (data->version_info_size < num_ssa_names)
7489 {
7490 data->version_info_size = 2 * num_ssa_names;
7491 free (data->version_info);
7492 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7493 }
7494
7495 data->max_inv_var_id = 0;
7496 data->max_inv_expr_id = 0;
7497
7498 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7499 SET_DECL_RTL (obj, NULL_RTX);
7500
7501 decl_rtl_to_reset.truncate (0);
7502
7503 data->inv_expr_tab->empty ();
7504
7505 data->iv_common_cand_tab->empty ();
7506 data->iv_common_cands.truncate (0);
7507 }
7508
7509 /* Finalizes data structures used by the iv optimization pass, freeing all
7510 the per-pass allocations. */
7511
7512 static void
7513 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7514 {
7515 free_loop_data (data);
7516 free (data->version_info);
7517 BITMAP_FREE (data->relevant);
7518 BITMAP_FREE (data->important_candidates);
7519
7520 decl_rtl_to_reset.release ();
7521 data->vgroups.release ();
7522 data->vcands.release ();
7523 delete data->inv_expr_tab;
7524 data->inv_expr_tab = NULL;
7525 free_affine_expand_cache (&data->name_expansion_cache);
7526 delete data->iv_common_cand_tab;
7527 data->iv_common_cand_tab = NULL;
7528 data->iv_common_cands.release ();
7529 obstack_free (&data->iv_obstack, NULL);
7530 }
7531
7532 /* Returns true if any of the NUM_NODES blocks of the loop body BODY contains
   a function call that is neither internal nor an inexpensive builtin.  */
7533
7534 static bool
7535 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7536 {
7537 gimple_stmt_iterator gsi;
7538 unsigned i;
7539
7540 for (i = 0; i < num_nodes; i++)
7541 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7542 {
7543 gimple *stmt = gsi_stmt (gsi);
7544 if (is_gimple_call (stmt)
7545 && !gimple_call_internal_p (stmt)
7546 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7547 return true;
7548 }
7549 return false;
7550 }
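/* The result is cached in data->body_includes_call by tree_ssa_iv_optimize_loop
   below; presumably the cost model treats loops containing real calls as
   having fewer registers to spare for induction variables.  */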
7551
7552 /* Determine cost scaling factor for basic blocks in loop. */
7553 #define COST_SCALING_FACTOR_BOUND (20)
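/* A worked example of how the bound is meant to apply in
   determine_scaling_factor below (the numbers are made up for illustration):
   with a header frequency lfreq of 10 and a hottest-block frequency max_freq
   of 1000, the ratio 1000 / 10 = 100 exceeds COST_SCALING_FACTOR_BOUND, so
   divisor = max_freq and factor = 20; a block with frequency 500 then gets
   aux = 20 * 500 / 1000 = 10 instead of the unbounded 500 / 10 = 50, keeping
   the per-block multipliers within the bound and avoiding overflow in the
   scaled costs.  */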
7554
7555 static void
7556 determine_scaling_factor (struct ivopts_data *data, basic_block *body)
7557 {
7558 int lfreq = data->current_loop->header->count.to_frequency (cfun);
7559 if (!data->speed || lfreq <= 0)
7560 return;
7561
7562 int max_freq = lfreq;
7563 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7564 {
7565 body[i]->aux = (void *)(intptr_t) 1;
7566 if (max_freq < body[i]->count.to_frequency (cfun))
7567 max_freq = body[i]->count.to_frequency (cfun);
7568 }
7569 if (max_freq > lfreq)
7570 {
7571 int divisor, factor;
7572 /* Check if scaling factor itself needs to be scaled by the bound. This
7573 is to avoid overflow when scaling cost according to profile info. */
7574 if (max_freq / lfreq > COST_SCALING_FACTOR_BOUND)
7575 {
7576 divisor = max_freq;
7577 factor = COST_SCALING_FACTOR_BOUND;
7578 }
7579 else
7580 {
7581 divisor = lfreq;
7582 factor = 1;
7583 }
7584 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7585 {
7586 int bfreq = body[i]->count.to_frequency (cfun);
7587 if (bfreq <= lfreq)
7588 continue;
7589
7590 body[i]->aux = (void*)(intptr_t) (factor * bfreq / divisor);
7591 }
7592 }
7593 }
7594
7595 /* Optimizes LOOP.  SSA defs of IVs made dead by the rewrite are collected in
   TOREMOVE for the caller to release.  Returns true if anything changed.  */
7596
7597 static bool
7598 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop,
7599 bitmap toremove)
7600 {
7601 bool changed = false;
7602 struct iv_ca *iv_ca;
7603 edge exit = single_dom_exit (loop);
7604 basic_block *body;
7605
7606 gcc_assert (!data->niters);
7607 data->current_loop = loop;
7608 data->loop_loc = find_loop_location (loop).get_location_t ();
7609 data->speed = optimize_loop_for_speed_p (loop);
7610
7611 if (dump_file && (dump_flags & TDF_DETAILS))
7612 {
7613 fprintf (dump_file, "Processing loop %d", loop->num);
7614 if (data->loop_loc != UNKNOWN_LOCATION)
7615 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7616 LOCATION_LINE (data->loop_loc));
7617 fprintf (dump_file, "\n");
7618
7619 if (exit)
7620 {
7621 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7622 exit->src->index, exit->dest->index);
7623 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7624 fprintf (dump_file, "\n");
7625 }
7626
7627 fprintf (dump_file, "\n");
7628 }
7629
7630 body = get_loop_body (loop);
7631 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7632 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7633
7634 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7635
7636 /* For each ssa name determines whether it behaves as an induction variable
7637 in some loop. */
7638 if (!find_induction_variables (data))
7639 goto finish;
7640
7641 /* Finds interesting uses (item 1). */
7642 find_interesting_uses (data);
7643 if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
7644 goto finish;
7645
7646 /* Determine cost scaling factor for basic blocks in loop. */
7647 determine_scaling_factor (data, body);
7648
7649 /* Finds candidates for the induction variables (item 2). */
7650 find_iv_candidates (data);
7651
7652 /* Calculates the costs (item 3, part 1). */
7653 determine_iv_costs (data);
7654 determine_group_iv_costs (data);
7655 determine_set_costs (data);
7656
7657 /* Find the optimal set of induction variables (item 3, part 2). */
7658 iv_ca = find_optimal_iv_set (data);
7659 /* Clean up the basic block aux fields set by determine_scaling_factor.  */
7660 for (unsigned i = 0; i < data->current_loop->num_nodes; i++)
7661 body[i]->aux = NULL;
7662 if (!iv_ca)
7663 goto finish;
7664 changed = true;
7665
7666 /* Create the new induction variables (item 4, part 1). */
7667 create_new_ivs (data, iv_ca);
7668 iv_ca_free (&iv_ca);
7669
7670 /* Rewrite the uses (item 4, part 2). */
7671 rewrite_groups (data);
7672
7673 /* Remove the ivs that are unused after rewriting. */
7674 remove_unused_ivs (data, toremove);
7675
7676 finish:
7677 free (body);
7678 free_loop_data (data);
7679
7680 return changed;
7681 }
7682
7683 /* Main entry point. Optimizes induction variables in loops. */
7684
7685 void
7686 tree_ssa_iv_optimize (void)
7687 {
7688 struct loop *loop;
7689 struct ivopts_data data;
7690 auto_bitmap toremove;
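/* TOREMOVE accumulates, across all loops, the SSA defs of IVs that become
   dead after rewriting; they are released in one batch below once every
   loop has been processed.  */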
7691
7692 tree_ssa_iv_optimize_init (&data);
7693
7694 /* Optimize the loops starting with the innermost ones. */
7695 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7696 {
7697 if (dump_file && (dump_flags & TDF_DETAILS))
7698 flow_loop_dump (loop, dump_file, NULL, 1);
7699
7700 tree_ssa_iv_optimize_loop (&data, loop, toremove);
7701 }
7702
7703 /* Remove eliminated IV defs. */
7704 release_defs_bitset (toremove);
7705
7706 /* We have changed the structure of induction variables; it might happen
7707 that definitions in the scev database refer to some of them that were
7708 eliminated. */
7709 scev_reset_htab ();
7710 /* Likewise niter and control-IV information. */
7711 free_numbers_of_iterations_estimates (cfun);
7712
7713 tree_ssa_iv_optimize_finalize (&data);
7714 }
7715
7716 #include "gt-tree-ssa-loop-ivopts.h"