]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-ssa-loop-ivopts.c
tree-ssa-loop-ivopts.c (split_address_cost): Check depends_on.
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "cfghooks.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "rtl.h"
72 #include "ssa.h"
73 #include "alias.h"
74 #include "fold-const.h"
75 #include "stor-layout.h"
76 #include "tm_p.h"
77 #include "gimple-pretty-print.h"
78 #include "internal-fn.h"
79 #include "tree-eh.h"
80 #include "gimplify.h"
81 #include "gimple-iterator.h"
82 #include "gimplify-me.h"
83 #include "cgraph.h"
84 #include "tree-cfg.h"
85 #include "tree-ssa-loop-ivopts.h"
86 #include "tree-ssa-loop-manip.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
89 #include "flags.h"
90 #include "insn-config.h"
91 #include "expmed.h"
92 #include "dojump.h"
93 #include "explow.h"
94 #include "calls.h"
95 #include "emit-rtl.h"
96 #include "varasm.h"
97 #include "stmt.h"
98 #include "expr.h"
99 #include "tree-dfa.h"
100 #include "tree-ssa.h"
101 #include "cfgloop.h"
102 #include "tree-pass.h"
103 #include "tree-chrec.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "langhooks.h"
107 #include "tree-affine.h"
108 #include "target.h"
109 #include "tree-inline.h"
110 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113 #include "tree-vectorizer.h"
114
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
118 #include "recog.h"
119
120 /* The infinite cost. */
121 #define INFTY 10000000
122
123 #define AVG_LOOP_NITER(LOOP) 5
124
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
127 exists. */
128
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop *loop)
131 {
132 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
133 if (niter == -1)
134 return AVG_LOOP_NITER (loop);
135
136 return niter;
137 }
138
139 /* Representation of the induction variable. */
140 struct iv
141 {
142 tree base; /* Initial value of the iv. */
143 tree base_object; /* A memory object to that the induction variable points. */
144 tree step; /* Step of the iv (constant only). */
145 tree ssa_name; /* The ssa name with the value. */
146 unsigned use_id; /* The identifier in the use if it is the case. */
147 bool biv_p; /* Is it a biv? */
148 bool have_use_for; /* Do we already have a use for it? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 bool have_address_use;/* For biv, indicate if it's used in any address
151 type use. */
152 };
153
154 /* Per-ssa version information (induction variable descriptions, etc.). */
155 struct version_info
156 {
157 tree name; /* The ssa name. */
158 struct iv *iv; /* Induction variable description. */
159 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv; /* For the original biv, whether to preserve it. */
162 unsigned inv_id; /* Id of an invariant. */
163 };
164
165 /* Types of uses. */
166 enum use_type
167 {
168 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
169 USE_ADDRESS, /* Use in an address. */
170 USE_COMPARE /* Use is a compare. */
171 };
172
173 /* Cost of a computation. */
174 struct comp_cost
175 {
176 int cost; /* The runtime cost. */
177 unsigned complexity; /* The estimate of the complexity of the code for
178 the computation (in no concrete units --
179 complexity field should be larger for more
180 complex expressions and addressing modes). */
181 };
182
183 static const comp_cost no_cost = {0, 0};
184 static const comp_cost infinite_cost = {INFTY, INFTY};
185
186 /* The candidate - cost pair. */
187 struct cost_pair
188 {
189 struct iv_cand *cand; /* The candidate. */
190 comp_cost cost; /* The cost. */
191 bitmap depends_on; /* The list of invariants that have to be
192 preserved. */
193 tree value; /* For final value elimination, the expression for
194 the final value of the iv. For iv elimination,
195 the new bound to compare with. */
196 enum tree_code comp; /* For iv elimination, the comparison. */
197 int inv_expr_id; /* Loop invariant expression id. */
198 };
199
200 /* Use. */
201 struct iv_use
202 {
203 unsigned id; /* The id of the use. */
204 unsigned sub_id; /* The id of the sub use. */
205 enum use_type type; /* Type of the use. */
206 struct iv *iv; /* The induction variable it is based on. */
207 gimple *stmt; /* Statement in that it occurs. */
208 tree *op_p; /* The place where it occurs. */
209 bitmap related_cands; /* The set of "related" iv candidates, plus the common
210 important ones. */
211
212 unsigned n_map_members; /* Number of candidates in the cost_map list. */
213 struct cost_pair *cost_map;
214 /* The costs wrto the iv candidates. */
215
216 struct iv_cand *selected;
217 /* The selected candidate. */
218
219 struct iv_use *next; /* The next sub use. */
220 tree addr_base; /* Base address with const offset stripped. */
221 unsigned HOST_WIDE_INT addr_offset;
222 /* Const offset stripped from base address. */
223 };
224
225 /* The position where the iv is computed. */
226 enum iv_position
227 {
228 IP_NORMAL, /* At the end, just before the exit condition. */
229 IP_END, /* At the end of the latch block. */
230 IP_BEFORE_USE, /* Immediately before a specific use. */
231 IP_AFTER_USE, /* Immediately after a specific use. */
232 IP_ORIGINAL /* The original biv. */
233 };
234
235 /* The induction variable candidate. */
236 struct iv_cand
237 {
238 unsigned id; /* The number of the candidate. */
239 bool important; /* Whether this is an "important" candidate, i.e. such
240 that it should be considered by all uses. */
241 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
242 gimple *incremented_at;/* For original biv, the statement where it is
243 incremented. */
244 tree var_before; /* The variable used for it before increment. */
245 tree var_after; /* The variable used for it after increment. */
246 struct iv *iv; /* The value of the candidate. NULL for
247 "pseudocandidate" used to indicate the possibility
248 to replace the final value of an iv by direct
249 computation of the value. */
250 unsigned cost; /* Cost of the candidate. */
251 unsigned cost_step; /* Cost of the candidate's increment operation. */
252 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
253 where it is incremented. */
254 bitmap depends_on; /* The list of invariants that are used in step of the
255 biv. */
256 struct iv *orig_iv; /* The original iv if this cand is added from biv with
257 smaller type. */
258 };
259
260 /* Loop invariant expression hashtable entry. */
261 struct iv_inv_expr_ent
262 {
263 tree expr;
264 int id;
265 hashval_t hash;
266 };
267
268 /* The data used by the induction variable optimizations. */
269
270 /* Hashtable helpers. */
271
272 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
273 {
274 static inline hashval_t hash (const iv_inv_expr_ent *);
275 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
276 };
277
278 /* Hash function for loop invariant expressions. */
279
280 inline hashval_t
281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
282 {
283 return expr->hash;
284 }
285
286 /* Hash table equality function for expressions. */
287
288 inline bool
289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
290 const iv_inv_expr_ent *expr2)
291 {
292 return expr1->hash == expr2->hash
293 && operand_equal_p (expr1->expr, expr2->expr, 0);
294 }
295
296 struct ivopts_data
297 {
298 /* The currently optimized loop. */
299 struct loop *current_loop;
300 source_location loop_loc;
301
302 /* Numbers of iterations for all exits of the current loop. */
303 hash_map<edge, tree_niter_desc *> *niters;
304
305 /* Number of registers used in it. */
306 unsigned regs_used;
307
308 /* The size of version_info array allocated. */
309 unsigned version_info_size;
310
311 /* The array of information for the ssa names. */
312 struct version_info *version_info;
313
314 /* The hashtable of loop invariant expressions created
315 by ivopt. */
316 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
317
318 /* Loop invariant expression id. */
319 int inv_expr_id;
320
321 /* The bitmap of indices in version_info whose value was changed. */
322 bitmap relevant;
323
324 /* The uses of induction variables. */
325 vec<iv_use *> iv_uses;
326
327 /* The candidates. */
328 vec<iv_cand *> iv_candidates;
329
330 /* A bitmap of important candidates. */
331 bitmap important_candidates;
332
333 /* Cache used by tree_to_aff_combination_expand. */
334 hash_map<tree, name_expansion *> *name_expansion_cache;
335
336 /* The maximum invariant id. */
337 unsigned max_inv_id;
338
339 /* Number of no_overflow BIVs which are not used in memory address. */
340 unsigned bivs_not_used_in_addr;
341
342 /* Obstack for iv structure. */
343 struct obstack iv_obstack;
344
345 /* Whether to consider just related and important candidates when replacing a
346 use. */
347 bool consider_all_candidates;
348
349 /* Are we optimizing for speed? */
350 bool speed;
351
352 /* Whether the loop body includes any function calls. */
353 bool body_includes_call;
354
355 /* Whether the loop body can only be exited via single exit. */
356 bool loop_single_exit_p;
357 };
358
359 /* An assignment of iv candidates to uses. */
360
361 struct iv_ca
362 {
363 /* The number of uses covered by the assignment. */
364 unsigned upto;
365
366 /* Number of uses that cannot be expressed by the candidates in the set. */
367 unsigned bad_uses;
368
369 /* Candidate assigned to a use, together with the related costs. */
370 struct cost_pair **cand_for_use;
371
372 /* Number of times each candidate is used. */
373 unsigned *n_cand_uses;
374
375 /* The candidates used. */
376 bitmap cands;
377
378 /* The number of candidates in the set. */
379 unsigned n_cands;
380
381 /* Total number of registers needed. */
382 unsigned n_regs;
383
384 /* Total cost of expressing uses. */
385 comp_cost cand_use_cost;
386
387 /* Total cost of candidates. */
388 unsigned cand_cost;
389
390 /* Number of times each invariant is used. */
391 unsigned *n_invariant_uses;
392
393 /* The array holding the number of uses of each loop
394 invariant expressions created by ivopt. */
395 unsigned *used_inv_expr;
396
397 /* The number of created loop invariants. */
398 unsigned num_used_inv_expr;
399
400 /* Total cost of the assignment. */
401 comp_cost cost;
402 };
403
404 /* Difference of two iv candidate assignments. */
405
406 struct iv_ca_delta
407 {
408 /* Changed use. */
409 struct iv_use *use;
410
411 /* An old assignment (for rollback purposes). */
412 struct cost_pair *old_cp;
413
414 /* A new assignment. */
415 struct cost_pair *new_cp;
416
417 /* Next change in the list. */
418 struct iv_ca_delta *next_change;
419 };
420
421 /* Bound on number of candidates below that all candidates are considered. */
422
423 #define CONSIDER_ALL_CANDIDATES_BOUND \
424 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
425
426 /* If there are more iv occurrences, we just give up (it is quite unlikely that
427 optimizing such a loop would help, and it would take ages). */
428
429 #define MAX_CONSIDERED_USES \
430 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
431
432 /* If there are at most this number of ivs in the set, try removing unnecessary
433 ivs from the set always. */
434
435 #define ALWAYS_PRUNE_CAND_SET_BOUND \
436 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
437
438 /* The list of trees for that the decl_rtl field must be reset is stored
439 here. */
440
441 static vec<tree> decl_rtl_to_reset;
442
443 static comp_cost force_expr_to_var_cost (tree, bool);
444
445 /* Number of uses recorded in DATA. */
446
447 static inline unsigned
448 n_iv_uses (struct ivopts_data *data)
449 {
450 return data->iv_uses.length ();
451 }
452
453 /* Ith use recorded in DATA. */
454
455 static inline struct iv_use *
456 iv_use (struct ivopts_data *data, unsigned i)
457 {
458 return data->iv_uses[i];
459 }
460
461 /* Number of candidates recorded in DATA. */
462
463 static inline unsigned
464 n_iv_cands (struct ivopts_data *data)
465 {
466 return data->iv_candidates.length ();
467 }
468
469 /* Ith candidate recorded in DATA. */
470
471 static inline struct iv_cand *
472 iv_cand (struct ivopts_data *data, unsigned i)
473 {
474 return data->iv_candidates[i];
475 }
476
477 /* The single loop exit if it dominates the latch, NULL otherwise. */
478
479 edge
480 single_dom_exit (struct loop *loop)
481 {
482 edge exit = single_exit (loop);
483
484 if (!exit)
485 return NULL;
486
487 if (!just_once_each_iteration_p (loop, exit->src))
488 return NULL;
489
490 return exit;
491 }
492
493 /* Dumps information about the induction variable IV to FILE. */
494
495 void
496 dump_iv (FILE *file, struct iv *iv, bool dump_name)
497 {
498 if (iv->ssa_name && dump_name)
499 {
500 fprintf (file, "ssa name ");
501 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
502 fprintf (file, "\n");
503 }
504
505 fprintf (file, " type ");
506 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
507 fprintf (file, "\n");
508
509 if (iv->step)
510 {
511 fprintf (file, " base ");
512 print_generic_expr (file, iv->base, TDF_SLIM);
513 fprintf (file, "\n");
514
515 fprintf (file, " step ");
516 print_generic_expr (file, iv->step, TDF_SLIM);
517 fprintf (file, "\n");
518 }
519 else
520 {
521 fprintf (file, " invariant ");
522 print_generic_expr (file, iv->base, TDF_SLIM);
523 fprintf (file, "\n");
524 }
525
526 if (iv->base_object)
527 {
528 fprintf (file, " base object ");
529 print_generic_expr (file, iv->base_object, TDF_SLIM);
530 fprintf (file, "\n");
531 }
532
533 if (iv->biv_p)
534 fprintf (file, " is a biv\n");
535
536 if (iv->no_overflow)
537 fprintf (file, " iv doesn't overflow wrto loop niter\n");
538 }
539
540 /* Dumps information about the USE to FILE. */
541
542 void
543 dump_use (FILE *file, struct iv_use *use)
544 {
545 fprintf (file, "use %d", use->id);
546 if (use->sub_id)
547 fprintf (file, ".%d", use->sub_id);
548
549 fprintf (file, "\n");
550
551 switch (use->type)
552 {
553 case USE_NONLINEAR_EXPR:
554 fprintf (file, " generic\n");
555 break;
556
557 case USE_ADDRESS:
558 fprintf (file, " address\n");
559 break;
560
561 case USE_COMPARE:
562 fprintf (file, " compare\n");
563 break;
564
565 default:
566 gcc_unreachable ();
567 }
568
569 fprintf (file, " in statement ");
570 print_gimple_stmt (file, use->stmt, 0, 0);
571 fprintf (file, "\n");
572
573 fprintf (file, " at position ");
574 if (use->op_p)
575 print_generic_expr (file, *use->op_p, TDF_SLIM);
576 fprintf (file, "\n");
577
578 dump_iv (file, use->iv, false);
579
580 if (use->related_cands)
581 {
582 fprintf (file, " related candidates ");
583 dump_bitmap (file, use->related_cands);
584 }
585 }
586
587 /* Dumps information about the uses to FILE. */
588
589 void
590 dump_uses (FILE *file, struct ivopts_data *data)
591 {
592 unsigned i;
593 struct iv_use *use;
594
595 for (i = 0; i < n_iv_uses (data); i++)
596 {
597 use = iv_use (data, i);
598 do
599 {
600 dump_use (file, use);
601 use = use->next;
602 }
603 while (use);
604 fprintf (file, "\n");
605 }
606 }
607
608 /* Dumps information about induction variable candidate CAND to FILE. */
609
610 void
611 dump_cand (FILE *file, struct iv_cand *cand)
612 {
613 struct iv *iv = cand->iv;
614
615 fprintf (file, "candidate %d%s\n",
616 cand->id, cand->important ? " (important)" : "");
617
618 if (cand->depends_on)
619 {
620 fprintf (file, " depends on ");
621 dump_bitmap (file, cand->depends_on);
622 }
623
624 if (!iv)
625 {
626 fprintf (file, " final value replacement\n");
627 return;
628 }
629
630 if (cand->var_before)
631 {
632 fprintf (file, " var_before ");
633 print_generic_expr (file, cand->var_before, TDF_SLIM);
634 fprintf (file, "\n");
635 }
636 if (cand->var_after)
637 {
638 fprintf (file, " var_after ");
639 print_generic_expr (file, cand->var_after, TDF_SLIM);
640 fprintf (file, "\n");
641 }
642
643 switch (cand->pos)
644 {
645 case IP_NORMAL:
646 fprintf (file, " incremented before exit test\n");
647 break;
648
649 case IP_BEFORE_USE:
650 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
651 break;
652
653 case IP_AFTER_USE:
654 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
655 break;
656
657 case IP_END:
658 fprintf (file, " incremented at end\n");
659 break;
660
661 case IP_ORIGINAL:
662 fprintf (file, " original biv\n");
663 break;
664 }
665
666 dump_iv (file, iv, false);
667 }
668
669 /* Returns the info for ssa version VER. */
670
671 static inline struct version_info *
672 ver_info (struct ivopts_data *data, unsigned ver)
673 {
674 return data->version_info + ver;
675 }
676
677 /* Returns the info for ssa name NAME. */
678
679 static inline struct version_info *
680 name_info (struct ivopts_data *data, tree name)
681 {
682 return ver_info (data, SSA_NAME_VERSION (name));
683 }
684
685 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
686 emitted in LOOP. */
687
688 static bool
689 stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
690 {
691 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
692
693 gcc_assert (bb);
694
695 if (sbb == loop->latch)
696 return true;
697
698 if (sbb != bb)
699 return false;
700
701 return stmt == last_stmt (bb);
702 }
703
704 /* Returns true if STMT if after the place where the original induction
705 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
706 if the positions are identical. */
707
708 static bool
709 stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
710 {
711 basic_block cand_bb = gimple_bb (cand->incremented_at);
712 basic_block stmt_bb = gimple_bb (stmt);
713
714 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
715 return false;
716
717 if (stmt_bb != cand_bb)
718 return true;
719
720 if (true_if_equal
721 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
722 return true;
723 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
724 }
725
726 /* Returns true if STMT if after the place where the induction variable
727 CAND is incremented in LOOP. */
728
729 static bool
730 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
731 {
732 switch (cand->pos)
733 {
734 case IP_END:
735 return false;
736
737 case IP_NORMAL:
738 return stmt_after_ip_normal_pos (loop, stmt);
739
740 case IP_ORIGINAL:
741 case IP_AFTER_USE:
742 return stmt_after_inc_pos (cand, stmt, false);
743
744 case IP_BEFORE_USE:
745 return stmt_after_inc_pos (cand, stmt, true);
746
747 default:
748 gcc_unreachable ();
749 }
750 }
751
752 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
753
754 static bool
755 abnormal_ssa_name_p (tree exp)
756 {
757 if (!exp)
758 return false;
759
760 if (TREE_CODE (exp) != SSA_NAME)
761 return false;
762
763 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
764 }
765
766 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
767 abnormal phi node. Callback for for_each_index. */
768
769 static bool
770 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
771 void *data ATTRIBUTE_UNUSED)
772 {
773 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
774 {
775 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
776 return false;
777 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
778 return false;
779 }
780
781 return !abnormal_ssa_name_p (*index);
782 }
783
784 /* Returns true if EXPR contains a ssa name that occurs in an
785 abnormal phi node. */
786
787 bool
788 contains_abnormal_ssa_name_p (tree expr)
789 {
790 enum tree_code code;
791 enum tree_code_class codeclass;
792
793 if (!expr)
794 return false;
795
796 code = TREE_CODE (expr);
797 codeclass = TREE_CODE_CLASS (code);
798
799 if (code == SSA_NAME)
800 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
801
802 if (code == INTEGER_CST
803 || is_gimple_min_invariant (expr))
804 return false;
805
806 if (code == ADDR_EXPR)
807 return !for_each_index (&TREE_OPERAND (expr, 0),
808 idx_contains_abnormal_ssa_name_p,
809 NULL);
810
811 if (code == COND_EXPR)
812 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
813 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
814 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
815
816 switch (codeclass)
817 {
818 case tcc_binary:
819 case tcc_comparison:
820 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
821 return true;
822
823 /* Fallthru. */
824 case tcc_unary:
825 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
826 return true;
827
828 break;
829
830 default:
831 gcc_unreachable ();
832 }
833
834 return false;
835 }
836
837 /* Returns the structure describing number of iterations determined from
838 EXIT of DATA->current_loop, or NULL if something goes wrong. */
839
840 static struct tree_niter_desc *
841 niter_for_exit (struct ivopts_data *data, edge exit)
842 {
843 struct tree_niter_desc *desc;
844 tree_niter_desc **slot;
845
846 if (!data->niters)
847 {
848 data->niters = new hash_map<edge, tree_niter_desc *>;
849 slot = NULL;
850 }
851 else
852 slot = data->niters->get (exit);
853
854 if (!slot)
855 {
856 /* Try to determine number of iterations. We cannot safely work with ssa
857 names that appear in phi nodes on abnormal edges, so that we do not
858 create overlapping life ranges for them (PR 27283). */
859 desc = XNEW (struct tree_niter_desc);
860 if (!number_of_iterations_exit (data->current_loop,
861 exit, desc, true)
862 || contains_abnormal_ssa_name_p (desc->niter))
863 {
864 XDELETE (desc);
865 desc = NULL;
866 }
867 data->niters->put (exit, desc);
868 }
869 else
870 desc = *slot;
871
872 return desc;
873 }
874
875 /* Returns the structure describing number of iterations determined from
876 single dominating exit of DATA->current_loop, or NULL if something
877 goes wrong. */
878
879 static struct tree_niter_desc *
880 niter_for_single_dom_exit (struct ivopts_data *data)
881 {
882 edge exit = single_dom_exit (data->current_loop);
883
884 if (!exit)
885 return NULL;
886
887 return niter_for_exit (data, exit);
888 }
889
890 /* Initializes data structures used by the iv optimization pass, stored
891 in DATA. */
892
893 static void
894 tree_ssa_iv_optimize_init (struct ivopts_data *data)
895 {
896 data->version_info_size = 2 * num_ssa_names;
897 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
898 data->relevant = BITMAP_ALLOC (NULL);
899 data->important_candidates = BITMAP_ALLOC (NULL);
900 data->max_inv_id = 0;
901 data->niters = NULL;
902 data->iv_uses.create (20);
903 data->iv_candidates.create (20);
904 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
905 data->inv_expr_id = 0;
906 data->name_expansion_cache = NULL;
907 decl_rtl_to_reset.create (20);
908 gcc_obstack_init (&data->iv_obstack);
909 }
910
911 /* Returns a memory object to that EXPR points. In case we are able to
912 determine that it does not point to any such object, NULL is returned. */
913
914 static tree
915 determine_base_object (tree expr)
916 {
917 enum tree_code code = TREE_CODE (expr);
918 tree base, obj;
919
920 /* If this is a pointer casted to any type, we need to determine
921 the base object for the pointer; so handle conversions before
922 throwing away non-pointer expressions. */
923 if (CONVERT_EXPR_P (expr))
924 return determine_base_object (TREE_OPERAND (expr, 0));
925
926 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
927 return NULL_TREE;
928
929 switch (code)
930 {
931 case INTEGER_CST:
932 return NULL_TREE;
933
934 case ADDR_EXPR:
935 obj = TREE_OPERAND (expr, 0);
936 base = get_base_address (obj);
937
938 if (!base)
939 return expr;
940
941 if (TREE_CODE (base) == MEM_REF)
942 return determine_base_object (TREE_OPERAND (base, 0));
943
944 return fold_convert (ptr_type_node,
945 build_fold_addr_expr (base));
946
947 case POINTER_PLUS_EXPR:
948 return determine_base_object (TREE_OPERAND (expr, 0));
949
950 case PLUS_EXPR:
951 case MINUS_EXPR:
952 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
953 gcc_unreachable ();
954
955 default:
956 return fold_convert (ptr_type_node, expr);
957 }
958 }
959
960 /* Return true if address expression with non-DECL_P operand appears
961 in EXPR. */
962
963 static bool
964 contain_complex_addr_expr (tree expr)
965 {
966 bool res = false;
967
968 STRIP_NOPS (expr);
969 switch (TREE_CODE (expr))
970 {
971 case POINTER_PLUS_EXPR:
972 case PLUS_EXPR:
973 case MINUS_EXPR:
974 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
975 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
976 break;
977
978 case ADDR_EXPR:
979 return (!DECL_P (TREE_OPERAND (expr, 0)));
980
981 default:
982 return false;
983 }
984
985 return res;
986 }
987
988 /* Allocates an induction variable with given initial value BASE and step STEP
989 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
990
991 static struct iv *
992 alloc_iv (struct ivopts_data *data, tree base, tree step,
993 bool no_overflow = false)
994 {
995 tree expr = base;
996 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
997 sizeof (struct iv));
998 gcc_assert (step != NULL_TREE);
999
1000 /* Lower address expression in base except ones with DECL_P as operand.
1001 By doing this:
1002 1) More accurate cost can be computed for address expressions;
1003 2) Duplicate candidates won't be created for bases in different
1004 forms, like &a[0] and &a. */
1005 STRIP_NOPS (expr);
1006 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1007 || contain_complex_addr_expr (expr))
1008 {
1009 aff_tree comb;
1010 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1011 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1012 }
1013
1014 iv->base = base;
1015 iv->base_object = determine_base_object (base);
1016 iv->step = step;
1017 iv->biv_p = false;
1018 iv->have_use_for = false;
1019 iv->use_id = 0;
1020 iv->ssa_name = NULL_TREE;
1021 iv->no_overflow = no_overflow;
1022 iv->have_address_use = false;
1023
1024 return iv;
1025 }
1026
1027 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1028 doesn't overflow. */
1029
1030 static void
1031 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1032 bool no_overflow)
1033 {
1034 struct version_info *info = name_info (data, iv);
1035
1036 gcc_assert (!info->iv);
1037
1038 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1039 info->iv = alloc_iv (data, base, step, no_overflow);
1040 info->iv->ssa_name = iv;
1041 }
1042
1043 /* Finds induction variable declaration for VAR. */
1044
1045 static struct iv *
1046 get_iv (struct ivopts_data *data, tree var)
1047 {
1048 basic_block bb;
1049 tree type = TREE_TYPE (var);
1050
1051 if (!POINTER_TYPE_P (type)
1052 && !INTEGRAL_TYPE_P (type))
1053 return NULL;
1054
1055 if (!name_info (data, var)->iv)
1056 {
1057 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1058
1059 if (!bb
1060 || !flow_bb_inside_loop_p (data->current_loop, bb))
1061 set_iv (data, var, var, build_int_cst (type, 0), true);
1062 }
1063
1064 return name_info (data, var)->iv;
1065 }
1066
1067 /* Return the first non-invariant ssa var found in EXPR. */
1068
1069 static tree
1070 extract_single_var_from_expr (tree expr)
1071 {
1072 int i, n;
1073 tree tmp;
1074 enum tree_code code;
1075
1076 if (!expr || is_gimple_min_invariant (expr))
1077 return NULL;
1078
1079 code = TREE_CODE (expr);
1080 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1081 {
1082 n = TREE_OPERAND_LENGTH (expr);
1083 for (i = 0; i < n; i++)
1084 {
1085 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1086
1087 if (tmp)
1088 return tmp;
1089 }
1090 }
1091 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1092 }
1093
1094 /* Finds basic ivs. */
1095
1096 static bool
1097 find_bivs (struct ivopts_data *data)
1098 {
1099 gphi *phi;
1100 affine_iv iv;
1101 tree step, type, base, stop;
1102 bool found = false;
1103 struct loop *loop = data->current_loop;
1104 gphi_iterator psi;
1105
1106 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1107 {
1108 phi = psi.phi ();
1109
1110 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1111 continue;
1112
1113 if (virtual_operand_p (PHI_RESULT (phi)))
1114 continue;
1115
1116 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1117 continue;
1118
1119 if (integer_zerop (iv.step))
1120 continue;
1121
1122 step = iv.step;
1123 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1124 /* Stop expanding iv base at the first ssa var referred by iv step.
1125 Ideally we should stop at any ssa var, because that's expensive
1126 and unusual to happen, we just do it on the first one.
1127
1128 See PR64705 for the rationale. */
1129 stop = extract_single_var_from_expr (step);
1130 base = expand_simple_operations (base, stop);
1131 if (contains_abnormal_ssa_name_p (base)
1132 || contains_abnormal_ssa_name_p (step))
1133 continue;
1134
1135 type = TREE_TYPE (PHI_RESULT (phi));
1136 base = fold_convert (type, base);
1137 if (step)
1138 {
1139 if (POINTER_TYPE_P (type))
1140 step = convert_to_ptrofftype (step);
1141 else
1142 step = fold_convert (type, step);
1143 }
1144
1145 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1146 found = true;
1147 }
1148
1149 return found;
1150 }
1151
1152 /* Marks basic ivs. */
1153
1154 static void
1155 mark_bivs (struct ivopts_data *data)
1156 {
1157 gphi *phi;
1158 gimple *def;
1159 tree var;
1160 struct iv *iv, *incr_iv;
1161 struct loop *loop = data->current_loop;
1162 basic_block incr_bb;
1163 gphi_iterator psi;
1164
1165 data->bivs_not_used_in_addr = 0;
1166 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1167 {
1168 phi = psi.phi ();
1169
1170 iv = get_iv (data, PHI_RESULT (phi));
1171 if (!iv)
1172 continue;
1173
1174 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1175 def = SSA_NAME_DEF_STMT (var);
1176 /* Don't mark iv peeled from other one as biv. */
1177 if (def
1178 && gimple_code (def) == GIMPLE_PHI
1179 && gimple_bb (def) == loop->header)
1180 continue;
1181
1182 incr_iv = get_iv (data, var);
1183 if (!incr_iv)
1184 continue;
1185
1186 /* If the increment is in the subloop, ignore it. */
1187 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1188 if (incr_bb->loop_father != data->current_loop
1189 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1190 continue;
1191
1192 iv->biv_p = true;
1193 incr_iv->biv_p = true;
1194 if (iv->no_overflow)
1195 data->bivs_not_used_in_addr++;
1196 if (incr_iv->no_overflow)
1197 data->bivs_not_used_in_addr++;
1198 }
1199 }
1200
1201 /* Checks whether STMT defines a linear induction variable and stores its
1202 parameters to IV. */
1203
1204 static bool
1205 find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
1206 {
1207 tree lhs, stop;
1208 struct loop *loop = data->current_loop;
1209
1210 iv->base = NULL_TREE;
1211 iv->step = NULL_TREE;
1212
1213 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1214 return false;
1215
1216 lhs = gimple_assign_lhs (stmt);
1217 if (TREE_CODE (lhs) != SSA_NAME)
1218 return false;
1219
1220 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1221 return false;
1222
1223 /* Stop expanding iv base at the first ssa var referred by iv step.
1224 Ideally we should stop at any ssa var, because that's expensive
1225 and unusual to happen, we just do it on the first one.
1226
1227 See PR64705 for the rationale. */
1228 stop = extract_single_var_from_expr (iv->step);
1229 iv->base = expand_simple_operations (iv->base, stop);
1230 if (contains_abnormal_ssa_name_p (iv->base)
1231 || contains_abnormal_ssa_name_p (iv->step))
1232 return false;
1233
1234 /* If STMT could throw, then do not consider STMT as defining a GIV.
1235 While this will suppress optimizations, we can not safely delete this
1236 GIV and associated statements, even if it appears it is not used. */
1237 if (stmt_could_throw_p (stmt))
1238 return false;
1239
1240 return true;
1241 }
1242
1243 /* Finds general ivs in statement STMT. */
1244
1245 static void
1246 find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
1247 {
1248 affine_iv iv;
1249
1250 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1251 return;
1252
1253 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1254 }
1255
1256 /* Finds general ivs in basic block BB. */
1257
1258 static void
1259 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1260 {
1261 gimple_stmt_iterator bsi;
1262
1263 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1264 find_givs_in_stmt (data, gsi_stmt (bsi));
1265 }
1266
1267 /* Finds general ivs. */
1268
1269 static void
1270 find_givs (struct ivopts_data *data)
1271 {
1272 struct loop *loop = data->current_loop;
1273 basic_block *body = get_loop_body_in_dom_order (loop);
1274 unsigned i;
1275
1276 for (i = 0; i < loop->num_nodes; i++)
1277 find_givs_in_bb (data, body[i]);
1278 free (body);
1279 }
1280
1281 /* For each ssa name defined in LOOP determines whether it is an induction
1282 variable and if so, its initial value and step. */
1283
1284 static bool
1285 find_induction_variables (struct ivopts_data *data)
1286 {
1287 unsigned i;
1288 bitmap_iterator bi;
1289
1290 if (!find_bivs (data))
1291 return false;
1292
1293 find_givs (data);
1294 mark_bivs (data);
1295
1296 if (dump_file && (dump_flags & TDF_DETAILS))
1297 {
1298 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1299
1300 if (niter)
1301 {
1302 fprintf (dump_file, " number of iterations ");
1303 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1304 if (!integer_zerop (niter->may_be_zero))
1305 {
1306 fprintf (dump_file, "; zero if ");
1307 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1308 }
1309 fprintf (dump_file, "\n\n");
1310 };
1311
1312 fprintf (dump_file, "Induction variables:\n\n");
1313
1314 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1315 {
1316 if (ver_info (data, i)->iv)
1317 dump_iv (dump_file, ver_info (data, i)->iv, true);
1318 }
1319 }
1320
1321 return true;
1322 }
1323
1324 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1325 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1326 is the const offset stripped from IV base. For uses of other types,
1327 ADDR_BASE and ADDR_OFFSET are zero by default. */
1328
1329 static struct iv_use *
1330 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1331 gimple *stmt, enum use_type use_type, tree addr_base = NULL,
1332 unsigned HOST_WIDE_INT addr_offset = 0)
1333 {
1334 struct iv_use *use = XCNEW (struct iv_use);
1335
1336 use->id = n_iv_uses (data);
1337 use->sub_id = 0;
1338 use->type = use_type;
1339 use->iv = iv;
1340 use->stmt = stmt;
1341 use->op_p = use_p;
1342 use->related_cands = BITMAP_ALLOC (NULL);
1343 use->next = NULL;
1344 use->addr_base = addr_base;
1345 use->addr_offset = addr_offset;
1346
1347 data->iv_uses.safe_push (use);
1348
1349 return use;
1350 }
1351
1352 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1353 The sub use is recorded under the one whose use id is ID_GROUP. */
1354
1355 static struct iv_use *
1356 record_sub_use (struct ivopts_data *data, tree *use_p,
1357 struct iv *iv, gimple *stmt, enum use_type use_type,
1358 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1359 unsigned int id_group)
1360 {
1361 struct iv_use *use = XCNEW (struct iv_use);
1362 struct iv_use *group = iv_use (data, id_group);
1363
1364 use->id = group->id;
1365 use->sub_id = 0;
1366 use->type = use_type;
1367 use->iv = iv;
1368 use->stmt = stmt;
1369 use->op_p = use_p;
1370 use->related_cands = NULL;
1371 use->addr_base = addr_base;
1372 use->addr_offset = addr_offset;
1373
1374 /* Sub use list is maintained in offset ascending order. */
1375 if (addr_offset <= group->addr_offset)
1376 {
1377 use->related_cands = group->related_cands;
1378 group->related_cands = NULL;
1379 use->next = group;
1380 data->iv_uses[id_group] = use;
1381 }
1382 else
1383 {
1384 struct iv_use *pre;
1385 do
1386 {
1387 pre = group;
1388 group = group->next;
1389 }
1390 while (group && addr_offset > group->addr_offset);
1391 use->next = pre->next;
1392 pre->next = use;
1393 }
1394
1395 return use;
1396 }
1397
1398 /* Checks whether OP is a loop-level invariant and if so, records it.
1399 NONLINEAR_USE is true if the invariant is used in a way we do not
1400 handle specially. */
1401
1402 static void
1403 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1404 {
1405 basic_block bb;
1406 struct version_info *info;
1407
1408 if (TREE_CODE (op) != SSA_NAME
1409 || virtual_operand_p (op))
1410 return;
1411
1412 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1413 if (bb
1414 && flow_bb_inside_loop_p (data->current_loop, bb))
1415 return;
1416
1417 info = name_info (data, op);
1418 info->name = op;
1419 info->has_nonlin_use |= nonlinear_use;
1420 if (!info->inv_id)
1421 info->inv_id = ++data->max_inv_id;
1422 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1423 }
1424
1425 /* Checks whether the use OP is interesting and if so, records it. */
1426
1427 static struct iv_use *
1428 find_interesting_uses_op (struct ivopts_data *data, tree op)
1429 {
1430 struct iv *iv;
1431 gimple *stmt;
1432 struct iv_use *use;
1433
1434 if (TREE_CODE (op) != SSA_NAME)
1435 return NULL;
1436
1437 iv = get_iv (data, op);
1438 if (!iv)
1439 return NULL;
1440
1441 if (iv->have_use_for)
1442 {
1443 use = iv_use (data, iv->use_id);
1444
1445 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1446 return use;
1447 }
1448
1449 if (integer_zerop (iv->step))
1450 {
1451 record_invariant (data, op, true);
1452 return NULL;
1453 }
1454 iv->have_use_for = true;
1455
1456 stmt = SSA_NAME_DEF_STMT (op);
1457 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1458 || is_gimple_assign (stmt));
1459
1460 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1461 iv->use_id = use->id;
1462
1463 return use;
1464 }
1465
1466 /* Given a condition in statement STMT, checks whether it is a compare
1467 of an induction variable and an invariant. If this is the case,
1468 CONTROL_VAR is set to location of the iv, BOUND to the location of
1469 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1470 induction variable descriptions, and true is returned. If this is not
1471 the case, CONTROL_VAR and BOUND are set to the arguments of the
1472 condition and false is returned. */
1473
1474 static bool
1475 extract_cond_operands (struct ivopts_data *data, gimple *stmt,
1476 tree **control_var, tree **bound,
1477 struct iv **iv_var, struct iv **iv_bound)
1478 {
1479 /* The objects returned when COND has constant operands. */
1480 static struct iv const_iv;
1481 static tree zero;
1482 tree *op0 = &zero, *op1 = &zero;
1483 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1484 bool ret = false;
1485
1486 if (gimple_code (stmt) == GIMPLE_COND)
1487 {
1488 gcond *cond_stmt = as_a <gcond *> (stmt);
1489 op0 = gimple_cond_lhs_ptr (cond_stmt);
1490 op1 = gimple_cond_rhs_ptr (cond_stmt);
1491 }
1492 else
1493 {
1494 op0 = gimple_assign_rhs1_ptr (stmt);
1495 op1 = gimple_assign_rhs2_ptr (stmt);
1496 }
1497
1498 zero = integer_zero_node;
1499 const_iv.step = integer_zero_node;
1500
1501 if (TREE_CODE (*op0) == SSA_NAME)
1502 iv0 = get_iv (data, *op0);
1503 if (TREE_CODE (*op1) == SSA_NAME)
1504 iv1 = get_iv (data, *op1);
1505
1506 /* Exactly one of the compared values must be an iv, and the other one must
1507 be an invariant. */
1508 if (!iv0 || !iv1)
1509 goto end;
1510
1511 if (integer_zerop (iv0->step))
1512 {
1513 /* Control variable may be on the other side. */
1514 std::swap (op0, op1);
1515 std::swap (iv0, iv1);
1516 }
1517 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1518
1519 end:
1520 if (control_var)
1521 *control_var = op0;
1522 if (iv_var)
1523 *iv_var = iv0;
1524 if (bound)
1525 *bound = op1;
1526 if (iv_bound)
1527 *iv_bound = iv1;
1528
1529 return ret;
1530 }
1531
1532 /* Checks whether the condition in STMT is interesting and if so,
1533 records it. */
1534
1535 static void
1536 find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
1537 {
1538 tree *var_p, *bound_p;
1539 struct iv *var_iv;
1540
1541 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1542 {
1543 find_interesting_uses_op (data, *var_p);
1544 find_interesting_uses_op (data, *bound_p);
1545 return;
1546 }
1547
1548 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1549 }
1550
1551 /* Returns the outermost loop EXPR is obviously invariant in
1552 relative to the loop LOOP, i.e. if all its operands are defined
1553 outside of the returned loop. Returns NULL if EXPR is not
1554 even obviously invariant in LOOP. */
1555
1556 struct loop *
1557 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1558 {
1559 basic_block def_bb;
1560 unsigned i, len;
1561
1562 if (is_gimple_min_invariant (expr))
1563 return current_loops->tree_root;
1564
1565 if (TREE_CODE (expr) == SSA_NAME)
1566 {
1567 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1568 if (def_bb)
1569 {
1570 if (flow_bb_inside_loop_p (loop, def_bb))
1571 return NULL;
1572 return superloop_at_depth (loop,
1573 loop_depth (def_bb->loop_father) + 1);
1574 }
1575
1576 return current_loops->tree_root;
1577 }
1578
1579 if (!EXPR_P (expr))
1580 return NULL;
1581
1582 unsigned maxdepth = 0;
1583 len = TREE_OPERAND_LENGTH (expr);
1584 for (i = 0; i < len; i++)
1585 {
1586 struct loop *ivloop;
1587 if (!TREE_OPERAND (expr, i))
1588 continue;
1589
1590 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1591 if (!ivloop)
1592 return NULL;
1593 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1594 }
1595
1596 return superloop_at_depth (loop, maxdepth);
1597 }
1598
1599 /* Returns true if expression EXPR is obviously invariant in LOOP,
1600 i.e. if all its operands are defined outside of the LOOP. LOOP
1601 should not be the function body. */
1602
1603 bool
1604 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1605 {
1606 basic_block def_bb;
1607 unsigned i, len;
1608
1609 gcc_assert (loop_depth (loop) > 0);
1610
1611 if (is_gimple_min_invariant (expr))
1612 return true;
1613
1614 if (TREE_CODE (expr) == SSA_NAME)
1615 {
1616 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1617 if (def_bb
1618 && flow_bb_inside_loop_p (loop, def_bb))
1619 return false;
1620
1621 return true;
1622 }
1623
1624 if (!EXPR_P (expr))
1625 return false;
1626
1627 len = TREE_OPERAND_LENGTH (expr);
1628 for (i = 0; i < len; i++)
1629 if (TREE_OPERAND (expr, i)
1630 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1631 return false;
1632
1633 return true;
1634 }
1635
1636 /* Given expression EXPR which computes inductive values with respect
1637 to loop recorded in DATA, this function returns biv from which EXPR
1638 is derived by tracing definition chains of ssa variables in EXPR. */
1639
1640 static struct iv*
1641 find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1642 {
1643 struct iv *iv;
1644 unsigned i, n;
1645 tree e2, e1;
1646 enum tree_code code;
1647 gimple *stmt;
1648
1649 if (expr == NULL_TREE)
1650 return NULL;
1651
1652 if (is_gimple_min_invariant (expr))
1653 return NULL;
1654
1655 code = TREE_CODE (expr);
1656 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1657 {
1658 n = TREE_OPERAND_LENGTH (expr);
1659 for (i = 0; i < n; i++)
1660 {
1661 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1662 if (iv)
1663 return iv;
1664 }
1665 }
1666
1667 /* Stop if it's not ssa name. */
1668 if (code != SSA_NAME)
1669 return NULL;
1670
1671 iv = get_iv (data, expr);
1672 if (!iv || integer_zerop (iv->step))
1673 return NULL;
1674 else if (iv->biv_p)
1675 return iv;
1676
1677 stmt = SSA_NAME_DEF_STMT (expr);
1678 if (gphi *phi = dyn_cast <gphi *> (stmt))
1679 {
1680 ssa_op_iter iter;
1681 use_operand_p use_p;
1682
1683 if (virtual_operand_p (gimple_phi_result (phi)))
1684 return NULL;
1685
1686 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1687 {
1688 tree use = USE_FROM_PTR (use_p);
1689 iv = find_deriving_biv_for_expr (data, use);
1690 if (iv)
1691 return iv;
1692 }
1693 return NULL;
1694 }
1695 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1696 return NULL;
1697
1698 e1 = gimple_assign_rhs1 (stmt);
1699 code = gimple_assign_rhs_code (stmt);
1700 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1701 return find_deriving_biv_for_expr (data, e1);
1702
1703 switch (code)
1704 {
1705 case MULT_EXPR:
1706 case PLUS_EXPR:
1707 case MINUS_EXPR:
1708 case POINTER_PLUS_EXPR:
1709 /* Increments, decrements and multiplications by a constant
1710 are simple. */
1711 e2 = gimple_assign_rhs2 (stmt);
1712 iv = find_deriving_biv_for_expr (data, e2);
1713 if (iv)
1714 return iv;
1715
1716 /* Fallthru. */
1717 CASE_CONVERT:
1718 /* Casts are simple. */
1719 return find_deriving_biv_for_expr (data, e1);
1720
1721 default:
1722 break;
1723 }
1724
1725 return NULL;
1726 }
1727
1728 /* Record BIV, its predecessor and successor that they are used in
1729 address type uses. */
1730
1731 static void
1732 record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1733 {
1734 unsigned i;
1735 tree type, base_1, base_2;
1736 bitmap_iterator bi;
1737
1738 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1739 || biv->have_address_use || !biv->no_overflow)
1740 return;
1741
1742 type = TREE_TYPE (biv->base);
1743 if (!INTEGRAL_TYPE_P (type))
1744 return;
1745
1746 biv->have_address_use = true;
1747 data->bivs_not_used_in_addr--;
1748 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1749 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1750 {
1751 struct iv *iv = ver_info (data, i)->iv;
1752
1753 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1754 || iv->have_address_use || !iv->no_overflow)
1755 continue;
1756
1757 if (type != TREE_TYPE (iv->base)
1758 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1759 continue;
1760
1761 if (!operand_equal_p (biv->step, iv->step, 0))
1762 continue;
1763
1764 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1765 if (operand_equal_p (base_1, iv->base, 0)
1766 || operand_equal_p (base_2, biv->base, 0))
1767 {
1768 iv->have_address_use = true;
1769 data->bivs_not_used_in_addr--;
1770 }
1771 }
1772 }
1773
1774 /* Cumulates the steps of indices into DATA and replaces their values with the
1775 initial ones. Returns false when the value of the index cannot be determined.
1776 Callback for for_each_index. */
1777
1778 struct ifs_ivopts_data
1779 {
1780 struct ivopts_data *ivopts_data;
1781 gimple *stmt;
1782 tree step;
1783 };
1784
1785 static bool
1786 idx_find_step (tree base, tree *idx, void *data)
1787 {
1788 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1789 struct iv *iv;
1790 bool use_overflow_semantics = false;
1791 tree step, iv_base, iv_step, lbound, off;
1792 struct loop *loop = dta->ivopts_data->current_loop;
1793
1794 /* If base is a component ref, require that the offset of the reference
1795 be invariant. */
1796 if (TREE_CODE (base) == COMPONENT_REF)
1797 {
1798 off = component_ref_field_offset (base);
1799 return expr_invariant_in_loop_p (loop, off);
1800 }
1801
1802 /* If base is array, first check whether we will be able to move the
1803 reference out of the loop (in order to take its address in strength
1804 reduction). In order for this to work we need both lower bound
1805 and step to be loop invariants. */
1806 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1807 {
1808 /* Moreover, for a range, the size needs to be invariant as well. */
1809 if (TREE_CODE (base) == ARRAY_RANGE_REF
1810 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1811 return false;
1812
1813 step = array_ref_element_size (base);
1814 lbound = array_ref_low_bound (base);
1815
1816 if (!expr_invariant_in_loop_p (loop, step)
1817 || !expr_invariant_in_loop_p (loop, lbound))
1818 return false;
1819 }
1820
1821 if (TREE_CODE (*idx) != SSA_NAME)
1822 return true;
1823
1824 iv = get_iv (dta->ivopts_data, *idx);
1825 if (!iv)
1826 return false;
1827
1828 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1829 *&x[0], which is not folded and does not trigger the
1830 ARRAY_REF path below. */
1831 *idx = iv->base;
1832
1833 if (integer_zerop (iv->step))
1834 return true;
1835
1836 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1837 {
1838 step = array_ref_element_size (base);
1839
1840 /* We only handle addresses whose step is an integer constant. */
1841 if (TREE_CODE (step) != INTEGER_CST)
1842 return false;
1843 }
1844 else
1845 /* The step for pointer arithmetics already is 1 byte. */
1846 step = size_one_node;
1847
1848 iv_base = iv->base;
1849 iv_step = iv->step;
1850 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1851 use_overflow_semantics = true;
1852
1853 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1854 sizetype, &iv_base, &iv_step, dta->stmt,
1855 use_overflow_semantics))
1856 {
1857 /* The index might wrap. */
1858 return false;
1859 }
1860
1861 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1862 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1863
1864 if (dta->ivopts_data->bivs_not_used_in_addr)
1865 {
1866 if (!iv->biv_p)
1867 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
1868
1869 record_biv_for_address_use (dta->ivopts_data, iv);
1870 }
1871 return true;
1872 }
1873
1874 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1875 object is passed to it in DATA. */
1876
1877 static bool
1878 idx_record_use (tree base, tree *idx,
1879 void *vdata)
1880 {
1881 struct ivopts_data *data = (struct ivopts_data *) vdata;
1882 find_interesting_uses_op (data, *idx);
1883 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1884 {
1885 find_interesting_uses_op (data, array_ref_element_size (base));
1886 find_interesting_uses_op (data, array_ref_low_bound (base));
1887 }
1888 return true;
1889 }
1890
1891 /* If we can prove that TOP = cst * BOT for some constant cst,
1892 store cst to MUL and return true. Otherwise return false.
1893 The returned value is always sign-extended, regardless of the
1894 signedness of TOP and BOT. */
1895
1896 static bool
1897 constant_multiple_of (tree top, tree bot, widest_int *mul)
1898 {
1899 tree mby;
1900 enum tree_code code;
1901 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1902 widest_int res, p0, p1;
1903
1904 STRIP_NOPS (top);
1905 STRIP_NOPS (bot);
1906
1907 if (operand_equal_p (top, bot, 0))
1908 {
1909 *mul = 1;
1910 return true;
1911 }
1912
1913 code = TREE_CODE (top);
1914 switch (code)
1915 {
1916 case MULT_EXPR:
1917 mby = TREE_OPERAND (top, 1);
1918 if (TREE_CODE (mby) != INTEGER_CST)
1919 return false;
1920
1921 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1922 return false;
1923
1924 *mul = wi::sext (res * wi::to_widest (mby), precision);
1925 return true;
1926
1927 case PLUS_EXPR:
1928 case MINUS_EXPR:
1929 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1930 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1931 return false;
1932
1933 if (code == MINUS_EXPR)
1934 p1 = -p1;
1935 *mul = wi::sext (p0 + p1, precision);
1936 return true;
1937
1938 case INTEGER_CST:
1939 if (TREE_CODE (bot) != INTEGER_CST)
1940 return false;
1941
1942 p0 = widest_int::from (top, SIGNED);
1943 p1 = widest_int::from (bot, SIGNED);
1944 if (p1 == 0)
1945 return false;
1946 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1947 return res == 0;
1948
1949 default:
1950 return false;
1951 }
1952 }
1953
1954 /* Return true if memory reference REF with step STEP may be unaligned. */
1955
1956 static bool
1957 may_be_unaligned_p (tree ref, tree step)
1958 {
1959 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1960 thus they are not misaligned. */
1961 if (TREE_CODE (ref) == TARGET_MEM_REF)
1962 return false;
1963
1964 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1965 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1966 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1967
1968 unsigned HOST_WIDE_INT bitpos;
1969 unsigned int ref_align;
1970 get_object_alignment_1 (ref, &ref_align, &bitpos);
1971 if (ref_align < align
1972 || (bitpos % align) != 0
1973 || (bitpos % BITS_PER_UNIT) != 0)
1974 return true;
1975
1976 unsigned int trailing_zeros = tree_ctz (step);
1977 if (trailing_zeros < HOST_BITS_PER_INT
1978 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1979 return true;
1980
1981 return false;
1982 }
1983
1984 /* Return true if EXPR may be non-addressable. */
1985
1986 bool
1987 may_be_nonaddressable_p (tree expr)
1988 {
1989 switch (TREE_CODE (expr))
1990 {
1991 case TARGET_MEM_REF:
1992 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1993 target, thus they are always addressable. */
1994 return false;
1995
1996 case COMPONENT_REF:
1997 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1998 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1999
2000 case VIEW_CONVERT_EXPR:
2001 /* This kind of view-conversions may wrap non-addressable objects
2002 and make them look addressable. After some processing the
2003 non-addressability may be uncovered again, causing ADDR_EXPRs
2004 of inappropriate objects to be built. */
2005 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2006 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2007 return true;
2008
2009 /* ... fall through ... */
2010
2011 case ARRAY_REF:
2012 case ARRAY_RANGE_REF:
2013 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2014
2015 CASE_CONVERT:
2016 return true;
2017
2018 default:
2019 break;
2020 }
2021
2022 return false;
2023 }
2024
2025 static tree
2026 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
2027
2028 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2029 If there is an existing use which has same stripped iv base and step,
2030 this function records this one as a sub use to that; otherwise records
2031 it as a normal one. */
2032
2033 static struct iv_use *
2034 record_group_use (struct ivopts_data *data, tree *use_p,
2035 struct iv *iv, gimple *stmt, enum use_type use_type)
2036 {
2037 unsigned int i;
2038 struct iv_use *use;
2039 tree addr_base;
2040 unsigned HOST_WIDE_INT addr_offset;
2041
2042 /* Only support sub use for address type uses, that is, with base
2043 object. */
2044 if (!iv->base_object)
2045 return record_use (data, use_p, iv, stmt, use_type);
2046
2047 addr_base = strip_offset (iv->base, &addr_offset);
2048 for (i = 0; i < n_iv_uses (data); i++)
2049 {
2050 use = iv_use (data, i);
2051 if (use->type != USE_ADDRESS || !use->iv->base_object)
2052 continue;
2053
2054 /* Check if it has the same stripped base and step. */
2055 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
2056 && operand_equal_p (iv->step, use->iv->step, 0)
2057 && operand_equal_p (addr_base, use->addr_base, 0))
2058 break;
2059 }
2060
2061 if (i == n_iv_uses (data))
2062 return record_use (data, use_p, iv, stmt,
2063 use_type, addr_base, addr_offset);
2064 else
2065 return record_sub_use (data, use_p, iv, stmt,
2066 use_type, addr_base, addr_offset, i);
2067 }
2068
2069 /* Finds addresses in *OP_P inside STMT. */
2070
2071 static void
2072 find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2073 tree *op_p)
2074 {
2075 tree base = *op_p, step = size_zero_node;
2076 struct iv *civ;
2077 struct ifs_ivopts_data ifs_ivopts_data;
2078
2079 /* Do not play with volatile memory references. A bit too conservative,
2080 perhaps, but safe. */
2081 if (gimple_has_volatile_ops (stmt))
2082 goto fail;
2083
2084 /* Ignore bitfields for now. Not really something terribly complicated
2085 to handle. TODO. */
2086 if (TREE_CODE (base) == BIT_FIELD_REF)
2087 goto fail;
2088
2089 base = unshare_expr (base);
2090
2091 if (TREE_CODE (base) == TARGET_MEM_REF)
2092 {
2093 tree type = build_pointer_type (TREE_TYPE (base));
2094 tree astep;
2095
2096 if (TMR_BASE (base)
2097 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2098 {
2099 civ = get_iv (data, TMR_BASE (base));
2100 if (!civ)
2101 goto fail;
2102
2103 TMR_BASE (base) = civ->base;
2104 step = civ->step;
2105 }
2106 if (TMR_INDEX2 (base)
2107 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2108 {
2109 civ = get_iv (data, TMR_INDEX2 (base));
2110 if (!civ)
2111 goto fail;
2112
2113 TMR_INDEX2 (base) = civ->base;
2114 step = civ->step;
2115 }
2116 if (TMR_INDEX (base)
2117 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2118 {
2119 civ = get_iv (data, TMR_INDEX (base));
2120 if (!civ)
2121 goto fail;
2122
2123 TMR_INDEX (base) = civ->base;
2124 astep = civ->step;
2125
2126 if (astep)
2127 {
2128 if (TMR_STEP (base))
2129 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2130
2131 step = fold_build2 (PLUS_EXPR, type, step, astep);
2132 }
2133 }
2134
2135 if (integer_zerop (step))
2136 goto fail;
2137 base = tree_mem_ref_addr (type, base);
2138 }
2139 else
2140 {
2141 ifs_ivopts_data.ivopts_data = data;
2142 ifs_ivopts_data.stmt = stmt;
2143 ifs_ivopts_data.step = size_zero_node;
2144 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2145 || integer_zerop (ifs_ivopts_data.step))
2146 goto fail;
2147 step = ifs_ivopts_data.step;
2148
2149 /* Check that the base expression is addressable. This needs
2150 to be done after substituting bases of IVs into it. */
2151 if (may_be_nonaddressable_p (base))
2152 goto fail;
2153
2154 /* Moreover, on strict alignment platforms, check that it is
2155 sufficiently aligned. */
2156 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2157 goto fail;
2158
2159 base = build_fold_addr_expr (base);
2160
2161 /* Substituting bases of IVs into the base expression might
2162 have caused folding opportunities. */
2163 if (TREE_CODE (base) == ADDR_EXPR)
2164 {
2165 tree *ref = &TREE_OPERAND (base, 0);
2166 while (handled_component_p (*ref))
2167 ref = &TREE_OPERAND (*ref, 0);
2168 if (TREE_CODE (*ref) == MEM_REF)
2169 {
2170 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2171 TREE_OPERAND (*ref, 0),
2172 TREE_OPERAND (*ref, 1));
2173 if (tem)
2174 *ref = tem;
2175 }
2176 }
2177 }
2178
2179 civ = alloc_iv (data, base, step);
2180 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2181 return;
2182
2183 fail:
2184 for_each_index (op_p, idx_record_use, data);
2185 }
2186
2187 /* Finds and records invariants used in STMT. */
2188
2189 static void
2190 find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
2191 {
2192 ssa_op_iter iter;
2193 use_operand_p use_p;
2194 tree op;
2195
2196 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2197 {
2198 op = USE_FROM_PTR (use_p);
2199 record_invariant (data, op, false);
2200 }
2201 }
2202
2203 /* Finds interesting uses of induction variables in the statement STMT. */
2204
2205 static void
2206 find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
2207 {
2208 struct iv *iv;
2209 tree op, *lhs, *rhs;
2210 ssa_op_iter iter;
2211 use_operand_p use_p;
2212 enum tree_code code;
2213
2214 find_invariants_stmt (data, stmt);
2215
2216 if (gimple_code (stmt) == GIMPLE_COND)
2217 {
2218 find_interesting_uses_cond (data, stmt);
2219 return;
2220 }
2221
2222 if (is_gimple_assign (stmt))
2223 {
2224 lhs = gimple_assign_lhs_ptr (stmt);
2225 rhs = gimple_assign_rhs1_ptr (stmt);
2226
2227 if (TREE_CODE (*lhs) == SSA_NAME)
2228 {
2229 /* If the statement defines an induction variable, the uses are not
2230 interesting by themselves. */
2231
2232 iv = get_iv (data, *lhs);
2233
2234 if (iv && !integer_zerop (iv->step))
2235 return;
2236 }
2237
2238 code = gimple_assign_rhs_code (stmt);
2239 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2240 && (REFERENCE_CLASS_P (*rhs)
2241 || is_gimple_val (*rhs)))
2242 {
2243 if (REFERENCE_CLASS_P (*rhs))
2244 find_interesting_uses_address (data, stmt, rhs);
2245 else
2246 find_interesting_uses_op (data, *rhs);
2247
2248 if (REFERENCE_CLASS_P (*lhs))
2249 find_interesting_uses_address (data, stmt, lhs);
2250 return;
2251 }
2252 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2253 {
2254 find_interesting_uses_cond (data, stmt);
2255 return;
2256 }
2257
2258 /* TODO -- we should also handle address uses of type
2259
2260 memory = call (whatever);
2261
2262 and
2263
2264 call (memory). */
2265 }
2266
2267 if (gimple_code (stmt) == GIMPLE_PHI
2268 && gimple_bb (stmt) == data->current_loop->header)
2269 {
2270 iv = get_iv (data, PHI_RESULT (stmt));
2271
2272 if (iv && !integer_zerop (iv->step))
2273 return;
2274 }
2275
2276 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2277 {
2278 op = USE_FROM_PTR (use_p);
2279
2280 if (TREE_CODE (op) != SSA_NAME)
2281 continue;
2282
2283 iv = get_iv (data, op);
2284 if (!iv)
2285 continue;
2286
2287 find_interesting_uses_op (data, op);
2288 }
2289 }
2290
2291 /* Finds interesting uses of induction variables outside of loops
2292 on loop exit edge EXIT. */
2293
2294 static void
2295 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2296 {
2297 gphi *phi;
2298 gphi_iterator psi;
2299 tree def;
2300
2301 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2302 {
2303 phi = psi.phi ();
2304 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2305 if (!virtual_operand_p (def))
2306 find_interesting_uses_op (data, def);
2307 }
2308 }
2309
2310 /* Finds uses of the induction variables that are interesting. */
2311
2312 static void
2313 find_interesting_uses (struct ivopts_data *data)
2314 {
2315 basic_block bb;
2316 gimple_stmt_iterator bsi;
2317 basic_block *body = get_loop_body (data->current_loop);
2318 unsigned i;
2319 struct version_info *info;
2320 edge e;
2321
2322 if (dump_file && (dump_flags & TDF_DETAILS))
2323 fprintf (dump_file, "Uses:\n\n");
2324
2325 for (i = 0; i < data->current_loop->num_nodes; i++)
2326 {
2327 edge_iterator ei;
2328 bb = body[i];
2329
2330 FOR_EACH_EDGE (e, ei, bb->succs)
2331 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2332 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2333 find_interesting_uses_outside (data, e);
2334
2335 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2336 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2337 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2338 if (!is_gimple_debug (gsi_stmt (bsi)))
2339 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2340 }
2341
2342 if (dump_file && (dump_flags & TDF_DETAILS))
2343 {
2344 bitmap_iterator bi;
2345
2346 fprintf (dump_file, "\n");
2347
2348 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2349 {
2350 info = ver_info (data, i);
2351 if (info->inv_id)
2352 {
2353 fprintf (dump_file, " ");
2354 print_generic_expr (dump_file, info->name, TDF_SLIM);
2355 fprintf (dump_file, " is invariant (%d)%s\n",
2356 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2357 }
2358 }
2359
2360 fprintf (dump_file, "\n");
2361 }
2362
2363 free (body);
2364 }
2365
2366 /* Compute maximum offset of [base + offset] addressing mode
2367 for memory reference represented by USE. */
2368
2369 static HOST_WIDE_INT
2370 compute_max_addr_offset (struct iv_use *use)
2371 {
2372 int width;
2373 rtx reg, addr;
2374 HOST_WIDE_INT i, off;
2375 unsigned list_index, num;
2376 addr_space_t as;
2377 machine_mode mem_mode, addr_mode;
2378 static vec<HOST_WIDE_INT> max_offset_list;
2379
2380 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2381 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2382
2383 num = max_offset_list.length ();
2384 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2385 if (list_index >= num)
2386 {
2387 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2388 for (; num < max_offset_list.length (); num++)
2389 max_offset_list[num] = -1;
2390 }
2391
2392 off = max_offset_list[list_index];
2393 if (off != -1)
2394 return off;
2395
2396 addr_mode = targetm.addr_space.address_mode (as);
2397 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2398 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2399
2400 width = GET_MODE_BITSIZE (addr_mode) - 1;
2401 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2402 width = HOST_BITS_PER_WIDE_INT - 1;
2403
2404 for (i = width; i > 0; i--)
2405 {
2406 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2407 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2408 if (memory_address_addr_space_p (mem_mode, addr, as))
2409 break;
2410
2411 /* For some strict-alignment targets, the offset must be naturally
2412 aligned. Try an aligned offset if mem_mode is not QImode. */
2413 off = ((unsigned HOST_WIDE_INT) 1 << i);
2414 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2415 {
2416 off -= GET_MODE_SIZE (mem_mode);
2417 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2418 if (memory_address_addr_space_p (mem_mode, addr, as))
2419 break;
2420 }
2421 }
2422 if (i == 0)
2423 off = 0;
2424
2425 max_offset_list[list_index] = off;
2426 return off;
2427 }
2428
2429 /* Check if all small groups should be split. Return true if and
2430 only if:
2431
2432 1) At least one groups contain two uses with different offsets.
2433 2) No group contains more than two uses with different offsets.
2434
2435 Return false otherwise. We want to split such groups because:
2436
2437 1) Small groups don't have much benefit and may interfer with
2438 general candidate selection.
2439 2) Size for problem with only small groups is usually small and
2440 general algorithm can handle it well.
2441
2442 TODO -- Above claim may not hold when auto increment is supported. */
2443
2444 static bool
2445 split_all_small_groups (struct ivopts_data *data)
2446 {
2447 bool split_p = false;
2448 unsigned int i, n, distinct;
2449 struct iv_use *pre, *use;
2450
2451 n = n_iv_uses (data);
2452 for (i = 0; i < n; i++)
2453 {
2454 use = iv_use (data, i);
2455 if (!use->next)
2456 continue;
2457
2458 distinct = 1;
2459 gcc_assert (use->type == USE_ADDRESS);
2460 for (pre = use, use = use->next; use; pre = use, use = use->next)
2461 {
2462 if (pre->addr_offset != use->addr_offset)
2463 distinct++;
2464
2465 if (distinct > 2)
2466 return false;
2467 }
2468 if (distinct == 2)
2469 split_p = true;
2470 }
2471
2472 return split_p;
2473 }
2474
2475 /* For each group of address type uses, this function further groups
2476 these uses according to the maximum offset supported by target's
2477 [base + offset] addressing mode. */
2478
2479 static void
2480 group_address_uses (struct ivopts_data *data)
2481 {
2482 HOST_WIDE_INT max_offset = -1;
2483 unsigned int i, n, sub_id;
2484 struct iv_use *pre, *use;
2485 unsigned HOST_WIDE_INT addr_offset_first;
2486
2487 /* Reset max offset to split all small groups. */
2488 if (split_all_small_groups (data))
2489 max_offset = 0;
2490
2491 n = n_iv_uses (data);
2492 for (i = 0; i < n; i++)
2493 {
2494 use = iv_use (data, i);
2495 if (!use->next)
2496 continue;
2497
2498 gcc_assert (use->type == USE_ADDRESS);
2499 if (max_offset != 0)
2500 max_offset = compute_max_addr_offset (use);
2501
2502 while (use)
2503 {
2504 sub_id = 0;
2505 addr_offset_first = use->addr_offset;
2506 /* Only uses with offset that can fit in offset part against
2507 the first use can be grouped together. */
2508 for (pre = use, use = use->next;
2509 use && (use->addr_offset - addr_offset_first
2510 <= (unsigned HOST_WIDE_INT) max_offset);
2511 pre = use, use = use->next)
2512 {
2513 use->id = pre->id;
2514 use->sub_id = ++sub_id;
2515 }
2516
2517 /* Break the list and create new group. */
2518 if (use)
2519 {
2520 pre->next = NULL;
2521 use->id = n_iv_uses (data);
2522 use->related_cands = BITMAP_ALLOC (NULL);
2523 data->iv_uses.safe_push (use);
2524 }
2525 }
2526 }
2527
2528 if (dump_file && (dump_flags & TDF_DETAILS))
2529 dump_uses (dump_file, data);
2530 }
2531
2532 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2533 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2534 we are at the top-level of the processed address. */
2535
2536 static tree
2537 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2538 HOST_WIDE_INT *offset)
2539 {
2540 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2541 enum tree_code code;
2542 tree type, orig_type = TREE_TYPE (expr);
2543 HOST_WIDE_INT off0, off1, st;
2544 tree orig_expr = expr;
2545
2546 STRIP_NOPS (expr);
2547
2548 type = TREE_TYPE (expr);
2549 code = TREE_CODE (expr);
2550 *offset = 0;
2551
2552 switch (code)
2553 {
2554 case INTEGER_CST:
2555 if (!cst_and_fits_in_hwi (expr)
2556 || integer_zerop (expr))
2557 return orig_expr;
2558
2559 *offset = int_cst_value (expr);
2560 return build_int_cst (orig_type, 0);
2561
2562 case POINTER_PLUS_EXPR:
2563 case PLUS_EXPR:
2564 case MINUS_EXPR:
2565 op0 = TREE_OPERAND (expr, 0);
2566 op1 = TREE_OPERAND (expr, 1);
2567
2568 op0 = strip_offset_1 (op0, false, false, &off0);
2569 op1 = strip_offset_1 (op1, false, false, &off1);
2570
2571 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2572 if (op0 == TREE_OPERAND (expr, 0)
2573 && op1 == TREE_OPERAND (expr, 1))
2574 return orig_expr;
2575
2576 if (integer_zerop (op1))
2577 expr = op0;
2578 else if (integer_zerop (op0))
2579 {
2580 if (code == MINUS_EXPR)
2581 expr = fold_build1 (NEGATE_EXPR, type, op1);
2582 else
2583 expr = op1;
2584 }
2585 else
2586 expr = fold_build2 (code, type, op0, op1);
2587
2588 return fold_convert (orig_type, expr);
2589
2590 case MULT_EXPR:
2591 op1 = TREE_OPERAND (expr, 1);
2592 if (!cst_and_fits_in_hwi (op1))
2593 return orig_expr;
2594
2595 op0 = TREE_OPERAND (expr, 0);
2596 op0 = strip_offset_1 (op0, false, false, &off0);
2597 if (op0 == TREE_OPERAND (expr, 0))
2598 return orig_expr;
2599
2600 *offset = off0 * int_cst_value (op1);
2601 if (integer_zerop (op0))
2602 expr = op0;
2603 else
2604 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2605
2606 return fold_convert (orig_type, expr);
2607
2608 case ARRAY_REF:
2609 case ARRAY_RANGE_REF:
2610 if (!inside_addr)
2611 return orig_expr;
2612
2613 step = array_ref_element_size (expr);
2614 if (!cst_and_fits_in_hwi (step))
2615 break;
2616
2617 st = int_cst_value (step);
2618 op1 = TREE_OPERAND (expr, 1);
2619 op1 = strip_offset_1 (op1, false, false, &off1);
2620 *offset = off1 * st;
2621
2622 if (top_compref
2623 && integer_zerop (op1))
2624 {
2625 /* Strip the component reference completely. */
2626 op0 = TREE_OPERAND (expr, 0);
2627 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2628 *offset += off0;
2629 return op0;
2630 }
2631 break;
2632
2633 case COMPONENT_REF:
2634 {
2635 tree field;
2636
2637 if (!inside_addr)
2638 return orig_expr;
2639
2640 tmp = component_ref_field_offset (expr);
2641 field = TREE_OPERAND (expr, 1);
2642 if (top_compref
2643 && cst_and_fits_in_hwi (tmp)
2644 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2645 {
2646 HOST_WIDE_INT boffset, abs_off;
2647
2648 /* Strip the component reference completely. */
2649 op0 = TREE_OPERAND (expr, 0);
2650 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2651 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2652 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2653 if (boffset < 0)
2654 abs_off = -abs_off;
2655
2656 *offset = off0 + int_cst_value (tmp) + abs_off;
2657 return op0;
2658 }
2659 }
2660 break;
2661
2662 case ADDR_EXPR:
2663 op0 = TREE_OPERAND (expr, 0);
2664 op0 = strip_offset_1 (op0, true, true, &off0);
2665 *offset += off0;
2666
2667 if (op0 == TREE_OPERAND (expr, 0))
2668 return orig_expr;
2669
2670 expr = build_fold_addr_expr (op0);
2671 return fold_convert (orig_type, expr);
2672
2673 case MEM_REF:
2674 /* ??? Offset operand? */
2675 inside_addr = false;
2676 break;
2677
2678 default:
2679 return orig_expr;
2680 }
2681
2682 /* Default handling of expressions for that we want to recurse into
2683 the first operand. */
2684 op0 = TREE_OPERAND (expr, 0);
2685 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2686 *offset += off0;
2687
2688 if (op0 == TREE_OPERAND (expr, 0)
2689 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2690 return orig_expr;
2691
2692 expr = copy_node (expr);
2693 TREE_OPERAND (expr, 0) = op0;
2694 if (op1)
2695 TREE_OPERAND (expr, 1) = op1;
2696
2697 /* Inside address, we might strip the top level component references,
2698 thus changing type of the expression. Handling of ADDR_EXPR
2699 will fix that. */
2700 expr = fold_convert (orig_type, expr);
2701
2702 return expr;
2703 }
2704
2705 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2706
2707 static tree
2708 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2709 {
2710 HOST_WIDE_INT off;
2711 tree core = strip_offset_1 (expr, false, false, &off);
2712 *offset = off;
2713 return core;
2714 }
2715
2716 /* Returns variant of TYPE that can be used as base for different uses.
2717 We return unsigned type with the same precision, which avoids problems
2718 with overflows. */
2719
2720 static tree
2721 generic_type_for (tree type)
2722 {
2723 if (POINTER_TYPE_P (type))
2724 return unsigned_type_for (type);
2725
2726 if (TYPE_UNSIGNED (type))
2727 return type;
2728
2729 return unsigned_type_for (type);
2730 }
2731
2732 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2733 the bitmap to that we should store it. */
2734
2735 static struct ivopts_data *fd_ivopts_data;
2736 static tree
2737 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2738 {
2739 bitmap *depends_on = (bitmap *) data;
2740 struct version_info *info;
2741
2742 if (TREE_CODE (*expr_p) != SSA_NAME)
2743 return NULL_TREE;
2744 info = name_info (fd_ivopts_data, *expr_p);
2745
2746 if (!info->inv_id || info->has_nonlin_use)
2747 return NULL_TREE;
2748
2749 if (!*depends_on)
2750 *depends_on = BITMAP_ALLOC (NULL);
2751 bitmap_set_bit (*depends_on, info->inv_id);
2752
2753 return NULL_TREE;
2754 }
2755
2756 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2757 position to POS. If USE is not NULL, the candidate is set as related to
2758 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2759 replacement of the final value of the iv by a direct computation. */
2760
2761 static struct iv_cand *
2762 add_candidate_1 (struct ivopts_data *data,
2763 tree base, tree step, bool important, enum iv_position pos,
2764 struct iv_use *use, gimple *incremented_at,
2765 struct iv *orig_iv = NULL)
2766 {
2767 unsigned i;
2768 struct iv_cand *cand = NULL;
2769 tree type, orig_type;
2770
2771 /* For non-original variables, make sure their values are computed in a type
2772 that does not invoke undefined behavior on overflows (since in general,
2773 we cannot prove that these induction variables are non-wrapping). */
2774 if (pos != IP_ORIGINAL)
2775 {
2776 orig_type = TREE_TYPE (base);
2777 type = generic_type_for (orig_type);
2778 if (type != orig_type)
2779 {
2780 base = fold_convert (type, base);
2781 step = fold_convert (type, step);
2782 }
2783 }
2784
2785 for (i = 0; i < n_iv_cands (data); i++)
2786 {
2787 cand = iv_cand (data, i);
2788
2789 if (cand->pos != pos)
2790 continue;
2791
2792 if (cand->incremented_at != incremented_at
2793 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2794 && cand->ainc_use != use))
2795 continue;
2796
2797 if (!cand->iv)
2798 {
2799 if (!base && !step)
2800 break;
2801
2802 continue;
2803 }
2804
2805 if (!base && !step)
2806 continue;
2807
2808 if (operand_equal_p (base, cand->iv->base, 0)
2809 && operand_equal_p (step, cand->iv->step, 0)
2810 && (TYPE_PRECISION (TREE_TYPE (base))
2811 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2812 break;
2813 }
2814
2815 if (i == n_iv_cands (data))
2816 {
2817 cand = XCNEW (struct iv_cand);
2818 cand->id = i;
2819
2820 if (!base && !step)
2821 cand->iv = NULL;
2822 else
2823 cand->iv = alloc_iv (data, base, step);
2824
2825 cand->pos = pos;
2826 if (pos != IP_ORIGINAL && cand->iv)
2827 {
2828 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2829 cand->var_after = cand->var_before;
2830 }
2831 cand->important = important;
2832 cand->incremented_at = incremented_at;
2833 data->iv_candidates.safe_push (cand);
2834
2835 if (step
2836 && TREE_CODE (step) != INTEGER_CST)
2837 {
2838 fd_ivopts_data = data;
2839 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2840 }
2841
2842 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2843 cand->ainc_use = use;
2844 else
2845 cand->ainc_use = NULL;
2846
2847 cand->orig_iv = orig_iv;
2848 if (dump_file && (dump_flags & TDF_DETAILS))
2849 dump_cand (dump_file, cand);
2850 }
2851
2852 if (important && !cand->important)
2853 {
2854 cand->important = true;
2855 if (dump_file && (dump_flags & TDF_DETAILS))
2856 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2857 }
2858
2859 if (use)
2860 {
2861 bitmap_set_bit (use->related_cands, i);
2862 if (dump_file && (dump_flags & TDF_DETAILS))
2863 fprintf (dump_file, "Candidate %d is related to use %d\n",
2864 cand->id, use->id);
2865 }
2866
2867 return cand;
2868 }
2869
2870 /* Returns true if incrementing the induction variable at the end of the LOOP
2871 is allowed.
2872
2873 The purpose is to avoid splitting latch edge with a biv increment, thus
2874 creating a jump, possibly confusing other optimization passes and leaving
2875 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2876 is not available (so we do not have a better alternative), or if the latch
2877 edge is already nonempty. */
2878
2879 static bool
2880 allow_ip_end_pos_p (struct loop *loop)
2881 {
2882 if (!ip_normal_pos (loop))
2883 return true;
2884
2885 if (!empty_block_p (ip_end_pos (loop)))
2886 return true;
2887
2888 return false;
2889 }
2890
2891 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2892 Important field is set to IMPORTANT. */
2893
2894 static void
2895 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2896 bool important, struct iv_use *use)
2897 {
2898 basic_block use_bb = gimple_bb (use->stmt);
2899 machine_mode mem_mode;
2900 unsigned HOST_WIDE_INT cstepi;
2901
2902 /* If we insert the increment in any position other than the standard
2903 ones, we must ensure that it is incremented once per iteration.
2904 It must not be in an inner nested loop, or one side of an if
2905 statement. */
2906 if (use_bb->loop_father != data->current_loop
2907 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2908 || stmt_could_throw_p (use->stmt)
2909 || !cst_and_fits_in_hwi (step))
2910 return;
2911
2912 cstepi = int_cst_value (step);
2913
2914 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2915 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2916 || USE_STORE_PRE_INCREMENT (mem_mode))
2917 && GET_MODE_SIZE (mem_mode) == cstepi)
2918 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2919 || USE_STORE_PRE_DECREMENT (mem_mode))
2920 && GET_MODE_SIZE (mem_mode) == -cstepi))
2921 {
2922 enum tree_code code = MINUS_EXPR;
2923 tree new_base;
2924 tree new_step = step;
2925
2926 if (POINTER_TYPE_P (TREE_TYPE (base)))
2927 {
2928 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2929 code = POINTER_PLUS_EXPR;
2930 }
2931 else
2932 new_step = fold_convert (TREE_TYPE (base), new_step);
2933 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2934 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2935 use->stmt);
2936 }
2937 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2938 || USE_STORE_POST_INCREMENT (mem_mode))
2939 && GET_MODE_SIZE (mem_mode) == cstepi)
2940 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2941 || USE_STORE_POST_DECREMENT (mem_mode))
2942 && GET_MODE_SIZE (mem_mode) == -cstepi))
2943 {
2944 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2945 use->stmt);
2946 }
2947 }
2948
2949 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2950 position to POS. If USE is not NULL, the candidate is set as related to
2951 it. The candidate computation is scheduled before exit condition and at
2952 the end of loop. */
2953
2954 static void
2955 add_candidate (struct ivopts_data *data,
2956 tree base, tree step, bool important, struct iv_use *use,
2957 struct iv *orig_iv = NULL)
2958 {
2959 gcc_assert (use == NULL || use->sub_id == 0);
2960
2961 if (ip_normal_pos (data->current_loop))
2962 add_candidate_1 (data, base, step, important,
2963 IP_NORMAL, use, NULL, orig_iv);
2964 if (ip_end_pos (data->current_loop)
2965 && allow_ip_end_pos_p (data->current_loop))
2966 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
2967 }
2968
2969 /* Adds standard iv candidates. */
2970
2971 static void
2972 add_standard_iv_candidates (struct ivopts_data *data)
2973 {
2974 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2975
2976 /* The same for a double-integer type if it is still fast enough. */
2977 if (TYPE_PRECISION
2978 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2979 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2980 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2981 build_int_cst (long_integer_type_node, 1), true, NULL);
2982
2983 /* The same for a double-integer type if it is still fast enough. */
2984 if (TYPE_PRECISION
2985 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2986 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2987 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2988 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2989 }
2990
2991
2992 /* Adds candidates bases on the old induction variable IV. */
2993
2994 static void
2995 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2996 {
2997 gimple *phi;
2998 tree def;
2999 struct iv_cand *cand;
3000
3001 /* Check if this biv is used in address type use. */
3002 if (iv->no_overflow && iv->have_address_use
3003 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3004 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3005 {
3006 tree base = fold_convert (sizetype, iv->base);
3007 tree step = fold_convert (sizetype, iv->step);
3008
3009 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3010 add_candidate (data, base, step, true, NULL, iv);
3011 /* Add iv cand of the original type only if it has nonlinear use. */
3012 if (iv->have_use_for)
3013 add_candidate (data, iv->base, iv->step, true, NULL);
3014 }
3015 else
3016 add_candidate (data, iv->base, iv->step, true, NULL);
3017
3018 /* The same, but with initial value zero. */
3019 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3020 add_candidate (data, size_int (0), iv->step, true, NULL);
3021 else
3022 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3023 iv->step, true, NULL);
3024
3025 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
3026 if (gimple_code (phi) == GIMPLE_PHI)
3027 {
3028 /* Additionally record the possibility of leaving the original iv
3029 untouched. */
3030 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
3031 /* Don't add candidate if it's from another PHI node because
3032 it's an affine iv appearing in the form of PEELED_CHREC. */
3033 phi = SSA_NAME_DEF_STMT (def);
3034 if (gimple_code (phi) != GIMPLE_PHI)
3035 {
3036 cand = add_candidate_1 (data,
3037 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3038 SSA_NAME_DEF_STMT (def));
3039 cand->var_before = iv->ssa_name;
3040 cand->var_after = def;
3041 }
3042 else
3043 gcc_assert (gimple_bb (phi) == data->current_loop->header);
3044 }
3045 }
3046
3047 /* Adds candidates based on the old induction variables. */
3048
3049 static void
3050 add_iv_candidate_for_bivs (struct ivopts_data *data)
3051 {
3052 unsigned i;
3053 struct iv *iv;
3054 bitmap_iterator bi;
3055
3056 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
3057 {
3058 iv = ver_info (data, i)->iv;
3059 if (iv && iv->biv_p && !integer_zerop (iv->step))
3060 add_iv_candidate_for_biv (data, iv);
3061 }
3062 }
3063
3064 /* Adds candidates based on the value of USE's iv. */
3065
3066 static void
3067 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
3068 {
3069 unsigned HOST_WIDE_INT offset;
3070 tree base;
3071 tree basetype;
3072 struct iv *iv = use->iv;
3073
3074 add_candidate (data, iv->base, iv->step, false, use);
3075
3076 /* The same, but with initial value zero. Make such variable important,
3077 since it is generic enough so that possibly many uses may be based
3078 on it. */
3079 basetype = TREE_TYPE (iv->base);
3080 if (POINTER_TYPE_P (basetype))
3081 basetype = sizetype;
3082 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
3083
3084 /* Third, try removing the constant offset. Make sure to even
3085 add a candidate for &a[0] vs. (T *)&a. */
3086 base = strip_offset (iv->base, &offset);
3087 if (offset || base != iv->base)
3088 add_candidate (data, base, iv->step, false, use);
3089
3090 /* At last, add auto-incremental candidates. Make such variables
3091 important since other iv uses with same base object may be based
3092 on it. */
3093 if (use != NULL && use->type == USE_ADDRESS)
3094 add_autoinc_candidates (data, iv->base, iv->step, true, use);
3095 }
3096
3097 /* Adds candidates based on the uses. */
3098
3099 static void
3100 add_iv_candidate_for_uses (struct ivopts_data *data)
3101 {
3102 unsigned i;
3103
3104 for (i = 0; i < n_iv_uses (data); i++)
3105 {
3106 struct iv_use *use = iv_use (data, i);
3107
3108 if (!use)
3109 continue;
3110
3111 switch (use->type)
3112 {
3113 case USE_NONLINEAR_EXPR:
3114 case USE_COMPARE:
3115 case USE_ADDRESS:
3116 /* Just add the ivs based on the value of the iv used here. */
3117 add_iv_candidate_for_use (data, use);
3118 break;
3119
3120 default:
3121 gcc_unreachable ();
3122 }
3123 }
3124 }
3125
3126 /* Record important candidates and add them to related_cands bitmaps
3127 if needed. */
3128
3129 static void
3130 record_important_candidates (struct ivopts_data *data)
3131 {
3132 unsigned i;
3133 struct iv_use *use;
3134
3135 for (i = 0; i < n_iv_cands (data); i++)
3136 {
3137 struct iv_cand *cand = iv_cand (data, i);
3138
3139 if (cand->important)
3140 bitmap_set_bit (data->important_candidates, i);
3141 }
3142
3143 data->consider_all_candidates = (n_iv_cands (data)
3144 <= CONSIDER_ALL_CANDIDATES_BOUND);
3145
3146 if (data->consider_all_candidates)
3147 {
3148 /* We will not need "related_cands" bitmaps in this case,
3149 so release them to decrease peak memory consumption. */
3150 for (i = 0; i < n_iv_uses (data); i++)
3151 {
3152 use = iv_use (data, i);
3153 BITMAP_FREE (use->related_cands);
3154 }
3155 }
3156 else
3157 {
3158 /* Add important candidates to the related_cands bitmaps. */
3159 for (i = 0; i < n_iv_uses (data); i++)
3160 bitmap_ior_into (iv_use (data, i)->related_cands,
3161 data->important_candidates);
3162 }
3163 }
3164
3165 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3166 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3167 we allocate a simple list to every use. */
3168
3169 static void
3170 alloc_use_cost_map (struct ivopts_data *data)
3171 {
3172 unsigned i, size, s;
3173
3174 for (i = 0; i < n_iv_uses (data); i++)
3175 {
3176 struct iv_use *use = iv_use (data, i);
3177
3178 if (data->consider_all_candidates)
3179 size = n_iv_cands (data);
3180 else
3181 {
3182 s = bitmap_count_bits (use->related_cands);
3183
3184 /* Round up to the power of two, so that moduling by it is fast. */
3185 size = s ? (1 << ceil_log2 (s)) : 1;
3186 }
3187
3188 use->n_map_members = size;
3189 use->cost_map = XCNEWVEC (struct cost_pair, size);
3190 }
3191 }
3192
3193 /* Returns description of computation cost of expression whose runtime
3194 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3195
3196 static comp_cost
3197 new_cost (unsigned runtime, unsigned complexity)
3198 {
3199 comp_cost cost;
3200
3201 cost.cost = runtime;
3202 cost.complexity = complexity;
3203
3204 return cost;
3205 }
3206
3207 /* Returns true if COST is infinite. */
3208
3209 static bool
3210 infinite_cost_p (comp_cost cost)
3211 {
3212 return cost.cost == INFTY;
3213 }
3214
3215 /* Adds costs COST1 and COST2. */
3216
3217 static comp_cost
3218 add_costs (comp_cost cost1, comp_cost cost2)
3219 {
3220 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3221 return infinite_cost;
3222
3223 cost1.cost += cost2.cost;
3224 cost1.complexity += cost2.complexity;
3225
3226 return cost1;
3227 }
3228 /* Subtracts costs COST1 and COST2. */
3229
3230 static comp_cost
3231 sub_costs (comp_cost cost1, comp_cost cost2)
3232 {
3233 cost1.cost -= cost2.cost;
3234 cost1.complexity -= cost2.complexity;
3235
3236 return cost1;
3237 }
3238
3239 /* Returns a negative number if COST1 < COST2, a positive number if
3240 COST1 > COST2, and 0 if COST1 = COST2. */
3241
3242 static int
3243 compare_costs (comp_cost cost1, comp_cost cost2)
3244 {
3245 if (cost1.cost == cost2.cost)
3246 return cost1.complexity - cost2.complexity;
3247
3248 return cost1.cost - cost2.cost;
3249 }
3250
3251 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3252 on invariants DEPENDS_ON and that the value used in expressing it
3253 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3254
3255 static void
3256 set_use_iv_cost (struct ivopts_data *data,
3257 struct iv_use *use, struct iv_cand *cand,
3258 comp_cost cost, bitmap depends_on, tree value,
3259 enum tree_code comp, int inv_expr_id)
3260 {
3261 unsigned i, s;
3262
3263 if (infinite_cost_p (cost))
3264 {
3265 BITMAP_FREE (depends_on);
3266 return;
3267 }
3268
3269 if (data->consider_all_candidates)
3270 {
3271 use->cost_map[cand->id].cand = cand;
3272 use->cost_map[cand->id].cost = cost;
3273 use->cost_map[cand->id].depends_on = depends_on;
3274 use->cost_map[cand->id].value = value;
3275 use->cost_map[cand->id].comp = comp;
3276 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3277 return;
3278 }
3279
3280 /* n_map_members is a power of two, so this computes modulo. */
3281 s = cand->id & (use->n_map_members - 1);
3282 for (i = s; i < use->n_map_members; i++)
3283 if (!use->cost_map[i].cand)
3284 goto found;
3285 for (i = 0; i < s; i++)
3286 if (!use->cost_map[i].cand)
3287 goto found;
3288
3289 gcc_unreachable ();
3290
3291 found:
3292 use->cost_map[i].cand = cand;
3293 use->cost_map[i].cost = cost;
3294 use->cost_map[i].depends_on = depends_on;
3295 use->cost_map[i].value = value;
3296 use->cost_map[i].comp = comp;
3297 use->cost_map[i].inv_expr_id = inv_expr_id;
3298 }
3299
3300 /* Gets cost of (USE, CANDIDATE) pair. */
3301
3302 static struct cost_pair *
3303 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3304 struct iv_cand *cand)
3305 {
3306 unsigned i, s;
3307 struct cost_pair *ret;
3308
3309 if (!cand)
3310 return NULL;
3311
3312 if (data->consider_all_candidates)
3313 {
3314 ret = use->cost_map + cand->id;
3315 if (!ret->cand)
3316 return NULL;
3317
3318 return ret;
3319 }
3320
3321 /* n_map_members is a power of two, so this computes modulo. */
3322 s = cand->id & (use->n_map_members - 1);
3323 for (i = s; i < use->n_map_members; i++)
3324 if (use->cost_map[i].cand == cand)
3325 return use->cost_map + i;
3326 else if (use->cost_map[i].cand == NULL)
3327 return NULL;
3328 for (i = 0; i < s; i++)
3329 if (use->cost_map[i].cand == cand)
3330 return use->cost_map + i;
3331 else if (use->cost_map[i].cand == NULL)
3332 return NULL;
3333
3334 return NULL;
3335 }
3336
3337 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3338 static rtx
3339 produce_memory_decl_rtl (tree obj, int *regno)
3340 {
3341 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3342 machine_mode address_mode = targetm.addr_space.address_mode (as);
3343 rtx x;
3344
3345 gcc_assert (obj);
3346 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3347 {
3348 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3349 x = gen_rtx_SYMBOL_REF (address_mode, name);
3350 SET_SYMBOL_REF_DECL (x, obj);
3351 x = gen_rtx_MEM (DECL_MODE (obj), x);
3352 set_mem_addr_space (x, as);
3353 targetm.encode_section_info (obj, x, true);
3354 }
3355 else
3356 {
3357 x = gen_raw_REG (address_mode, (*regno)++);
3358 x = gen_rtx_MEM (DECL_MODE (obj), x);
3359 set_mem_addr_space (x, as);
3360 }
3361
3362 return x;
3363 }
3364
3365 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3366 walk_tree. DATA contains the actual fake register number. */
3367
3368 static tree
3369 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3370 {
3371 tree obj = NULL_TREE;
3372 rtx x = NULL_RTX;
3373 int *regno = (int *) data;
3374
3375 switch (TREE_CODE (*expr_p))
3376 {
3377 case ADDR_EXPR:
3378 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3379 handled_component_p (*expr_p);
3380 expr_p = &TREE_OPERAND (*expr_p, 0))
3381 continue;
3382 obj = *expr_p;
3383 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3384 x = produce_memory_decl_rtl (obj, regno);
3385 break;
3386
3387 case SSA_NAME:
3388 *ws = 0;
3389 obj = SSA_NAME_VAR (*expr_p);
3390 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3391 if (!obj)
3392 return NULL_TREE;
3393 if (!DECL_RTL_SET_P (obj))
3394 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3395 break;
3396
3397 case VAR_DECL:
3398 case PARM_DECL:
3399 case RESULT_DECL:
3400 *ws = 0;
3401 obj = *expr_p;
3402
3403 if (DECL_RTL_SET_P (obj))
3404 break;
3405
3406 if (DECL_MODE (obj) == BLKmode)
3407 x = produce_memory_decl_rtl (obj, regno);
3408 else
3409 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3410
3411 break;
3412
3413 default:
3414 break;
3415 }
3416
3417 if (x)
3418 {
3419 decl_rtl_to_reset.safe_push (obj);
3420 SET_DECL_RTL (obj, x);
3421 }
3422
3423 return NULL_TREE;
3424 }
3425
3426 /* Determines cost of the computation of EXPR. */
3427
3428 static unsigned
3429 computation_cost (tree expr, bool speed)
3430 {
3431 rtx_insn *seq;
3432 rtx rslt;
3433 tree type = TREE_TYPE (expr);
3434 unsigned cost;
3435 /* Avoid using hard regs in ways which may be unsupported. */
3436 int regno = LAST_VIRTUAL_REGISTER + 1;
3437 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3438 enum node_frequency real_frequency = node->frequency;
3439
3440 node->frequency = NODE_FREQUENCY_NORMAL;
3441 crtl->maybe_hot_insn_p = speed;
3442 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3443 start_sequence ();
3444 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3445 seq = get_insns ();
3446 end_sequence ();
3447 default_rtl_profile ();
3448 node->frequency = real_frequency;
3449
3450 cost = seq_cost (seq, speed);
3451 if (MEM_P (rslt))
3452 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3453 TYPE_ADDR_SPACE (type), speed);
3454 else if (!REG_P (rslt))
3455 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3456
3457 return cost;
3458 }
3459
3460 /* Returns variable containing the value of candidate CAND at statement AT. */
3461
3462 static tree
3463 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
3464 {
3465 if (stmt_after_increment (loop, cand, stmt))
3466 return cand->var_after;
3467 else
3468 return cand->var_before;
3469 }
3470
3471 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3472 same precision that is at least as wide as the precision of TYPE, stores
3473 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3474 type of A and B. */
3475
3476 static tree
3477 determine_common_wider_type (tree *a, tree *b)
3478 {
3479 tree wider_type = NULL;
3480 tree suba, subb;
3481 tree atype = TREE_TYPE (*a);
3482
3483 if (CONVERT_EXPR_P (*a))
3484 {
3485 suba = TREE_OPERAND (*a, 0);
3486 wider_type = TREE_TYPE (suba);
3487 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3488 return atype;
3489 }
3490 else
3491 return atype;
3492
3493 if (CONVERT_EXPR_P (*b))
3494 {
3495 subb = TREE_OPERAND (*b, 0);
3496 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3497 return atype;
3498 }
3499 else
3500 return atype;
3501
3502 *a = suba;
3503 *b = subb;
3504 return wider_type;
3505 }
3506
3507 /* Determines the expression by that USE is expressed from induction variable
3508 CAND at statement AT in LOOP. The expression is stored in a decomposed
3509 form into AFF. Returns false if USE cannot be expressed using CAND. */
3510
3511 static bool
3512 get_computation_aff (struct loop *loop,
3513 struct iv_use *use, struct iv_cand *cand, gimple *at,
3514 struct aff_tree *aff)
3515 {
3516 tree ubase = use->iv->base;
3517 tree ustep = use->iv->step;
3518 tree cbase = cand->iv->base;
3519 tree cstep = cand->iv->step, cstep_common;
3520 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3521 tree common_type, var;
3522 tree uutype;
3523 aff_tree cbase_aff, var_aff;
3524 widest_int rat;
3525
3526 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3527 {
3528 /* We do not have a precision to express the values of use. */
3529 return false;
3530 }
3531
3532 var = var_at_stmt (loop, cand, at);
3533 uutype = unsigned_type_for (utype);
3534
3535 /* If the conversion is not noop, perform it. */
3536 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3537 {
3538 if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
3539 && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
3540 {
3541 tree inner_base, inner_step, inner_type;
3542 inner_base = TREE_OPERAND (cbase, 0);
3543 if (CONVERT_EXPR_P (cstep))
3544 inner_step = TREE_OPERAND (cstep, 0);
3545 else
3546 inner_step = cstep;
3547
3548 inner_type = TREE_TYPE (inner_base);
3549 /* If candidate is added from a biv whose type is smaller than
3550 ctype, we know both candidate and the biv won't overflow.
3551 In this case, it's safe to skip the convertion in candidate.
3552 As an example, (unsigned short)((unsigned long)A) equals to
3553 (unsigned short)A, if A has a type no larger than short. */
3554 if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
3555 {
3556 cbase = inner_base;
3557 cstep = inner_step;
3558 }
3559 }
3560 cstep = fold_convert (uutype, cstep);
3561 cbase = fold_convert (uutype, cbase);
3562 var = fold_convert (uutype, var);
3563 }
3564
3565 if (!constant_multiple_of (ustep, cstep, &rat))
3566 return false;
3567
3568 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3569 type, we achieve better folding by computing their difference in this
3570 wider type, and cast the result to UUTYPE. We do not need to worry about
3571 overflows, as all the arithmetics will in the end be performed in UUTYPE
3572 anyway. */
3573 common_type = determine_common_wider_type (&ubase, &cbase);
3574
3575 /* use = ubase - ratio * cbase + ratio * var. */
3576 tree_to_aff_combination (ubase, common_type, aff);
3577 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3578 tree_to_aff_combination (var, uutype, &var_aff);
3579
3580 /* We need to shift the value if we are after the increment. */
3581 if (stmt_after_increment (loop, cand, at))
3582 {
3583 aff_tree cstep_aff;
3584
3585 if (common_type != uutype)
3586 cstep_common = fold_convert (common_type, cstep);
3587 else
3588 cstep_common = cstep;
3589
3590 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3591 aff_combination_add (&cbase_aff, &cstep_aff);
3592 }
3593
3594 aff_combination_scale (&cbase_aff, -rat);
3595 aff_combination_add (aff, &cbase_aff);
3596 if (common_type != uutype)
3597 aff_combination_convert (aff, uutype);
3598
3599 aff_combination_scale (&var_aff, rat);
3600 aff_combination_add (aff, &var_aff);
3601
3602 return true;
3603 }
3604
3605 /* Return the type of USE. */
3606
3607 static tree
3608 get_use_type (struct iv_use *use)
3609 {
3610 tree base_type = TREE_TYPE (use->iv->base);
3611 tree type;
3612
3613 if (use->type == USE_ADDRESS)
3614 {
3615 /* The base_type may be a void pointer. Create a pointer type based on
3616 the mem_ref instead. */
3617 type = build_pointer_type (TREE_TYPE (*use->op_p));
3618 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3619 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3620 }
3621 else
3622 type = base_type;
3623
3624 return type;
3625 }
3626
3627 /* Determines the expression by that USE is expressed from induction variable
3628 CAND at statement AT in LOOP. The computation is unshared. */
3629
3630 static tree
3631 get_computation_at (struct loop *loop,
3632 struct iv_use *use, struct iv_cand *cand, gimple *at)
3633 {
3634 aff_tree aff;
3635 tree type = get_use_type (use);
3636
3637 if (!get_computation_aff (loop, use, cand, at, &aff))
3638 return NULL_TREE;
3639 unshare_aff_combination (&aff);
3640 return fold_convert (type, aff_combination_to_tree (&aff));
3641 }
3642
3643 /* Determines the expression by that USE is expressed from induction variable
3644 CAND in LOOP. The computation is unshared. */
3645
3646 static tree
3647 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3648 {
3649 return get_computation_at (loop, use, cand, use->stmt);
3650 }
3651
3652 /* Adjust the cost COST for being in loop setup rather than loop body.
3653 If we're optimizing for space, the loop setup overhead is constant;
3654 if we're optimizing for speed, amortize it over the per-iteration cost. */
3655 static unsigned
3656 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3657 {
3658 if (cost == INFTY)
3659 return cost;
3660 else if (optimize_loop_for_speed_p (data->current_loop))
3661 return cost / avg_loop_niter (data->current_loop);
3662 else
3663 return cost;
3664 }
3665
3666 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3667 validity for a memory reference accessing memory of mode MODE in
3668 address space AS. */
3669
3670
3671 bool
3672 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3673 addr_space_t as)
3674 {
3675 #define MAX_RATIO 128
3676 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3677 static vec<sbitmap> valid_mult_list;
3678 sbitmap valid_mult;
3679
3680 if (data_index >= valid_mult_list.length ())
3681 valid_mult_list.safe_grow_cleared (data_index + 1);
3682
3683 valid_mult = valid_mult_list[data_index];
3684 if (!valid_mult)
3685 {
3686 machine_mode address_mode = targetm.addr_space.address_mode (as);
3687 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3688 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3689 rtx addr, scaled;
3690 HOST_WIDE_INT i;
3691
3692 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3693 bitmap_clear (valid_mult);
3694 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3695 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3696 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3697 {
3698 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3699 if (memory_address_addr_space_p (mode, addr, as)
3700 || memory_address_addr_space_p (mode, scaled, as))
3701 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3702 }
3703
3704 if (dump_file && (dump_flags & TDF_DETAILS))
3705 {
3706 fprintf (dump_file, " allowed multipliers:");
3707 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3708 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3709 fprintf (dump_file, " %d", (int) i);
3710 fprintf (dump_file, "\n");
3711 fprintf (dump_file, "\n");
3712 }
3713
3714 valid_mult_list[data_index] = valid_mult;
3715 }
3716
3717 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3718 return false;
3719
3720 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3721 }
3722
3723 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3724 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3725 variable is omitted. Compute the cost for a memory reference that accesses
3726 a memory location of mode MEM_MODE in address space AS.
3727
3728 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3729 size of MEM_MODE / RATIO) is available. To make this determination, we
3730 look at the size of the increment to be made, which is given in CSTEP.
3731 CSTEP may be zero if the step is unknown.
3732 STMT_AFTER_INC is true iff the statement we're looking at is after the
3733 increment of the original biv.
3734
3735 TODO -- there must be some better way. This all is quite crude. */
3736
3737 enum ainc_type
3738 {
3739 AINC_PRE_INC, /* Pre increment. */
3740 AINC_PRE_DEC, /* Pre decrement. */
3741 AINC_POST_INC, /* Post increment. */
3742 AINC_POST_DEC, /* Post decrement. */
3743 AINC_NONE /* Also the number of auto increment types. */
3744 };
3745
3746 struct address_cost_data
3747 {
3748 HOST_WIDE_INT min_offset, max_offset;
3749 unsigned costs[2][2][2][2];
3750 unsigned ainc_costs[AINC_NONE];
3751 };
3752
3753
3754 static comp_cost
3755 get_address_cost (bool symbol_present, bool var_present,
3756 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3757 HOST_WIDE_INT cstep, machine_mode mem_mode,
3758 addr_space_t as, bool speed,
3759 bool stmt_after_inc, bool *may_autoinc)
3760 {
3761 machine_mode address_mode = targetm.addr_space.address_mode (as);
3762 static vec<address_cost_data *> address_cost_data_list;
3763 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3764 address_cost_data *data;
3765 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3766 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3767 unsigned cost, acost, complexity;
3768 enum ainc_type autoinc_type;
3769 bool offset_p, ratio_p, autoinc;
3770 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3771 unsigned HOST_WIDE_INT mask;
3772 unsigned bits;
3773
3774 if (data_index >= address_cost_data_list.length ())
3775 address_cost_data_list.safe_grow_cleared (data_index + 1);
3776
3777 data = address_cost_data_list[data_index];
3778 if (!data)
3779 {
3780 HOST_WIDE_INT i;
3781 HOST_WIDE_INT rat, off = 0;
3782 int old_cse_not_expected, width;
3783 unsigned sym_p, var_p, off_p, rat_p, add_c;
3784 rtx_insn *seq;
3785 rtx addr, base;
3786 rtx reg0, reg1;
3787
3788 data = (address_cost_data *) xcalloc (1, sizeof (*data));
3789
3790 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3791
3792 width = GET_MODE_BITSIZE (address_mode) - 1;
3793 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3794 width = HOST_BITS_PER_WIDE_INT - 1;
3795 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3796
3797 for (i = width; i >= 0; i--)
3798 {
3799 off = -((unsigned HOST_WIDE_INT) 1 << i);
3800 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3801 if (memory_address_addr_space_p (mem_mode, addr, as))
3802 break;
3803 }
3804 data->min_offset = (i == -1? 0 : off);
3805
3806 for (i = width; i >= 0; i--)
3807 {
3808 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3809 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3810 if (memory_address_addr_space_p (mem_mode, addr, as))
3811 break;
3812 /* For some strict-alignment targets, the offset must be naturally
3813 aligned. Try an aligned offset if mem_mode is not QImode. */
3814 off = mem_mode != QImode
3815 ? ((unsigned HOST_WIDE_INT) 1 << i)
3816 - GET_MODE_SIZE (mem_mode)
3817 : 0;
3818 if (off > 0)
3819 {
3820 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3821 if (memory_address_addr_space_p (mem_mode, addr, as))
3822 break;
3823 }
3824 }
3825 if (i == -1)
3826 off = 0;
3827 data->max_offset = off;
3828
3829 if (dump_file && (dump_flags & TDF_DETAILS))
3830 {
3831 fprintf (dump_file, "get_address_cost:\n");
3832 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3833 GET_MODE_NAME (mem_mode),
3834 data->min_offset);
3835 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3836 GET_MODE_NAME (mem_mode),
3837 data->max_offset);
3838 }
3839
3840 rat = 1;
3841 for (i = 2; i <= MAX_RATIO; i++)
3842 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3843 {
3844 rat = i;
3845 break;
3846 }
3847
3848 /* Compute the cost of various addressing modes. */
3849 acost = 0;
3850 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3851 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3852
3853 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3854 || USE_STORE_PRE_DECREMENT (mem_mode))
3855 {
3856 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3857 has_predec[mem_mode]
3858 = memory_address_addr_space_p (mem_mode, addr, as);
3859
3860 if (has_predec[mem_mode])
3861 data->ainc_costs[AINC_PRE_DEC]
3862 = address_cost (addr, mem_mode, as, speed);
3863 }
3864 if (USE_LOAD_POST_DECREMENT (mem_mode)
3865 || USE_STORE_POST_DECREMENT (mem_mode))
3866 {
3867 addr = gen_rtx_POST_DEC (address_mode, reg0);
3868 has_postdec[mem_mode]
3869 = memory_address_addr_space_p (mem_mode, addr, as);
3870
3871 if (has_postdec[mem_mode])
3872 data->ainc_costs[AINC_POST_DEC]
3873 = address_cost (addr, mem_mode, as, speed);
3874 }
3875 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3876 || USE_STORE_PRE_DECREMENT (mem_mode))
3877 {
3878 addr = gen_rtx_PRE_INC (address_mode, reg0);
3879 has_preinc[mem_mode]
3880 = memory_address_addr_space_p (mem_mode, addr, as);
3881
3882 if (has_preinc[mem_mode])
3883 data->ainc_costs[AINC_PRE_INC]
3884 = address_cost (addr, mem_mode, as, speed);
3885 }
3886 if (USE_LOAD_POST_INCREMENT (mem_mode)
3887 || USE_STORE_POST_INCREMENT (mem_mode))
3888 {
3889 addr = gen_rtx_POST_INC (address_mode, reg0);
3890 has_postinc[mem_mode]
3891 = memory_address_addr_space_p (mem_mode, addr, as);
3892
3893 if (has_postinc[mem_mode])
3894 data->ainc_costs[AINC_POST_INC]
3895 = address_cost (addr, mem_mode, as, speed);
3896 }
3897 for (i = 0; i < 16; i++)
3898 {
3899 sym_p = i & 1;
3900 var_p = (i >> 1) & 1;
3901 off_p = (i >> 2) & 1;
3902 rat_p = (i >> 3) & 1;
3903
3904 addr = reg0;
3905 if (rat_p)
3906 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3907 gen_int_mode (rat, address_mode));
3908
3909 if (var_p)
3910 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3911
3912 if (sym_p)
3913 {
3914 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3915 /* ??? We can run into trouble with some backends by presenting
3916 it with symbols which haven't been properly passed through
3917 targetm.encode_section_info. By setting the local bit, we
3918 enhance the probability of things working. */
3919 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3920
3921 if (off_p)
3922 base = gen_rtx_fmt_e (CONST, address_mode,
3923 gen_rtx_fmt_ee
3924 (PLUS, address_mode, base,
3925 gen_int_mode (off, address_mode)));
3926 }
3927 else if (off_p)
3928 base = gen_int_mode (off, address_mode);
3929 else
3930 base = NULL_RTX;
3931
3932 if (base)
3933 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3934
3935 start_sequence ();
3936 /* To avoid splitting addressing modes, pretend that no cse will
3937 follow. */
3938 old_cse_not_expected = cse_not_expected;
3939 cse_not_expected = true;
3940 addr = memory_address_addr_space (mem_mode, addr, as);
3941 cse_not_expected = old_cse_not_expected;
3942 seq = get_insns ();
3943 end_sequence ();
3944
3945 acost = seq_cost (seq, speed);
3946 acost += address_cost (addr, mem_mode, as, speed);
3947
3948 if (!acost)
3949 acost = 1;
3950 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3951 }
3952
3953 /* On some targets, it is quite expensive to load symbol to a register,
3954 which makes addresses that contain symbols look much more expensive.
3955 However, the symbol will have to be loaded in any case before the
3956 loop (and quite likely we have it in register already), so it does not
3957 make much sense to penalize them too heavily. So make some final
3958 tweaks for the SYMBOL_PRESENT modes:
3959
3960 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3961 var is cheaper, use this mode with small penalty.
3962 If VAR_PRESENT is true, try whether the mode with
3963 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3964 if this is the case, use it. */
3965 add_c = add_cost (speed, address_mode);
3966 for (i = 0; i < 8; i++)
3967 {
3968 var_p = i & 1;
3969 off_p = (i >> 1) & 1;
3970 rat_p = (i >> 2) & 1;
3971
3972 acost = data->costs[0][1][off_p][rat_p] + 1;
3973 if (var_p)
3974 acost += add_c;
3975
3976 if (acost < data->costs[1][var_p][off_p][rat_p])
3977 data->costs[1][var_p][off_p][rat_p] = acost;
3978 }
3979
3980 if (dump_file && (dump_flags & TDF_DETAILS))
3981 {
3982 fprintf (dump_file, "Address costs:\n");
3983
3984 for (i = 0; i < 16; i++)
3985 {
3986 sym_p = i & 1;
3987 var_p = (i >> 1) & 1;
3988 off_p = (i >> 2) & 1;
3989 rat_p = (i >> 3) & 1;
3990
3991 fprintf (dump_file, " ");
3992 if (sym_p)
3993 fprintf (dump_file, "sym + ");
3994 if (var_p)
3995 fprintf (dump_file, "var + ");
3996 if (off_p)
3997 fprintf (dump_file, "cst + ");
3998 if (rat_p)
3999 fprintf (dump_file, "rat * ");
4000
4001 acost = data->costs[sym_p][var_p][off_p][rat_p];
4002 fprintf (dump_file, "index costs %d\n", acost);
4003 }
4004 if (has_predec[mem_mode] || has_postdec[mem_mode]
4005 || has_preinc[mem_mode] || has_postinc[mem_mode])
4006 fprintf (dump_file, " May include autoinc/dec\n");
4007 fprintf (dump_file, "\n");
4008 }
4009
4010 address_cost_data_list[data_index] = data;
4011 }
4012
4013 bits = GET_MODE_BITSIZE (address_mode);
4014 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
4015 offset &= mask;
4016 if ((offset >> (bits - 1) & 1))
4017 offset |= ~mask;
4018 s_offset = offset;
4019
4020 autoinc = false;
4021 autoinc_type = AINC_NONE;
4022 msize = GET_MODE_SIZE (mem_mode);
4023 autoinc_offset = offset;
4024 if (stmt_after_inc)
4025 autoinc_offset += ratio * cstep;
4026 if (symbol_present || var_present || ratio != 1)
4027 autoinc = false;
4028 else
4029 {
4030 if (has_postinc[mem_mode] && autoinc_offset == 0
4031 && msize == cstep)
4032 autoinc_type = AINC_POST_INC;
4033 else if (has_postdec[mem_mode] && autoinc_offset == 0
4034 && msize == -cstep)
4035 autoinc_type = AINC_POST_DEC;
4036 else if (has_preinc[mem_mode] && autoinc_offset == msize
4037 && msize == cstep)
4038 autoinc_type = AINC_PRE_INC;
4039 else if (has_predec[mem_mode] && autoinc_offset == -msize
4040 && msize == -cstep)
4041 autoinc_type = AINC_PRE_DEC;
4042
4043 if (autoinc_type != AINC_NONE)
4044 autoinc = true;
4045 }
4046
4047 cost = 0;
4048 offset_p = (s_offset != 0
4049 && data->min_offset <= s_offset
4050 && s_offset <= data->max_offset);
4051 ratio_p = (ratio != 1
4052 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
4053
4054 if (ratio != 1 && !ratio_p)
4055 cost += mult_by_coeff_cost (ratio, address_mode, speed);
4056
4057 if (s_offset && !offset_p && !symbol_present)
4058 cost += add_cost (speed, address_mode);
4059
4060 if (may_autoinc)
4061 *may_autoinc = autoinc;
4062 if (autoinc)
4063 acost = data->ainc_costs[autoinc_type];
4064 else
4065 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
4066 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
4067 return new_cost (cost + acost, complexity);
4068 }
4069
4070 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4071 the EXPR operand holding the shift. COST0 and COST1 are the costs for
4072 calculating the operands of EXPR. Returns true if successful, and returns
4073 the cost in COST. */
4074
4075 static bool
4076 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
4077 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
4078 {
4079 comp_cost res;
4080 tree op1 = TREE_OPERAND (expr, 1);
4081 tree cst = TREE_OPERAND (mult, 1);
4082 tree multop = TREE_OPERAND (mult, 0);
4083 int m = exact_log2 (int_cst_value (cst));
4084 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
4085 int as_cost, sa_cost;
4086 bool mult_in_op1;
4087
4088 if (!(m >= 0 && m < maxm))
4089 return false;
4090
4091 STRIP_NOPS (op1);
4092 mult_in_op1 = operand_equal_p (op1, mult, 0);
4093
4094 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
4095
4096 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4097 use that in preference to a shift insn followed by an add insn. */
4098 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
4099 ? shiftadd_cost (speed, mode, m)
4100 : (mult_in_op1
4101 ? shiftsub1_cost (speed, mode, m)
4102 : shiftsub0_cost (speed, mode, m)));
4103
4104 res = new_cost (MIN (as_cost, sa_cost), 0);
4105 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
4106
4107 STRIP_NOPS (multop);
4108 if (!is_gimple_val (multop))
4109 res = add_costs (res, force_expr_to_var_cost (multop, speed));
4110
4111 *cost = res;
4112 return true;
4113 }
4114
4115 /* Estimates cost of forcing expression EXPR into a variable. */
4116
4117 static comp_cost
4118 force_expr_to_var_cost (tree expr, bool speed)
4119 {
4120 static bool costs_initialized = false;
4121 static unsigned integer_cost [2];
4122 static unsigned symbol_cost [2];
4123 static unsigned address_cost [2];
4124 tree op0, op1;
4125 comp_cost cost0, cost1, cost;
4126 machine_mode mode;
4127
4128 if (!costs_initialized)
4129 {
4130 tree type = build_pointer_type (integer_type_node);
4131 tree var, addr;
4132 rtx x;
4133 int i;
4134
4135 var = create_tmp_var_raw (integer_type_node, "test_var");
4136 TREE_STATIC (var) = 1;
4137 x = produce_memory_decl_rtl (var, NULL);
4138 SET_DECL_RTL (var, x);
4139
4140 addr = build1 (ADDR_EXPR, type, var);
4141
4142
4143 for (i = 0; i < 2; i++)
4144 {
4145 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
4146 2000), i);
4147
4148 symbol_cost[i] = computation_cost (addr, i) + 1;
4149
4150 address_cost[i]
4151 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
4152 if (dump_file && (dump_flags & TDF_DETAILS))
4153 {
4154 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
4155 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
4156 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
4157 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
4158 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
4159 fprintf (dump_file, "\n");
4160 }
4161 }
4162
4163 costs_initialized = true;
4164 }
4165
4166 STRIP_NOPS (expr);
4167
4168 if (SSA_VAR_P (expr))
4169 return no_cost;
4170
4171 if (is_gimple_min_invariant (expr))
4172 {
4173 if (TREE_CODE (expr) == INTEGER_CST)
4174 return new_cost (integer_cost [speed], 0);
4175
4176 if (TREE_CODE (expr) == ADDR_EXPR)
4177 {
4178 tree obj = TREE_OPERAND (expr, 0);
4179
4180 if (TREE_CODE (obj) == VAR_DECL
4181 || TREE_CODE (obj) == PARM_DECL
4182 || TREE_CODE (obj) == RESULT_DECL)
4183 return new_cost (symbol_cost [speed], 0);
4184 }
4185
4186 return new_cost (address_cost [speed], 0);
4187 }
4188
4189 switch (TREE_CODE (expr))
4190 {
4191 case POINTER_PLUS_EXPR:
4192 case PLUS_EXPR:
4193 case MINUS_EXPR:
4194 case MULT_EXPR:
4195 op0 = TREE_OPERAND (expr, 0);
4196 op1 = TREE_OPERAND (expr, 1);
4197 STRIP_NOPS (op0);
4198 STRIP_NOPS (op1);
4199 break;
4200
4201 CASE_CONVERT:
4202 case NEGATE_EXPR:
4203 op0 = TREE_OPERAND (expr, 0);
4204 STRIP_NOPS (op0);
4205 op1 = NULL_TREE;
4206 break;
4207
4208 default:
4209 /* Just an arbitrary value, FIXME. */
4210 return new_cost (target_spill_cost[speed], 0);
4211 }
4212
4213 if (op0 == NULL_TREE
4214 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4215 cost0 = no_cost;
4216 else
4217 cost0 = force_expr_to_var_cost (op0, speed);
4218
4219 if (op1 == NULL_TREE
4220 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4221 cost1 = no_cost;
4222 else
4223 cost1 = force_expr_to_var_cost (op1, speed);
4224
4225 mode = TYPE_MODE (TREE_TYPE (expr));
4226 switch (TREE_CODE (expr))
4227 {
4228 case POINTER_PLUS_EXPR:
4229 case PLUS_EXPR:
4230 case MINUS_EXPR:
4231 case NEGATE_EXPR:
4232 cost = new_cost (add_cost (speed, mode), 0);
4233 if (TREE_CODE (expr) != NEGATE_EXPR)
4234 {
4235 tree mult = NULL_TREE;
4236 comp_cost sa_cost;
4237 if (TREE_CODE (op1) == MULT_EXPR)
4238 mult = op1;
4239 else if (TREE_CODE (op0) == MULT_EXPR)
4240 mult = op0;
4241
4242 if (mult != NULL_TREE
4243 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4244 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4245 speed, &sa_cost))
4246 return sa_cost;
4247 }
4248 break;
4249
4250 CASE_CONVERT:
4251 {
4252 tree inner_mode, outer_mode;
4253 outer_mode = TREE_TYPE (expr);
4254 inner_mode = TREE_TYPE (op0);
4255 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4256 TYPE_MODE (inner_mode), speed), 0);
4257 }
4258 break;
4259
4260 case MULT_EXPR:
4261 if (cst_and_fits_in_hwi (op0))
4262 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4263 mode, speed), 0);
4264 else if (cst_and_fits_in_hwi (op1))
4265 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4266 mode, speed), 0);
4267 else
4268 return new_cost (target_spill_cost [speed], 0);
4269 break;
4270
4271 default:
4272 gcc_unreachable ();
4273 }
4274
4275 cost = add_costs (cost, cost0);
4276 cost = add_costs (cost, cost1);
4277
4278 /* Bound the cost by target_spill_cost. The parts of complicated
4279 computations often are either loop invariant or at least can
4280 be shared between several iv uses, so letting this grow without
4281 limits would not give reasonable results. */
4282 if (cost.cost > (int) target_spill_cost [speed])
4283 cost.cost = target_spill_cost [speed];
4284
4285 return cost;
4286 }
4287
4288 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4289 invariants the computation depends on. */
4290
4291 static comp_cost
4292 force_var_cost (struct ivopts_data *data,
4293 tree expr, bitmap *depends_on)
4294 {
4295 if (depends_on)
4296 {
4297 fd_ivopts_data = data;
4298 walk_tree (&expr, find_depends, depends_on, NULL);
4299 }
4300
4301 return force_expr_to_var_cost (expr, data->speed);
4302 }
4303
4304 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4305 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4306 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4307 invariants the computation depends on. */
4308
4309 static comp_cost
4310 split_address_cost (struct ivopts_data *data,
4311 tree addr, bool *symbol_present, bool *var_present,
4312 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4313 {
4314 tree core;
4315 HOST_WIDE_INT bitsize;
4316 HOST_WIDE_INT bitpos;
4317 tree toffset;
4318 machine_mode mode;
4319 int unsignedp, volatilep;
4320
4321 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4322 &unsignedp, &volatilep, false);
4323
4324 if (toffset != 0
4325 || bitpos % BITS_PER_UNIT != 0
4326 || TREE_CODE (core) != VAR_DECL)
4327 {
4328 *symbol_present = false;
4329 *var_present = true;
4330 fd_ivopts_data = data;
4331 if (depends_on)
4332 walk_tree (&addr, find_depends, depends_on, NULL);
4333
4334 return new_cost (target_spill_cost[data->speed], 0);
4335 }
4336
4337 *offset += bitpos / BITS_PER_UNIT;
4338 if (TREE_STATIC (core)
4339 || DECL_EXTERNAL (core))
4340 {
4341 *symbol_present = true;
4342 *var_present = false;
4343 return no_cost;
4344 }
4345
4346 *symbol_present = false;
4347 *var_present = true;
4348 return no_cost;
4349 }
4350
4351 /* Estimates cost of expressing difference of addresses E1 - E2 as
4352 var + symbol + offset. The value of offset is added to OFFSET,
4353 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4354 part is missing. DEPENDS_ON is a set of the invariants the computation
4355 depends on. */
4356
4357 static comp_cost
4358 ptr_difference_cost (struct ivopts_data *data,
4359 tree e1, tree e2, bool *symbol_present, bool *var_present,
4360 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4361 {
4362 HOST_WIDE_INT diff = 0;
4363 aff_tree aff_e1, aff_e2;
4364 tree type;
4365
4366 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4367
4368 if (ptr_difference_const (e1, e2, &diff))
4369 {
4370 *offset += diff;
4371 *symbol_present = false;
4372 *var_present = false;
4373 return no_cost;
4374 }
4375
4376 if (integer_zerop (e2))
4377 return split_address_cost (data, TREE_OPERAND (e1, 0),
4378 symbol_present, var_present, offset, depends_on);
4379
4380 *symbol_present = false;
4381 *var_present = true;
4382
4383 type = signed_type_for (TREE_TYPE (e1));
4384 tree_to_aff_combination (e1, type, &aff_e1);
4385 tree_to_aff_combination (e2, type, &aff_e2);
4386 aff_combination_scale (&aff_e2, -1);
4387 aff_combination_add (&aff_e1, &aff_e2);
4388
4389 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4390 }
4391
4392 /* Estimates cost of expressing difference E1 - E2 as
4393 var + symbol + offset. The value of offset is added to OFFSET,
4394 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4395 part is missing. DEPENDS_ON is a set of the invariants the computation
4396 depends on. */
4397
4398 static comp_cost
4399 difference_cost (struct ivopts_data *data,
4400 tree e1, tree e2, bool *symbol_present, bool *var_present,
4401 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4402 {
4403 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4404 unsigned HOST_WIDE_INT off1, off2;
4405 aff_tree aff_e1, aff_e2;
4406 tree type;
4407
4408 e1 = strip_offset (e1, &off1);
4409 e2 = strip_offset (e2, &off2);
4410 *offset += off1 - off2;
4411
4412 STRIP_NOPS (e1);
4413 STRIP_NOPS (e2);
4414
4415 if (TREE_CODE (e1) == ADDR_EXPR)
4416 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4417 offset, depends_on);
4418 *symbol_present = false;
4419
4420 if (operand_equal_p (e1, e2, 0))
4421 {
4422 *var_present = false;
4423 return no_cost;
4424 }
4425
4426 *var_present = true;
4427
4428 if (integer_zerop (e2))
4429 return force_var_cost (data, e1, depends_on);
4430
4431 if (integer_zerop (e1))
4432 {
4433 comp_cost cost = force_var_cost (data, e2, depends_on);
4434 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4435 return cost;
4436 }
4437
4438 type = signed_type_for (TREE_TYPE (e1));
4439 tree_to_aff_combination (e1, type, &aff_e1);
4440 tree_to_aff_combination (e2, type, &aff_e2);
4441 aff_combination_scale (&aff_e2, -1);
4442 aff_combination_add (&aff_e1, &aff_e2);
4443
4444 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4445 }
4446
4447 /* Returns true if AFF1 and AFF2 are identical. */
4448
4449 static bool
4450 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4451 {
4452 unsigned i;
4453
4454 if (aff1->n != aff2->n)
4455 return false;
4456
4457 for (i = 0; i < aff1->n; i++)
4458 {
4459 if (aff1->elts[i].coef != aff2->elts[i].coef)
4460 return false;
4461
4462 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4463 return false;
4464 }
4465 return true;
4466 }
4467
4468 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4469
4470 static int
4471 get_expr_id (struct ivopts_data *data, tree expr)
4472 {
4473 struct iv_inv_expr_ent ent;
4474 struct iv_inv_expr_ent **slot;
4475
4476 ent.expr = expr;
4477 ent.hash = iterative_hash_expr (expr, 0);
4478 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4479 if (*slot)
4480 return (*slot)->id;
4481
4482 *slot = XNEW (struct iv_inv_expr_ent);
4483 (*slot)->expr = expr;
4484 (*slot)->hash = ent.hash;
4485 (*slot)->id = data->inv_expr_id++;
4486 return (*slot)->id;
4487 }
4488
4489 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4490 requires a new compiler generated temporary. Returns -1 otherwise.
4491 ADDRESS_P is a flag indicating if the expression is for address
4492 computation. */
4493
4494 static int
4495 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4496 tree cbase, HOST_WIDE_INT ratio,
4497 bool address_p)
4498 {
4499 aff_tree ubase_aff, cbase_aff;
4500 tree expr, ub, cb;
4501
4502 STRIP_NOPS (ubase);
4503 STRIP_NOPS (cbase);
4504 ub = ubase;
4505 cb = cbase;
4506
4507 if ((TREE_CODE (ubase) == INTEGER_CST)
4508 && (TREE_CODE (cbase) == INTEGER_CST))
4509 return -1;
4510
4511 /* Strips the constant part. */
4512 if (TREE_CODE (ubase) == PLUS_EXPR
4513 || TREE_CODE (ubase) == MINUS_EXPR
4514 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4515 {
4516 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4517 ubase = TREE_OPERAND (ubase, 0);
4518 }
4519
4520 /* Strips the constant part. */
4521 if (TREE_CODE (cbase) == PLUS_EXPR
4522 || TREE_CODE (cbase) == MINUS_EXPR
4523 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4524 {
4525 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4526 cbase = TREE_OPERAND (cbase, 0);
4527 }
4528
4529 if (address_p)
4530 {
4531 if (((TREE_CODE (ubase) == SSA_NAME)
4532 || (TREE_CODE (ubase) == ADDR_EXPR
4533 && is_gimple_min_invariant (ubase)))
4534 && (TREE_CODE (cbase) == INTEGER_CST))
4535 return -1;
4536
4537 if (((TREE_CODE (cbase) == SSA_NAME)
4538 || (TREE_CODE (cbase) == ADDR_EXPR
4539 && is_gimple_min_invariant (cbase)))
4540 && (TREE_CODE (ubase) == INTEGER_CST))
4541 return -1;
4542 }
4543
4544 if (ratio == 1)
4545 {
4546 if (operand_equal_p (ubase, cbase, 0))
4547 return -1;
4548
4549 if (TREE_CODE (ubase) == ADDR_EXPR
4550 && TREE_CODE (cbase) == ADDR_EXPR)
4551 {
4552 tree usym, csym;
4553
4554 usym = TREE_OPERAND (ubase, 0);
4555 csym = TREE_OPERAND (cbase, 0);
4556 if (TREE_CODE (usym) == ARRAY_REF)
4557 {
4558 tree ind = TREE_OPERAND (usym, 1);
4559 if (TREE_CODE (ind) == INTEGER_CST
4560 && tree_fits_shwi_p (ind)
4561 && tree_to_shwi (ind) == 0)
4562 usym = TREE_OPERAND (usym, 0);
4563 }
4564 if (TREE_CODE (csym) == ARRAY_REF)
4565 {
4566 tree ind = TREE_OPERAND (csym, 1);
4567 if (TREE_CODE (ind) == INTEGER_CST
4568 && tree_fits_shwi_p (ind)
4569 && tree_to_shwi (ind) == 0)
4570 csym = TREE_OPERAND (csym, 0);
4571 }
4572 if (operand_equal_p (usym, csym, 0))
4573 return -1;
4574 }
4575 /* Now do more complex comparison */
4576 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4577 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4578 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4579 return -1;
4580 }
4581
4582 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4583 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4584
4585 aff_combination_scale (&cbase_aff, -1 * ratio);
4586 aff_combination_add (&ubase_aff, &cbase_aff);
4587 expr = aff_combination_to_tree (&ubase_aff);
4588 return get_expr_id (data, expr);
4589 }
4590
4591
4592
4593 /* Determines the cost of the computation by that USE is expressed
4594 from induction variable CAND. If ADDRESS_P is true, we just need
4595 to create an address from it, otherwise we want to get it into
4596 register. A set of invariants we depend on is stored in
4597 DEPENDS_ON. AT is the statement at that the value is computed.
4598 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4599 addressing is likely. */
4600
4601 static comp_cost
4602 get_computation_cost_at (struct ivopts_data *data,
4603 struct iv_use *use, struct iv_cand *cand,
4604 bool address_p, bitmap *depends_on, gimple *at,
4605 bool *can_autoinc,
4606 int *inv_expr_id)
4607 {
4608 tree ubase = use->iv->base, ustep = use->iv->step;
4609 tree cbase, cstep;
4610 tree utype = TREE_TYPE (ubase), ctype;
4611 unsigned HOST_WIDE_INT cstepi, offset = 0;
4612 HOST_WIDE_INT ratio, aratio;
4613 bool var_present, symbol_present, stmt_is_after_inc;
4614 comp_cost cost;
4615 widest_int rat;
4616 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4617 machine_mode mem_mode = (address_p
4618 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4619 : VOIDmode);
4620
4621 if (depends_on)
4622 *depends_on = NULL;
4623
4624 /* Only consider real candidates. */
4625 if (!cand->iv)
4626 return infinite_cost;
4627
4628 cbase = cand->iv->base;
4629 cstep = cand->iv->step;
4630 ctype = TREE_TYPE (cbase);
4631
4632 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4633 {
4634 /* We do not have a precision to express the values of use. */
4635 return infinite_cost;
4636 }
4637
4638 if (address_p
4639 || (use->iv->base_object
4640 && cand->iv->base_object
4641 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4642 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4643 {
4644 /* Do not try to express address of an object with computation based
4645 on address of a different object. This may cause problems in rtl
4646 level alias analysis (that does not expect this to be happening,
4647 as this is illegal in C), and would be unlikely to be useful
4648 anyway. */
4649 if (use->iv->base_object
4650 && cand->iv->base_object
4651 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4652 return infinite_cost;
4653 }
4654
4655 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4656 {
4657 /* TODO -- add direct handling of this case. */
4658 goto fallback;
4659 }
4660
4661 /* CSTEPI is removed from the offset in case statement is after the
4662 increment. If the step is not constant, we use zero instead.
4663 This is a bit imprecise (there is the extra addition), but
4664 redundancy elimination is likely to transform the code so that
4665 it uses value of the variable before increment anyway,
4666 so it is not that much unrealistic. */
4667 if (cst_and_fits_in_hwi (cstep))
4668 cstepi = int_cst_value (cstep);
4669 else
4670 cstepi = 0;
4671
4672 if (!constant_multiple_of (ustep, cstep, &rat))
4673 return infinite_cost;
4674
4675 if (wi::fits_shwi_p (rat))
4676 ratio = rat.to_shwi ();
4677 else
4678 return infinite_cost;
4679
4680 STRIP_NOPS (cbase);
4681 ctype = TREE_TYPE (cbase);
4682
4683 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4684
4685 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4686 or ratio == 1, it is better to handle this like
4687
4688 ubase - ratio * cbase + ratio * var
4689
4690 (also holds in the case ratio == -1, TODO. */
4691
4692 if (cst_and_fits_in_hwi (cbase))
4693 {
4694 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4695 cost = difference_cost (data,
4696 ubase, build_int_cst (utype, 0),
4697 &symbol_present, &var_present, &offset,
4698 depends_on);
4699 cost.cost /= avg_loop_niter (data->current_loop);
4700 }
4701 else if (ratio == 1)
4702 {
4703 tree real_cbase = cbase;
4704
4705 /* Check to see if any adjustment is needed. */
4706 if (cstepi == 0 && stmt_is_after_inc)
4707 {
4708 aff_tree real_cbase_aff;
4709 aff_tree cstep_aff;
4710
4711 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4712 &real_cbase_aff);
4713 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4714
4715 aff_combination_add (&real_cbase_aff, &cstep_aff);
4716 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4717 }
4718
4719 cost = difference_cost (data,
4720 ubase, real_cbase,
4721 &symbol_present, &var_present, &offset,
4722 depends_on);
4723 cost.cost /= avg_loop_niter (data->current_loop);
4724 }
4725 else if (address_p
4726 && !POINTER_TYPE_P (ctype)
4727 && multiplier_allowed_in_address_p
4728 (ratio, mem_mode,
4729 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4730 {
4731 if (cstepi == 0 && stmt_is_after_inc)
4732 {
4733 if (POINTER_TYPE_P (ctype))
4734 cbase = fold_build2 (POINTER_PLUS_EXPR, ctype, cbase, cstep);
4735 else
4736 cbase = fold_build2 (PLUS_EXPR, ctype, cbase, cstep);
4737 }
4738 cbase
4739 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4740 cost = difference_cost (data,
4741 ubase, cbase,
4742 &symbol_present, &var_present, &offset,
4743 depends_on);
4744 cost.cost /= avg_loop_niter (data->current_loop);
4745 }
4746 else
4747 {
4748 cost = force_var_cost (data, cbase, depends_on);
4749 cost = add_costs (cost,
4750 difference_cost (data,
4751 ubase, build_int_cst (utype, 0),
4752 &symbol_present, &var_present,
4753 &offset, depends_on));
4754 cost.cost /= avg_loop_niter (data->current_loop);
4755 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4756 }
4757
4758 /* Set of invariants depended on by sub use has already been computed
4759 for the first use in the group. */
4760 if (use->sub_id)
4761 {
4762 cost.cost = 0;
4763 if (depends_on && *depends_on)
4764 bitmap_clear (*depends_on);
4765 }
4766 else if (inv_expr_id)
4767 {
4768 *inv_expr_id =
4769 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4770 /* Clear depends on. */
4771 if (*inv_expr_id != -1 && depends_on && *depends_on)
4772 bitmap_clear (*depends_on);
4773 }
4774
4775 /* If we are after the increment, the value of the candidate is higher by
4776 one iteration. */
4777 if (stmt_is_after_inc)
4778 offset -= ratio * cstepi;
4779
4780 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4781 (symbol/var1/const parts may be omitted). If we are looking for an
4782 address, find the cost of addressing this. */
4783 if (address_p)
4784 return add_costs (cost,
4785 get_address_cost (symbol_present, var_present,
4786 offset, ratio, cstepi,
4787 mem_mode,
4788 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4789 speed, stmt_is_after_inc,
4790 can_autoinc));
4791
4792 /* Otherwise estimate the costs for computing the expression. */
4793 if (!symbol_present && !var_present && !offset)
4794 {
4795 if (ratio != 1)
4796 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4797 return cost;
4798 }
4799
4800 /* Symbol + offset should be compile-time computable so consider that they
4801 are added once to the variable, if present. */
4802 if (var_present && (symbol_present || offset))
4803 cost.cost += adjust_setup_cost (data,
4804 add_cost (speed, TYPE_MODE (ctype)));
4805
4806 /* Having offset does not affect runtime cost in case it is added to
4807 symbol, but it increases complexity. */
4808 if (offset)
4809 cost.complexity++;
4810
4811 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4812
4813 aratio = ratio > 0 ? ratio : -ratio;
4814 if (aratio != 1)
4815 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4816 return cost;
4817
4818 fallback:
4819 if (can_autoinc)
4820 *can_autoinc = false;
4821
4822 {
4823 /* Just get the expression, expand it and measure the cost. */
4824 tree comp = get_computation_at (data->current_loop, use, cand, at);
4825
4826 if (!comp)
4827 return infinite_cost;
4828
4829 if (address_p)
4830 comp = build_simple_mem_ref (comp);
4831
4832 return new_cost (computation_cost (comp, speed), 0);
4833 }
4834 }
4835
4836 /* Determines the cost of the computation by that USE is expressed
4837 from induction variable CAND. If ADDRESS_P is true, we just need
4838 to create an address from it, otherwise we want to get it into
4839 register. A set of invariants we depend on is stored in
4840 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4841 autoinc addressing is likely. */
4842
4843 static comp_cost
4844 get_computation_cost (struct ivopts_data *data,
4845 struct iv_use *use, struct iv_cand *cand,
4846 bool address_p, bitmap *depends_on,
4847 bool *can_autoinc, int *inv_expr_id)
4848 {
4849 return get_computation_cost_at (data,
4850 use, cand, address_p, depends_on, use->stmt,
4851 can_autoinc, inv_expr_id);
4852 }
4853
4854 /* Determines cost of basing replacement of USE on CAND in a generic
4855 expression. */
4856
4857 static bool
4858 determine_use_iv_cost_generic (struct ivopts_data *data,
4859 struct iv_use *use, struct iv_cand *cand)
4860 {
4861 bitmap depends_on;
4862 comp_cost cost;
4863 int inv_expr_id = -1;
4864
4865 /* The simple case first -- if we need to express value of the preserved
4866 original biv, the cost is 0. This also prevents us from counting the
4867 cost of increment twice -- once at this use and once in the cost of
4868 the candidate. */
4869 if (cand->pos == IP_ORIGINAL
4870 && cand->incremented_at == use->stmt)
4871 {
4872 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4873 ERROR_MARK, -1);
4874 return true;
4875 }
4876
4877 cost = get_computation_cost (data, use, cand, false, &depends_on,
4878 NULL, &inv_expr_id);
4879
4880 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4881 inv_expr_id);
4882
4883 return !infinite_cost_p (cost);
4884 }
4885
4886 /* Determines cost of basing replacement of USE on CAND in an address. */
4887
4888 static bool
4889 determine_use_iv_cost_address (struct ivopts_data *data,
4890 struct iv_use *use, struct iv_cand *cand)
4891 {
4892 bitmap depends_on;
4893 bool can_autoinc;
4894 int inv_expr_id = -1;
4895 struct iv_use *sub_use;
4896 comp_cost sub_cost;
4897 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4898 &can_autoinc, &inv_expr_id);
4899
4900 if (cand->ainc_use == use)
4901 {
4902 if (can_autoinc)
4903 cost.cost -= cand->cost_step;
4904 /* If we generated the candidate solely for exploiting autoincrement
4905 opportunities, and it turns out it can't be used, set the cost to
4906 infinity to make sure we ignore it. */
4907 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4908 cost = infinite_cost;
4909 }
4910 for (sub_use = use->next;
4911 sub_use && !infinite_cost_p (cost);
4912 sub_use = sub_use->next)
4913 {
4914 sub_cost = get_computation_cost (data, sub_use, cand, true, NULL,
4915 &can_autoinc, NULL);
4916 cost = add_costs (cost, sub_cost);
4917 }
4918
4919 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4920 inv_expr_id);
4921
4922 return !infinite_cost_p (cost);
4923 }
4924
4925 /* Computes value of candidate CAND at position AT in iteration NITER, and
4926 stores it to VAL. */
4927
4928 static void
4929 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
4930 aff_tree *val)
4931 {
4932 aff_tree step, delta, nit;
4933 struct iv *iv = cand->iv;
4934 tree type = TREE_TYPE (iv->base);
4935 tree steptype = type;
4936 if (POINTER_TYPE_P (type))
4937 steptype = sizetype;
4938 steptype = unsigned_type_for (type);
4939
4940 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4941 aff_combination_convert (&step, steptype);
4942 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4943 aff_combination_convert (&nit, steptype);
4944 aff_combination_mult (&nit, &step, &delta);
4945 if (stmt_after_increment (loop, cand, at))
4946 aff_combination_add (&delta, &step);
4947
4948 tree_to_aff_combination (iv->base, type, val);
4949 if (!POINTER_TYPE_P (type))
4950 aff_combination_convert (val, steptype);
4951 aff_combination_add (val, &delta);
4952 }
4953
4954 /* Returns period of induction variable iv. */
4955
4956 static tree
4957 iv_period (struct iv *iv)
4958 {
4959 tree step = iv->step, period, type;
4960 tree pow2div;
4961
4962 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4963
4964 type = unsigned_type_for (TREE_TYPE (step));
4965 /* Period of the iv is lcm (step, type_range)/step -1,
4966 i.e., N*type_range/step - 1. Since type range is power
4967 of two, N == (step >> num_of_ending_zeros_binary (step),
4968 so the final result is
4969
4970 (type_range >> num_of_ending_zeros_binary (step)) - 1
4971
4972 */
4973 pow2div = num_ending_zeros (step);
4974
4975 period = build_low_bits_mask (type,
4976 (TYPE_PRECISION (type)
4977 - tree_to_uhwi (pow2div)));
4978
4979 return period;
4980 }
4981
4982 /* Returns the comparison operator used when eliminating the iv USE. */
4983
4984 static enum tree_code
4985 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4986 {
4987 struct loop *loop = data->current_loop;
4988 basic_block ex_bb;
4989 edge exit;
4990
4991 ex_bb = gimple_bb (use->stmt);
4992 exit = EDGE_SUCC (ex_bb, 0);
4993 if (flow_bb_inside_loop_p (loop, exit->dest))
4994 exit = EDGE_SUCC (ex_bb, 1);
4995
4996 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4997 }
4998
4999 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5000 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5001 calculation is performed in non-wrapping type.
5002
5003 TODO: More generally, we could test for the situation that
5004 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5005 This would require knowing the sign of OFFSET. */
5006
5007 static bool
5008 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
5009 {
5010 enum tree_code code;
5011 tree e1, e2;
5012 aff_tree aff_e1, aff_e2, aff_offset;
5013
5014 if (!nowrap_type_p (TREE_TYPE (base)))
5015 return false;
5016
5017 base = expand_simple_operations (base);
5018
5019 if (TREE_CODE (base) == SSA_NAME)
5020 {
5021 gimple *stmt = SSA_NAME_DEF_STMT (base);
5022
5023 if (gimple_code (stmt) != GIMPLE_ASSIGN)
5024 return false;
5025
5026 code = gimple_assign_rhs_code (stmt);
5027 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5028 return false;
5029
5030 e1 = gimple_assign_rhs1 (stmt);
5031 e2 = gimple_assign_rhs2 (stmt);
5032 }
5033 else
5034 {
5035 code = TREE_CODE (base);
5036 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
5037 return false;
5038 e1 = TREE_OPERAND (base, 0);
5039 e2 = TREE_OPERAND (base, 1);
5040 }
5041
5042 /* Use affine expansion as deeper inspection to prove the equality. */
5043 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
5044 &aff_e2, &data->name_expansion_cache);
5045 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
5046 &aff_offset, &data->name_expansion_cache);
5047 aff_combination_scale (&aff_offset, -1);
5048 switch (code)
5049 {
5050 case PLUS_EXPR:
5051 aff_combination_add (&aff_e2, &aff_offset);
5052 if (aff_combination_zero_p (&aff_e2))
5053 return true;
5054
5055 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
5056 &aff_e1, &data->name_expansion_cache);
5057 aff_combination_add (&aff_e1, &aff_offset);
5058 return aff_combination_zero_p (&aff_e1);
5059
5060 case POINTER_PLUS_EXPR:
5061 aff_combination_add (&aff_e2, &aff_offset);
5062 return aff_combination_zero_p (&aff_e2);
5063
5064 default:
5065 return false;
5066 }
5067 }
5068
5069 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5070 comparison with CAND. NITER describes the number of iterations of
5071 the loops. If successful, the comparison in COMP_P is altered accordingly.
5072
5073 We aim to handle the following situation:
5074
5075 sometype *base, *p;
5076 int a, b, i;
5077
5078 i = a;
5079 p = p_0 = base + a;
5080
5081 do
5082 {
5083 bla (*p);
5084 p++;
5085 i++;
5086 }
5087 while (i < b);
5088
5089 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5090 We aim to optimize this to
5091
5092 p = p_0 = base + a;
5093 do
5094 {
5095 bla (*p);
5096 p++;
5097 }
5098 while (p < p_0 - a + b);
5099
5100 This preserves the correctness, since the pointer arithmetics does not
5101 overflow. More precisely:
5102
5103 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5104 overflow in computing it or the values of p.
5105 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5106 overflow. To prove this, we use the fact that p_0 = base + a. */
5107
5108 static bool
5109 iv_elimination_compare_lt (struct ivopts_data *data,
5110 struct iv_cand *cand, enum tree_code *comp_p,
5111 struct tree_niter_desc *niter)
5112 {
5113 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
5114 struct aff_tree nit, tmpa, tmpb;
5115 enum tree_code comp;
5116 HOST_WIDE_INT step;
5117
5118 /* We need to know that the candidate induction variable does not overflow.
5119 While more complex analysis may be used to prove this, for now just
5120 check that the variable appears in the original program and that it
5121 is computed in a type that guarantees no overflows. */
5122 cand_type = TREE_TYPE (cand->iv->base);
5123 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
5124 return false;
5125
5126 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5127 the calculation of the BOUND could overflow, making the comparison
5128 invalid. */
5129 if (!data->loop_single_exit_p)
5130 return false;
5131
5132 /* We need to be able to decide whether candidate is increasing or decreasing
5133 in order to choose the right comparison operator. */
5134 if (!cst_and_fits_in_hwi (cand->iv->step))
5135 return false;
5136 step = int_cst_value (cand->iv->step);
5137
5138 /* Check that the number of iterations matches the expected pattern:
5139 a + 1 > b ? 0 : b - a - 1. */
5140 mbz = niter->may_be_zero;
5141 if (TREE_CODE (mbz) == GT_EXPR)
5142 {
5143 /* Handle a + 1 > b. */
5144 tree op0 = TREE_OPERAND (mbz, 0);
5145 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
5146 {
5147 a = TREE_OPERAND (op0, 0);
5148 b = TREE_OPERAND (mbz, 1);
5149 }
5150 else
5151 return false;
5152 }
5153 else if (TREE_CODE (mbz) == LT_EXPR)
5154 {
5155 tree op1 = TREE_OPERAND (mbz, 1);
5156
5157 /* Handle b < a + 1. */
5158 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
5159 {
5160 a = TREE_OPERAND (op1, 0);
5161 b = TREE_OPERAND (mbz, 0);
5162 }
5163 else
5164 return false;
5165 }
5166 else
5167 return false;
5168
5169 /* Expected number of iterations is B - A - 1. Check that it matches
5170 the actual number, i.e., that B - A - NITER = 1. */
5171 tree_to_aff_combination (niter->niter, nit_type, &nit);
5172 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
5173 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
5174 aff_combination_scale (&nit, -1);
5175 aff_combination_scale (&tmpa, -1);
5176 aff_combination_add (&tmpb, &tmpa);
5177 aff_combination_add (&tmpb, &nit);
5178 if (tmpb.n != 0 || tmpb.offset != 1)
5179 return false;
5180
5181 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5182 overflow. */
5183 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
5184 cand->iv->step,
5185 fold_convert (TREE_TYPE (cand->iv->step), a));
5186 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
5187 return false;
5188
5189 /* Determine the new comparison operator. */
5190 comp = step < 0 ? GT_EXPR : LT_EXPR;
5191 if (*comp_p == NE_EXPR)
5192 *comp_p = comp;
5193 else if (*comp_p == EQ_EXPR)
5194 *comp_p = invert_tree_comparison (comp, false);
5195 else
5196 gcc_unreachable ();
5197
5198 return true;
5199 }
5200
5201 /* Check whether it is possible to express the condition in USE by comparison
5202 of candidate CAND. If so, store the value compared with to BOUND, and the
5203 comparison operator to COMP. */
5204
5205 static bool
5206 may_eliminate_iv (struct ivopts_data *data,
5207 struct iv_use *use, struct iv_cand *cand, tree *bound,
5208 enum tree_code *comp)
5209 {
5210 basic_block ex_bb;
5211 edge exit;
5212 tree period;
5213 struct loop *loop = data->current_loop;
5214 aff_tree bnd;
5215 struct tree_niter_desc *desc = NULL;
5216
5217 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5218 return false;
5219
5220 /* For now works only for exits that dominate the loop latch.
5221 TODO: extend to other conditions inside loop body. */
5222 ex_bb = gimple_bb (use->stmt);
5223 if (use->stmt != last_stmt (ex_bb)
5224 || gimple_code (use->stmt) != GIMPLE_COND
5225 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5226 return false;
5227
5228 exit = EDGE_SUCC (ex_bb, 0);
5229 if (flow_bb_inside_loop_p (loop, exit->dest))
5230 exit = EDGE_SUCC (ex_bb, 1);
5231 if (flow_bb_inside_loop_p (loop, exit->dest))
5232 return false;
5233
5234 desc = niter_for_exit (data, exit);
5235 if (!desc)
5236 return false;
5237
5238 /* Determine whether we can use the variable to test the exit condition.
5239 This is the case iff the period of the induction variable is greater
5240 than the number of iterations for which the exit condition is true. */
5241 period = iv_period (cand->iv);
5242
5243 /* If the number of iterations is constant, compare against it directly. */
5244 if (TREE_CODE (desc->niter) == INTEGER_CST)
5245 {
5246 /* See cand_value_at. */
5247 if (stmt_after_increment (loop, cand, use->stmt))
5248 {
5249 if (!tree_int_cst_lt (desc->niter, period))
5250 return false;
5251 }
5252 else
5253 {
5254 if (tree_int_cst_lt (period, desc->niter))
5255 return false;
5256 }
5257 }
5258
5259 /* If not, and if this is the only possible exit of the loop, see whether
5260 we can get a conservative estimate on the number of iterations of the
5261 entire loop and compare against that instead. */
5262 else
5263 {
5264 widest_int period_value, max_niter;
5265
5266 max_niter = desc->max;
5267 if (stmt_after_increment (loop, cand, use->stmt))
5268 max_niter += 1;
5269 period_value = wi::to_widest (period);
5270 if (wi::gtu_p (max_niter, period_value))
5271 {
5272 /* See if we can take advantage of inferred loop bound information. */
5273 if (data->loop_single_exit_p)
5274 {
5275 if (!max_loop_iterations (loop, &max_niter))
5276 return false;
5277 /* The loop bound is already adjusted by adding 1. */
5278 if (wi::gtu_p (max_niter, period_value))
5279 return false;
5280 }
5281 else
5282 return false;
5283 }
5284 }
5285
5286 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5287
5288 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5289 aff_combination_to_tree (&bnd));
5290 *comp = iv_elimination_compare (data, use);
5291
5292 /* It is unlikely that computing the number of iterations using division
5293 would be more profitable than keeping the original induction variable. */
5294 if (expression_expensive_p (*bound))
5295 return false;
5296
5297 /* Sometimes, it is possible to handle the situation that the number of
5298 iterations may be zero unless additional assumtions by using <
5299 instead of != in the exit condition.
5300
5301 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5302 base the exit condition on it. However, that is often too
5303 expensive. */
5304 if (!integer_zerop (desc->may_be_zero))
5305 return iv_elimination_compare_lt (data, cand, comp, desc);
5306
5307 return true;
5308 }
5309
5310 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5311 be copied, if it is used in the loop body and DATA->body_includes_call. */
5312
5313 static int
5314 parm_decl_cost (struct ivopts_data *data, tree bound)
5315 {
5316 tree sbound = bound;
5317 STRIP_NOPS (sbound);
5318
5319 if (TREE_CODE (sbound) == SSA_NAME
5320 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5321 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5322 && data->body_includes_call)
5323 return COSTS_N_INSNS (1);
5324
5325 return 0;
5326 }
5327
5328 /* Determines cost of basing replacement of USE on CAND in a condition. */
5329
5330 static bool
5331 determine_use_iv_cost_condition (struct ivopts_data *data,
5332 struct iv_use *use, struct iv_cand *cand)
5333 {
5334 tree bound = NULL_TREE;
5335 struct iv *cmp_iv;
5336 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5337 comp_cost elim_cost, express_cost, cost, bound_cost;
5338 bool ok;
5339 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5340 tree *control_var, *bound_cst;
5341 enum tree_code comp = ERROR_MARK;
5342
5343 /* Only consider real candidates. */
5344 if (!cand->iv)
5345 {
5346 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5347 ERROR_MARK, -1);
5348 return false;
5349 }
5350
5351 /* Try iv elimination. */
5352 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5353 {
5354 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5355 if (elim_cost.cost == 0)
5356 elim_cost.cost = parm_decl_cost (data, bound);
5357 else if (TREE_CODE (bound) == INTEGER_CST)
5358 elim_cost.cost = 0;
5359 /* If we replace a loop condition 'i < n' with 'p < base + n',
5360 depends_on_elim will have 'base' and 'n' set, which implies
5361 that both 'base' and 'n' will be live during the loop. More likely,
5362 'base + n' will be loop invariant, resulting in only one live value
5363 during the loop. So in that case we clear depends_on_elim and set
5364 elim_inv_expr_id instead. */
5365 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5366 {
5367 elim_inv_expr_id = get_expr_id (data, bound);
5368 bitmap_clear (depends_on_elim);
5369 }
5370 /* The bound is a loop invariant, so it will be only computed
5371 once. */
5372 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5373 }
5374 else
5375 elim_cost = infinite_cost;
5376
5377 /* Try expressing the original giv. If it is compared with an invariant,
5378 note that we cannot get rid of it. */
5379 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5380 NULL, &cmp_iv);
5381 gcc_assert (ok);
5382
5383 /* When the condition is a comparison of the candidate IV against
5384 zero, prefer this IV.
5385
5386 TODO: The constant that we're subtracting from the cost should
5387 be target-dependent. This information should be added to the
5388 target costs for each backend. */
5389 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5390 && integer_zerop (*bound_cst)
5391 && (operand_equal_p (*control_var, cand->var_after, 0)
5392 || operand_equal_p (*control_var, cand->var_before, 0)))
5393 elim_cost.cost -= 1;
5394
5395 express_cost = get_computation_cost (data, use, cand, false,
5396 &depends_on_express, NULL,
5397 &express_inv_expr_id);
5398 fd_ivopts_data = data;
5399 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5400
5401 /* Count the cost of the original bound as well. */
5402 bound_cost = force_var_cost (data, *bound_cst, NULL);
5403 if (bound_cost.cost == 0)
5404 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5405 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5406 bound_cost.cost = 0;
5407 express_cost.cost += bound_cost.cost;
5408
5409 /* Choose the better approach, preferring the eliminated IV. */
5410 if (compare_costs (elim_cost, express_cost) <= 0)
5411 {
5412 cost = elim_cost;
5413 depends_on = depends_on_elim;
5414 depends_on_elim = NULL;
5415 inv_expr_id = elim_inv_expr_id;
5416 }
5417 else
5418 {
5419 cost = express_cost;
5420 depends_on = depends_on_express;
5421 depends_on_express = NULL;
5422 bound = NULL_TREE;
5423 comp = ERROR_MARK;
5424 inv_expr_id = express_inv_expr_id;
5425 }
5426
5427 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5428
5429 if (depends_on_elim)
5430 BITMAP_FREE (depends_on_elim);
5431 if (depends_on_express)
5432 BITMAP_FREE (depends_on_express);
5433
5434 return !infinite_cost_p (cost);
5435 }
5436
5437 /* Determines cost of basing replacement of USE on CAND. Returns false
5438 if USE cannot be based on CAND. */
5439
5440 static bool
5441 determine_use_iv_cost (struct ivopts_data *data,
5442 struct iv_use *use, struct iv_cand *cand)
5443 {
5444 switch (use->type)
5445 {
5446 case USE_NONLINEAR_EXPR:
5447 return determine_use_iv_cost_generic (data, use, cand);
5448
5449 case USE_ADDRESS:
5450 return determine_use_iv_cost_address (data, use, cand);
5451
5452 case USE_COMPARE:
5453 return determine_use_iv_cost_condition (data, use, cand);
5454
5455 default:
5456 gcc_unreachable ();
5457 }
5458 }
5459
5460 /* Return true if get_computation_cost indicates that autoincrement is
5461 a possibility for the pair of USE and CAND, false otherwise. */
5462
5463 static bool
5464 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5465 struct iv_cand *cand)
5466 {
5467 bitmap depends_on;
5468 bool can_autoinc;
5469 comp_cost cost;
5470
5471 if (use->type != USE_ADDRESS)
5472 return false;
5473
5474 cost = get_computation_cost (data, use, cand, true, &depends_on,
5475 &can_autoinc, NULL);
5476
5477 BITMAP_FREE (depends_on);
5478
5479 return !infinite_cost_p (cost) && can_autoinc;
5480 }
5481
5482 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5483 use that allows autoincrement, and set their AINC_USE if possible. */
5484
5485 static void
5486 set_autoinc_for_original_candidates (struct ivopts_data *data)
5487 {
5488 unsigned i, j;
5489
5490 for (i = 0; i < n_iv_cands (data); i++)
5491 {
5492 struct iv_cand *cand = iv_cand (data, i);
5493 struct iv_use *closest_before = NULL;
5494 struct iv_use *closest_after = NULL;
5495 if (cand->pos != IP_ORIGINAL)
5496 continue;
5497
5498 for (j = 0; j < n_iv_uses (data); j++)
5499 {
5500 struct iv_use *use = iv_use (data, j);
5501 unsigned uid = gimple_uid (use->stmt);
5502
5503 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5504 continue;
5505
5506 if (uid < gimple_uid (cand->incremented_at)
5507 && (closest_before == NULL
5508 || uid > gimple_uid (closest_before->stmt)))
5509 closest_before = use;
5510
5511 if (uid > gimple_uid (cand->incremented_at)
5512 && (closest_after == NULL
5513 || uid < gimple_uid (closest_after->stmt)))
5514 closest_after = use;
5515 }
5516
5517 if (closest_before != NULL
5518 && autoinc_possible_for_pair (data, closest_before, cand))
5519 cand->ainc_use = closest_before;
5520 else if (closest_after != NULL
5521 && autoinc_possible_for_pair (data, closest_after, cand))
5522 cand->ainc_use = closest_after;
5523 }
5524 }
5525
5526 /* Finds the candidates for the induction variables. */
5527
5528 static void
5529 find_iv_candidates (struct ivopts_data *data)
5530 {
5531 /* Add commonly used ivs. */
5532 add_standard_iv_candidates (data);
5533
5534 /* Add old induction variables. */
5535 add_iv_candidate_for_bivs (data);
5536
5537 /* Add induction variables derived from uses. */
5538 add_iv_candidate_for_uses (data);
5539
5540 set_autoinc_for_original_candidates (data);
5541
5542 /* Record the important candidates. */
5543 record_important_candidates (data);
5544 }
5545
5546 /* Determines costs of basing the use of the iv on an iv candidate. */
5547
5548 static void
5549 determine_use_iv_costs (struct ivopts_data *data)
5550 {
5551 unsigned i, j;
5552 struct iv_use *use;
5553 struct iv_cand *cand;
5554 bitmap to_clear = BITMAP_ALLOC (NULL);
5555
5556 alloc_use_cost_map (data);
5557
5558 for (i = 0; i < n_iv_uses (data); i++)
5559 {
5560 use = iv_use (data, i);
5561
5562 if (data->consider_all_candidates)
5563 {
5564 for (j = 0; j < n_iv_cands (data); j++)
5565 {
5566 cand = iv_cand (data, j);
5567 determine_use_iv_cost (data, use, cand);
5568 }
5569 }
5570 else
5571 {
5572 bitmap_iterator bi;
5573
5574 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5575 {
5576 cand = iv_cand (data, j);
5577 if (!determine_use_iv_cost (data, use, cand))
5578 bitmap_set_bit (to_clear, j);
5579 }
5580
5581 /* Remove the candidates for that the cost is infinite from
5582 the list of related candidates. */
5583 bitmap_and_compl_into (use->related_cands, to_clear);
5584 bitmap_clear (to_clear);
5585 }
5586 }
5587
5588 BITMAP_FREE (to_clear);
5589
5590 if (dump_file && (dump_flags & TDF_DETAILS))
5591 {
5592 fprintf (dump_file, "Use-candidate costs:\n");
5593
5594 for (i = 0; i < n_iv_uses (data); i++)
5595 {
5596 use = iv_use (data, i);
5597
5598 fprintf (dump_file, "Use %d:\n", i);
5599 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5600 for (j = 0; j < use->n_map_members; j++)
5601 {
5602 if (!use->cost_map[j].cand
5603 || infinite_cost_p (use->cost_map[j].cost))
5604 continue;
5605
5606 fprintf (dump_file, " %d\t%d\t%d\t",
5607 use->cost_map[j].cand->id,
5608 use->cost_map[j].cost.cost,
5609 use->cost_map[j].cost.complexity);
5610 if (use->cost_map[j].depends_on)
5611 bitmap_print (dump_file,
5612 use->cost_map[j].depends_on, "","");
5613 if (use->cost_map[j].inv_expr_id != -1)
5614 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5615 fprintf (dump_file, "\n");
5616 }
5617
5618 fprintf (dump_file, "\n");
5619 }
5620 fprintf (dump_file, "\n");
5621 }
5622 }
5623
5624 /* Determines cost of the candidate CAND. */
5625
5626 static void
5627 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5628 {
5629 comp_cost cost_base;
5630 unsigned cost, cost_step;
5631 tree base;
5632
5633 if (!cand->iv)
5634 {
5635 cand->cost = 0;
5636 return;
5637 }
5638
5639 /* There are two costs associated with the candidate -- its increment
5640 and its initialization. The second is almost negligible for any loop
5641 that rolls enough, so we take it just very little into account. */
5642
5643 base = cand->iv->base;
5644 cost_base = force_var_cost (data, base, NULL);
5645 /* It will be exceptional that the iv register happens to be initialized with
5646 the proper value at no cost. In general, there will at least be a regcopy
5647 or a const set. */
5648 if (cost_base.cost == 0)
5649 cost_base.cost = COSTS_N_INSNS (1);
5650 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5651
5652 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5653
5654 /* Prefer the original ivs unless we may gain something by replacing it.
5655 The reason is to make debugging simpler; so this is not relevant for
5656 artificial ivs created by other optimization passes. */
5657 if (cand->pos != IP_ORIGINAL
5658 || !SSA_NAME_VAR (cand->var_before)
5659 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5660 cost++;
5661
5662 /* Prefer not to insert statements into latch unless there are some
5663 already (so that we do not create unnecessary jumps). */
5664 if (cand->pos == IP_END
5665 && empty_block_p (ip_end_pos (data->current_loop)))
5666 cost++;
5667
5668 cand->cost = cost;
5669 cand->cost_step = cost_step;
5670 }
5671
5672 /* Determines costs of computation of the candidates. */
5673
5674 static void
5675 determine_iv_costs (struct ivopts_data *data)
5676 {
5677 unsigned i;
5678
5679 if (dump_file && (dump_flags & TDF_DETAILS))
5680 {
5681 fprintf (dump_file, "Candidate costs:\n");
5682 fprintf (dump_file, " cand\tcost\n");
5683 }
5684
5685 for (i = 0; i < n_iv_cands (data); i++)
5686 {
5687 struct iv_cand *cand = iv_cand (data, i);
5688
5689 determine_iv_cost (data, cand);
5690
5691 if (dump_file && (dump_flags & TDF_DETAILS))
5692 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5693 }
5694
5695 if (dump_file && (dump_flags & TDF_DETAILS))
5696 fprintf (dump_file, "\n");
5697 }
5698
5699 /* Calculates cost for having SIZE induction variables. */
5700
5701 static unsigned
5702 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5703 {
5704 /* We add size to the cost, so that we prefer eliminating ivs
5705 if possible. */
5706 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5707 data->body_includes_call);
5708 }
5709
5710 /* For each size of the induction variable set determine the penalty. */
5711
5712 static void
5713 determine_set_costs (struct ivopts_data *data)
5714 {
5715 unsigned j, n;
5716 gphi *phi;
5717 gphi_iterator psi;
5718 tree op;
5719 struct loop *loop = data->current_loop;
5720 bitmap_iterator bi;
5721
5722 if (dump_file && (dump_flags & TDF_DETAILS))
5723 {
5724 fprintf (dump_file, "Global costs:\n");
5725 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5726 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5727 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5728 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5729 }
5730
5731 n = 0;
5732 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5733 {
5734 phi = psi.phi ();
5735 op = PHI_RESULT (phi);
5736
5737 if (virtual_operand_p (op))
5738 continue;
5739
5740 if (get_iv (data, op))
5741 continue;
5742
5743 n++;
5744 }
5745
5746 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5747 {
5748 struct version_info *info = ver_info (data, j);
5749
5750 if (info->inv_id && info->has_nonlin_use)
5751 n++;
5752 }
5753
5754 data->regs_used = n;
5755 if (dump_file && (dump_flags & TDF_DETAILS))
5756 fprintf (dump_file, " regs_used %d\n", n);
5757
5758 if (dump_file && (dump_flags & TDF_DETAILS))
5759 {
5760 fprintf (dump_file, " cost for size:\n");
5761 fprintf (dump_file, " ivs\tcost\n");
5762 for (j = 0; j <= 2 * target_avail_regs; j++)
5763 fprintf (dump_file, " %d\t%d\n", j,
5764 ivopts_global_cost_for_size (data, j));
5765 fprintf (dump_file, "\n");
5766 }
5767 }
5768
5769 /* Returns true if A is a cheaper cost pair than B. */
5770
5771 static bool
5772 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5773 {
5774 int cmp;
5775
5776 if (!a)
5777 return false;
5778
5779 if (!b)
5780 return true;
5781
5782 cmp = compare_costs (a->cost, b->cost);
5783 if (cmp < 0)
5784 return true;
5785
5786 if (cmp > 0)
5787 return false;
5788
5789 /* In case the costs are the same, prefer the cheaper candidate. */
5790 if (a->cand->cost < b->cand->cost)
5791 return true;
5792
5793 return false;
5794 }
5795
5796
5797 /* Returns candidate by that USE is expressed in IVS. */
5798
5799 static struct cost_pair *
5800 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5801 {
5802 return ivs->cand_for_use[use->id];
5803 }
5804
5805 /* Computes the cost field of IVS structure. */
5806
5807 static void
5808 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5809 {
5810 comp_cost cost = ivs->cand_use_cost;
5811
5812 cost.cost += ivs->cand_cost;
5813
5814 cost.cost += ivopts_global_cost_for_size (data,
5815 ivs->n_regs + ivs->num_used_inv_expr);
5816
5817 ivs->cost = cost;
5818 }
5819
5820 /* Remove invariants in set INVS to set IVS. */
5821
5822 static void
5823 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5824 {
5825 bitmap_iterator bi;
5826 unsigned iid;
5827
5828 if (!invs)
5829 return;
5830
5831 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5832 {
5833 ivs->n_invariant_uses[iid]--;
5834 if (ivs->n_invariant_uses[iid] == 0)
5835 ivs->n_regs--;
5836 }
5837 }
5838
5839 /* Set USE not to be expressed by any candidate in IVS. */
5840
5841 static void
5842 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5843 struct iv_use *use)
5844 {
5845 unsigned uid = use->id, cid;
5846 struct cost_pair *cp;
5847
5848 cp = ivs->cand_for_use[uid];
5849 if (!cp)
5850 return;
5851 cid = cp->cand->id;
5852
5853 ivs->bad_uses++;
5854 ivs->cand_for_use[uid] = NULL;
5855 ivs->n_cand_uses[cid]--;
5856
5857 if (ivs->n_cand_uses[cid] == 0)
5858 {
5859 bitmap_clear_bit (ivs->cands, cid);
5860 /* Do not count the pseudocandidates. */
5861 if (cp->cand->iv)
5862 ivs->n_regs--;
5863 ivs->n_cands--;
5864 ivs->cand_cost -= cp->cand->cost;
5865
5866 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5867 }
5868
5869 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5870
5871 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5872
5873 if (cp->inv_expr_id != -1)
5874 {
5875 ivs->used_inv_expr[cp->inv_expr_id]--;
5876 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5877 ivs->num_used_inv_expr--;
5878 }
5879 iv_ca_recount_cost (data, ivs);
5880 }
5881
5882 /* Add invariants in set INVS to set IVS. */
5883
5884 static void
5885 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5886 {
5887 bitmap_iterator bi;
5888 unsigned iid;
5889
5890 if (!invs)
5891 return;
5892
5893 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5894 {
5895 ivs->n_invariant_uses[iid]++;
5896 if (ivs->n_invariant_uses[iid] == 1)
5897 ivs->n_regs++;
5898 }
5899 }
5900
5901 /* Set cost pair for USE in set IVS to CP. */
5902
5903 static void
5904 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5905 struct iv_use *use, struct cost_pair *cp)
5906 {
5907 unsigned uid = use->id, cid;
5908
5909 if (ivs->cand_for_use[uid] == cp)
5910 return;
5911
5912 if (ivs->cand_for_use[uid])
5913 iv_ca_set_no_cp (data, ivs, use);
5914
5915 if (cp)
5916 {
5917 cid = cp->cand->id;
5918
5919 ivs->bad_uses--;
5920 ivs->cand_for_use[uid] = cp;
5921 ivs->n_cand_uses[cid]++;
5922 if (ivs->n_cand_uses[cid] == 1)
5923 {
5924 bitmap_set_bit (ivs->cands, cid);
5925 /* Do not count the pseudocandidates. */
5926 if (cp->cand->iv)
5927 ivs->n_regs++;
5928 ivs->n_cands++;
5929 ivs->cand_cost += cp->cand->cost;
5930
5931 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5932 }
5933
5934 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5935 iv_ca_set_add_invariants (ivs, cp->depends_on);
5936
5937 if (cp->inv_expr_id != -1)
5938 {
5939 ivs->used_inv_expr[cp->inv_expr_id]++;
5940 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5941 ivs->num_used_inv_expr++;
5942 }
5943 iv_ca_recount_cost (data, ivs);
5944 }
5945 }
5946
5947 /* Extend set IVS by expressing USE by some of the candidates in it
5948 if possible. Consider all important candidates if candidates in
5949 set IVS don't give any result. */
5950
5951 static void
5952 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5953 struct iv_use *use)
5954 {
5955 struct cost_pair *best_cp = NULL, *cp;
5956 bitmap_iterator bi;
5957 unsigned i;
5958 struct iv_cand *cand;
5959
5960 gcc_assert (ivs->upto >= use->id);
5961 ivs->upto++;
5962 ivs->bad_uses++;
5963
5964 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5965 {
5966 cand = iv_cand (data, i);
5967 cp = get_use_iv_cost (data, use, cand);
5968 if (cheaper_cost_pair (cp, best_cp))
5969 best_cp = cp;
5970 }
5971
5972 if (best_cp == NULL)
5973 {
5974 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5975 {
5976 cand = iv_cand (data, i);
5977 cp = get_use_iv_cost (data, use, cand);
5978 if (cheaper_cost_pair (cp, best_cp))
5979 best_cp = cp;
5980 }
5981 }
5982
5983 iv_ca_set_cp (data, ivs, use, best_cp);
5984 }
5985
5986 /* Get cost for assignment IVS. */
5987
5988 static comp_cost
5989 iv_ca_cost (struct iv_ca *ivs)
5990 {
5991 /* This was a conditional expression but it triggered a bug in
5992 Sun C 5.5. */
5993 if (ivs->bad_uses)
5994 return infinite_cost;
5995 else
5996 return ivs->cost;
5997 }
5998
5999 /* Returns true if all dependences of CP are among invariants in IVS. */
6000
6001 static bool
6002 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
6003 {
6004 unsigned i;
6005 bitmap_iterator bi;
6006
6007 if (!cp->depends_on)
6008 return true;
6009
6010 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
6011 {
6012 if (ivs->n_invariant_uses[i] == 0)
6013 return false;
6014 }
6015
6016 return true;
6017 }
6018
6019 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6020 it before NEXT_CHANGE. */
6021
6022 static struct iv_ca_delta *
6023 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
6024 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
6025 {
6026 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
6027
6028 change->use = use;
6029 change->old_cp = old_cp;
6030 change->new_cp = new_cp;
6031 change->next_change = next_change;
6032
6033 return change;
6034 }
6035
6036 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6037 are rewritten. */
6038
6039 static struct iv_ca_delta *
6040 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
6041 {
6042 struct iv_ca_delta *last;
6043
6044 if (!l2)
6045 return l1;
6046
6047 if (!l1)
6048 return l2;
6049
6050 for (last = l1; last->next_change; last = last->next_change)
6051 continue;
6052 last->next_change = l2;
6053
6054 return l1;
6055 }
6056
6057 /* Reverse the list of changes DELTA, forming the inverse to it. */
6058
6059 static struct iv_ca_delta *
6060 iv_ca_delta_reverse (struct iv_ca_delta *delta)
6061 {
6062 struct iv_ca_delta *act, *next, *prev = NULL;
6063
6064 for (act = delta; act; act = next)
6065 {
6066 next = act->next_change;
6067 act->next_change = prev;
6068 prev = act;
6069
6070 std::swap (act->old_cp, act->new_cp);
6071 }
6072
6073 return prev;
6074 }
6075
6076 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6077 reverted instead. */
6078
6079 static void
6080 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
6081 struct iv_ca_delta *delta, bool forward)
6082 {
6083 struct cost_pair *from, *to;
6084 struct iv_ca_delta *act;
6085
6086 if (!forward)
6087 delta = iv_ca_delta_reverse (delta);
6088
6089 for (act = delta; act; act = act->next_change)
6090 {
6091 from = act->old_cp;
6092 to = act->new_cp;
6093 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
6094 iv_ca_set_cp (data, ivs, act->use, to);
6095 }
6096
6097 if (!forward)
6098 iv_ca_delta_reverse (delta);
6099 }
6100
6101 /* Returns true if CAND is used in IVS. */
6102
6103 static bool
6104 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
6105 {
6106 return ivs->n_cand_uses[cand->id] > 0;
6107 }
6108
6109 /* Returns number of induction variable candidates in the set IVS. */
6110
6111 static unsigned
6112 iv_ca_n_cands (struct iv_ca *ivs)
6113 {
6114 return ivs->n_cands;
6115 }
6116
6117 /* Free the list of changes DELTA. */
6118
6119 static void
6120 iv_ca_delta_free (struct iv_ca_delta **delta)
6121 {
6122 struct iv_ca_delta *act, *next;
6123
6124 for (act = *delta; act; act = next)
6125 {
6126 next = act->next_change;
6127 free (act);
6128 }
6129
6130 *delta = NULL;
6131 }
6132
6133 /* Allocates new iv candidates assignment. */
6134
6135 static struct iv_ca *
6136 iv_ca_new (struct ivopts_data *data)
6137 {
6138 struct iv_ca *nw = XNEW (struct iv_ca);
6139
6140 nw->upto = 0;
6141 nw->bad_uses = 0;
6142 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
6143 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
6144 nw->cands = BITMAP_ALLOC (NULL);
6145 nw->n_cands = 0;
6146 nw->n_regs = 0;
6147 nw->cand_use_cost = no_cost;
6148 nw->cand_cost = 0;
6149 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6150 nw->cost = no_cost;
6151 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
6152 nw->num_used_inv_expr = 0;
6153
6154 return nw;
6155 }
6156
6157 /* Free memory occupied by the set IVS. */
6158
6159 static void
6160 iv_ca_free (struct iv_ca **ivs)
6161 {
6162 free ((*ivs)->cand_for_use);
6163 free ((*ivs)->n_cand_uses);
6164 BITMAP_FREE ((*ivs)->cands);
6165 free ((*ivs)->n_invariant_uses);
6166 free ((*ivs)->used_inv_expr);
6167 free (*ivs);
6168 *ivs = NULL;
6169 }
6170
6171 /* Dumps IVS to FILE. */
6172
6173 static void
6174 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6175 {
6176 const char *pref = " invariants ";
6177 unsigned i;
6178 comp_cost cost = iv_ca_cost (ivs);
6179
6180 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
6181 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6182 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
6183 bitmap_print (file, ivs->cands, " candidates: ","\n");
6184
6185 for (i = 0; i < ivs->upto; i++)
6186 {
6187 struct iv_use *use = iv_use (data, i);
6188 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
6189 if (cp)
6190 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6191 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6192 else
6193 fprintf (file, " use:%d --> ??\n", use->id);
6194 }
6195
6196 for (i = 1; i <= data->max_inv_id; i++)
6197 if (ivs->n_invariant_uses[i])
6198 {
6199 fprintf (file, "%s%d", pref, i);
6200 pref = ", ";
6201 }
6202 fprintf (file, "\n\n");
6203 }
6204
6205 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6206 new set, and store differences in DELTA. Number of induction variables
6207 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6208 the function will try to find a solution with mimimal iv candidates. */
6209
6210 static comp_cost
6211 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6212 struct iv_cand *cand, struct iv_ca_delta **delta,
6213 unsigned *n_ivs, bool min_ncand)
6214 {
6215 unsigned i;
6216 comp_cost cost;
6217 struct iv_use *use;
6218 struct cost_pair *old_cp, *new_cp;
6219
6220 *delta = NULL;
6221 for (i = 0; i < ivs->upto; i++)
6222 {
6223 use = iv_use (data, i);
6224 old_cp = iv_ca_cand_for_use (ivs, use);
6225
6226 if (old_cp
6227 && old_cp->cand == cand)
6228 continue;
6229
6230 new_cp = get_use_iv_cost (data, use, cand);
6231 if (!new_cp)
6232 continue;
6233
6234 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6235 continue;
6236
6237 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6238 continue;
6239
6240 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6241 }
6242
6243 iv_ca_delta_commit (data, ivs, *delta, true);
6244 cost = iv_ca_cost (ivs);
6245 if (n_ivs)
6246 *n_ivs = iv_ca_n_cands (ivs);
6247 iv_ca_delta_commit (data, ivs, *delta, false);
6248
6249 return cost;
6250 }
6251
6252 /* Try narrowing set IVS by removing CAND. Return the cost of
6253 the new set and store the differences in DELTA. START is
6254 the candidate with which we start narrowing. */
6255
6256 static comp_cost
6257 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6258 struct iv_cand *cand, struct iv_cand *start,
6259 struct iv_ca_delta **delta)
6260 {
6261 unsigned i, ci;
6262 struct iv_use *use;
6263 struct cost_pair *old_cp, *new_cp, *cp;
6264 bitmap_iterator bi;
6265 struct iv_cand *cnd;
6266 comp_cost cost, best_cost, acost;
6267
6268 *delta = NULL;
6269 for (i = 0; i < n_iv_uses (data); i++)
6270 {
6271 use = iv_use (data, i);
6272
6273 old_cp = iv_ca_cand_for_use (ivs, use);
6274 if (old_cp->cand != cand)
6275 continue;
6276
6277 best_cost = iv_ca_cost (ivs);
6278 /* Start narrowing with START. */
6279 new_cp = get_use_iv_cost (data, use, start);
6280
6281 if (data->consider_all_candidates)
6282 {
6283 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6284 {
6285 if (ci == cand->id || (start && ci == start->id))
6286 continue;
6287
6288 cnd = iv_cand (data, ci);
6289
6290 cp = get_use_iv_cost (data, use, cnd);
6291 if (!cp)
6292 continue;
6293
6294 iv_ca_set_cp (data, ivs, use, cp);
6295 acost = iv_ca_cost (ivs);
6296
6297 if (compare_costs (acost, best_cost) < 0)
6298 {
6299 best_cost = acost;
6300 new_cp = cp;
6301 }
6302 }
6303 }
6304 else
6305 {
6306 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6307 {
6308 if (ci == cand->id || (start && ci == start->id))
6309 continue;
6310
6311 cnd = iv_cand (data, ci);
6312
6313 cp = get_use_iv_cost (data, use, cnd);
6314 if (!cp)
6315 continue;
6316
6317 iv_ca_set_cp (data, ivs, use, cp);
6318 acost = iv_ca_cost (ivs);
6319
6320 if (compare_costs (acost, best_cost) < 0)
6321 {
6322 best_cost = acost;
6323 new_cp = cp;
6324 }
6325 }
6326 }
6327 /* Restore to old cp for use. */
6328 iv_ca_set_cp (data, ivs, use, old_cp);
6329
6330 if (!new_cp)
6331 {
6332 iv_ca_delta_free (delta);
6333 return infinite_cost;
6334 }
6335
6336 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6337 }
6338
6339 iv_ca_delta_commit (data, ivs, *delta, true);
6340 cost = iv_ca_cost (ivs);
6341 iv_ca_delta_commit (data, ivs, *delta, false);
6342
6343 return cost;
6344 }
6345
6346 /* Try optimizing the set of candidates IVS by removing candidates different
6347 from to EXCEPT_CAND from it. Return cost of the new set, and store
6348 differences in DELTA. */
6349
6350 static comp_cost
6351 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6352 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6353 {
6354 bitmap_iterator bi;
6355 struct iv_ca_delta *act_delta, *best_delta;
6356 unsigned i;
6357 comp_cost best_cost, acost;
6358 struct iv_cand *cand;
6359
6360 best_delta = NULL;
6361 best_cost = iv_ca_cost (ivs);
6362
6363 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6364 {
6365 cand = iv_cand (data, i);
6366
6367 if (cand == except_cand)
6368 continue;
6369
6370 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6371
6372 if (compare_costs (acost, best_cost) < 0)
6373 {
6374 best_cost = acost;
6375 iv_ca_delta_free (&best_delta);
6376 best_delta = act_delta;
6377 }
6378 else
6379 iv_ca_delta_free (&act_delta);
6380 }
6381
6382 if (!best_delta)
6383 {
6384 *delta = NULL;
6385 return best_cost;
6386 }
6387
6388 /* Recurse to possibly remove other unnecessary ivs. */
6389 iv_ca_delta_commit (data, ivs, best_delta, true);
6390 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6391 iv_ca_delta_commit (data, ivs, best_delta, false);
6392 *delta = iv_ca_delta_join (best_delta, *delta);
6393 return best_cost;
6394 }
6395
6396 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6397 cheaper local cost for USE than BEST_CP. Return pointer to
6398 the corresponding cost_pair, otherwise just return BEST_CP. */
6399
6400 static struct cost_pair*
6401 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6402 unsigned int cand_idx, struct iv_cand *old_cand,
6403 struct cost_pair *best_cp)
6404 {
6405 struct iv_cand *cand;
6406 struct cost_pair *cp;
6407
6408 gcc_assert (old_cand != NULL && best_cp != NULL);
6409 if (cand_idx == old_cand->id)
6410 return best_cp;
6411
6412 cand = iv_cand (data, cand_idx);
6413 cp = get_use_iv_cost (data, use, cand);
6414 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6415 return cp;
6416
6417 return best_cp;
6418 }
6419
6420 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6421 which are used by more than one iv uses. For each of those candidates,
6422 this function tries to represent iv uses under that candidate using
6423 other ones with lower local cost, then tries to prune the new set.
6424 If the new set has lower cost, It returns the new cost after recording
6425 candidate replacement in list DELTA. */
6426
6427 static comp_cost
6428 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6429 struct iv_ca_delta **delta)
6430 {
6431 bitmap_iterator bi, bj;
6432 unsigned int i, j, k;
6433 struct iv_use *use;
6434 struct iv_cand *cand;
6435 comp_cost orig_cost, acost;
6436 struct iv_ca_delta *act_delta, *tmp_delta;
6437 struct cost_pair *old_cp, *best_cp = NULL;
6438
6439 *delta = NULL;
6440 orig_cost = iv_ca_cost (ivs);
6441
6442 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6443 {
6444 if (ivs->n_cand_uses[i] == 1
6445 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6446 continue;
6447
6448 cand = iv_cand (data, i);
6449
6450 act_delta = NULL;
6451 /* Represent uses under current candidate using other ones with
6452 lower local cost. */
6453 for (j = 0; j < ivs->upto; j++)
6454 {
6455 use = iv_use (data, j);
6456 old_cp = iv_ca_cand_for_use (ivs, use);
6457
6458 if (old_cp->cand != cand)
6459 continue;
6460
6461 best_cp = old_cp;
6462 if (data->consider_all_candidates)
6463 for (k = 0; k < n_iv_cands (data); k++)
6464 best_cp = cheaper_cost_with_cand (data, use, k,
6465 old_cp->cand, best_cp);
6466 else
6467 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6468 best_cp = cheaper_cost_with_cand (data, use, k,
6469 old_cp->cand, best_cp);
6470
6471 if (best_cp == old_cp)
6472 continue;
6473
6474 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6475 }
6476 /* No need for further prune. */
6477 if (!act_delta)
6478 continue;
6479
6480 /* Prune the new candidate set. */
6481 iv_ca_delta_commit (data, ivs, act_delta, true);
6482 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6483 iv_ca_delta_commit (data, ivs, act_delta, false);
6484 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6485
6486 if (compare_costs (acost, orig_cost) < 0)
6487 {
6488 *delta = act_delta;
6489 return acost;
6490 }
6491 else
6492 iv_ca_delta_free (&act_delta);
6493 }
6494
6495 return orig_cost;
6496 }
6497
6498 /* Tries to extend the sets IVS in the best possible way in order
6499 to express the USE. If ORIGINALP is true, prefer candidates from
6500 the original set of IVs, otherwise favor important candidates not
6501 based on any memory object. */
6502
6503 static bool
6504 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6505 struct iv_use *use, bool originalp)
6506 {
6507 comp_cost best_cost, act_cost;
6508 unsigned i;
6509 bitmap_iterator bi;
6510 struct iv_cand *cand;
6511 struct iv_ca_delta *best_delta = NULL, *act_delta;
6512 struct cost_pair *cp;
6513
6514 iv_ca_add_use (data, ivs, use);
6515 best_cost = iv_ca_cost (ivs);
6516 cp = iv_ca_cand_for_use (ivs, use);
6517 if (cp)
6518 {
6519 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6520 iv_ca_set_no_cp (data, ivs, use);
6521 }
6522
6523 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6524 first try important candidates not based on any memory object. Only if
6525 this fails, try the specific ones. Rationale -- in loops with many
6526 variables the best choice often is to use just one generic biv. If we
6527 added here many ivs specific to the uses, the optimization algorithm later
6528 would be likely to get stuck in a local minimum, thus causing us to create
6529 too many ivs. The approach from few ivs to more seems more likely to be
6530 successful -- starting from few ivs, replacing an expensive use by a
6531 specific iv should always be a win. */
6532 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6533 {
6534 cand = iv_cand (data, i);
6535
6536 if (originalp && cand->pos !=IP_ORIGINAL)
6537 continue;
6538
6539 if (!originalp && cand->iv->base_object != NULL_TREE)
6540 continue;
6541
6542 if (iv_ca_cand_used_p (ivs, cand))
6543 continue;
6544
6545 cp = get_use_iv_cost (data, use, cand);
6546 if (!cp)
6547 continue;
6548
6549 iv_ca_set_cp (data, ivs, use, cp);
6550 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6551 true);
6552 iv_ca_set_no_cp (data, ivs, use);
6553 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6554
6555 if (compare_costs (act_cost, best_cost) < 0)
6556 {
6557 best_cost = act_cost;
6558
6559 iv_ca_delta_free (&best_delta);
6560 best_delta = act_delta;
6561 }
6562 else
6563 iv_ca_delta_free (&act_delta);
6564 }
6565
6566 if (infinite_cost_p (best_cost))
6567 {
6568 for (i = 0; i < use->n_map_members; i++)
6569 {
6570 cp = use->cost_map + i;
6571 cand = cp->cand;
6572 if (!cand)
6573 continue;
6574
6575 /* Already tried this. */
6576 if (cand->important)
6577 {
6578 if (originalp && cand->pos == IP_ORIGINAL)
6579 continue;
6580 if (!originalp && cand->iv->base_object == NULL_TREE)
6581 continue;
6582 }
6583
6584 if (iv_ca_cand_used_p (ivs, cand))
6585 continue;
6586
6587 act_delta = NULL;
6588 iv_ca_set_cp (data, ivs, use, cp);
6589 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6590 iv_ca_set_no_cp (data, ivs, use);
6591 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6592 cp, act_delta);
6593
6594 if (compare_costs (act_cost, best_cost) < 0)
6595 {
6596 best_cost = act_cost;
6597
6598 if (best_delta)
6599 iv_ca_delta_free (&best_delta);
6600 best_delta = act_delta;
6601 }
6602 else
6603 iv_ca_delta_free (&act_delta);
6604 }
6605 }
6606
6607 iv_ca_delta_commit (data, ivs, best_delta, true);
6608 iv_ca_delta_free (&best_delta);
6609
6610 return !infinite_cost_p (best_cost);
6611 }
6612
6613 /* Finds an initial assignment of candidates to uses. */
6614
6615 static struct iv_ca *
6616 get_initial_solution (struct ivopts_data *data, bool originalp)
6617 {
6618 struct iv_ca *ivs = iv_ca_new (data);
6619 unsigned i;
6620
6621 for (i = 0; i < n_iv_uses (data); i++)
6622 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6623 {
6624 iv_ca_free (&ivs);
6625 return NULL;
6626 }
6627
6628 return ivs;
6629 }
6630
6631 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6632 points to a bool variable, this function tries to break local
6633 optimal fixed-point by replacing candidates in IVS if it's true. */
6634
6635 static bool
6636 try_improve_iv_set (struct ivopts_data *data,
6637 struct iv_ca *ivs, bool *try_replace_p)
6638 {
6639 unsigned i, n_ivs;
6640 comp_cost acost, best_cost = iv_ca_cost (ivs);
6641 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6642 struct iv_cand *cand;
6643
6644 /* Try extending the set of induction variables by one. */
6645 for (i = 0; i < n_iv_cands (data); i++)
6646 {
6647 cand = iv_cand (data, i);
6648
6649 if (iv_ca_cand_used_p (ivs, cand))
6650 continue;
6651
6652 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6653 if (!act_delta)
6654 continue;
6655
6656 /* If we successfully added the candidate and the set is small enough,
6657 try optimizing it by removing other candidates. */
6658 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6659 {
6660 iv_ca_delta_commit (data, ivs, act_delta, true);
6661 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6662 iv_ca_delta_commit (data, ivs, act_delta, false);
6663 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6664 }
6665
6666 if (compare_costs (acost, best_cost) < 0)
6667 {
6668 best_cost = acost;
6669 iv_ca_delta_free (&best_delta);
6670 best_delta = act_delta;
6671 }
6672 else
6673 iv_ca_delta_free (&act_delta);
6674 }
6675
6676 if (!best_delta)
6677 {
6678 /* Try removing the candidates from the set instead. */
6679 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6680
6681 if (!best_delta && *try_replace_p)
6682 {
6683 *try_replace_p = false;
6684 /* So far candidate selecting algorithm tends to choose fewer IVs
6685 so that it can handle cases in which loops have many variables
6686 but the best choice is often to use only one general biv. One
6687 weakness is it can't handle opposite cases, in which different
6688 candidates should be chosen with respect to each use. To solve
6689 the problem, we replace candidates in a manner described by the
6690 comments of iv_ca_replace, thus give general algorithm a chance
6691 to break local optimal fixed-point in these cases. */
6692 best_cost = iv_ca_replace (data, ivs, &best_delta);
6693 }
6694
6695 if (!best_delta)
6696 return false;
6697 }
6698
6699 iv_ca_delta_commit (data, ivs, best_delta, true);
6700 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6701 iv_ca_delta_free (&best_delta);
6702 return true;
6703 }
6704
6705 /* Attempts to find the optimal set of induction variables. We do simple
6706 greedy heuristic -- we try to replace at most one candidate in the selected
6707 solution and remove the unused ivs while this improves the cost. */
6708
6709 static struct iv_ca *
6710 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6711 {
6712 struct iv_ca *set;
6713 bool try_replace_p = true;
6714
6715 /* Get the initial solution. */
6716 set = get_initial_solution (data, originalp);
6717 if (!set)
6718 {
6719 if (dump_file && (dump_flags & TDF_DETAILS))
6720 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6721 return NULL;
6722 }
6723
6724 if (dump_file && (dump_flags & TDF_DETAILS))
6725 {
6726 fprintf (dump_file, "Initial set of candidates:\n");
6727 iv_ca_dump (data, dump_file, set);
6728 }
6729
6730 while (try_improve_iv_set (data, set, &try_replace_p))
6731 {
6732 if (dump_file && (dump_flags & TDF_DETAILS))
6733 {
6734 fprintf (dump_file, "Improved to:\n");
6735 iv_ca_dump (data, dump_file, set);
6736 }
6737 }
6738
6739 return set;
6740 }
6741
6742 static struct iv_ca *
6743 find_optimal_iv_set (struct ivopts_data *data)
6744 {
6745 unsigned i;
6746 struct iv_ca *set, *origset;
6747 struct iv_use *use;
6748 comp_cost cost, origcost;
6749
6750 /* Determine the cost based on a strategy that starts with original IVs,
6751 and try again using a strategy that prefers candidates not based
6752 on any IVs. */
6753 origset = find_optimal_iv_set_1 (data, true);
6754 set = find_optimal_iv_set_1 (data, false);
6755
6756 if (!origset && !set)
6757 return NULL;
6758
6759 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6760 cost = set ? iv_ca_cost (set) : infinite_cost;
6761
6762 if (dump_file && (dump_flags & TDF_DETAILS))
6763 {
6764 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6765 origcost.cost, origcost.complexity);
6766 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6767 cost.cost, cost.complexity);
6768 }
6769
6770 /* Choose the one with the best cost. */
6771 if (compare_costs (origcost, cost) <= 0)
6772 {
6773 if (set)
6774 iv_ca_free (&set);
6775 set = origset;
6776 }
6777 else if (origset)
6778 iv_ca_free (&origset);
6779
6780 for (i = 0; i < n_iv_uses (data); i++)
6781 {
6782 use = iv_use (data, i);
6783 use->selected = iv_ca_cand_for_use (set, use)->cand;
6784 }
6785
6786 return set;
6787 }
6788
6789 /* Creates a new induction variable corresponding to CAND. */
6790
6791 static void
6792 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6793 {
6794 gimple_stmt_iterator incr_pos;
6795 tree base;
6796 bool after = false;
6797
6798 if (!cand->iv)
6799 return;
6800
6801 switch (cand->pos)
6802 {
6803 case IP_NORMAL:
6804 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6805 break;
6806
6807 case IP_END:
6808 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6809 after = true;
6810 break;
6811
6812 case IP_AFTER_USE:
6813 after = true;
6814 /* fall through */
6815 case IP_BEFORE_USE:
6816 incr_pos = gsi_for_stmt (cand->incremented_at);
6817 break;
6818
6819 case IP_ORIGINAL:
6820 /* Mark that the iv is preserved. */
6821 name_info (data, cand->var_before)->preserve_biv = true;
6822 name_info (data, cand->var_after)->preserve_biv = true;
6823
6824 /* Rewrite the increment so that it uses var_before directly. */
6825 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6826 return;
6827 }
6828
6829 gimple_add_tmp_var (cand->var_before);
6830
6831 base = unshare_expr (cand->iv->base);
6832
6833 create_iv (base, unshare_expr (cand->iv->step),
6834 cand->var_before, data->current_loop,
6835 &incr_pos, after, &cand->var_before, &cand->var_after);
6836 }
6837
6838 /* Creates new induction variables described in SET. */
6839
6840 static void
6841 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6842 {
6843 unsigned i;
6844 struct iv_cand *cand;
6845 bitmap_iterator bi;
6846
6847 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6848 {
6849 cand = iv_cand (data, i);
6850 create_new_iv (data, cand);
6851 }
6852
6853 if (dump_file && (dump_flags & TDF_DETAILS))
6854 {
6855 fprintf (dump_file, "Selected IV set for loop %d",
6856 data->current_loop->num);
6857 if (data->loop_loc != UNKNOWN_LOCATION)
6858 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6859 LOCATION_LINE (data->loop_loc));
6860 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6861 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6862 {
6863 cand = iv_cand (data, i);
6864 dump_cand (dump_file, cand);
6865 }
6866 fprintf (dump_file, "\n");
6867 }
6868 }
6869
6870 /* Rewrites USE (definition of iv used in a nonlinear expression)
6871 using candidate CAND. */
6872
6873 static void
6874 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6875 struct iv_use *use, struct iv_cand *cand)
6876 {
6877 tree comp;
6878 tree op, tgt;
6879 gassign *ass;
6880 gimple_stmt_iterator bsi;
6881
6882 /* An important special case -- if we are asked to express value of
6883 the original iv by itself, just exit; there is no need to
6884 introduce a new computation (that might also need casting the
6885 variable to unsigned and back). */
6886 if (cand->pos == IP_ORIGINAL
6887 && cand->incremented_at == use->stmt)
6888 {
6889 enum tree_code stmt_code;
6890
6891 gcc_assert (is_gimple_assign (use->stmt));
6892 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6893
6894 /* Check whether we may leave the computation unchanged.
6895 This is the case only if it does not rely on other
6896 computations in the loop -- otherwise, the computation
6897 we rely upon may be removed in remove_unused_ivs,
6898 thus leading to ICE. */
6899 stmt_code = gimple_assign_rhs_code (use->stmt);
6900 if (stmt_code == PLUS_EXPR
6901 || stmt_code == MINUS_EXPR
6902 || stmt_code == POINTER_PLUS_EXPR)
6903 {
6904 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6905 op = gimple_assign_rhs2 (use->stmt);
6906 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6907 op = gimple_assign_rhs1 (use->stmt);
6908 else
6909 op = NULL_TREE;
6910 }
6911 else
6912 op = NULL_TREE;
6913
6914 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6915 return;
6916 }
6917
6918 comp = get_computation (data->current_loop, use, cand);
6919 gcc_assert (comp != NULL_TREE);
6920
6921 switch (gimple_code (use->stmt))
6922 {
6923 case GIMPLE_PHI:
6924 tgt = PHI_RESULT (use->stmt);
6925
6926 /* If we should keep the biv, do not replace it. */
6927 if (name_info (data, tgt)->preserve_biv)
6928 return;
6929
6930 bsi = gsi_after_labels (gimple_bb (use->stmt));
6931 break;
6932
6933 case GIMPLE_ASSIGN:
6934 tgt = gimple_assign_lhs (use->stmt);
6935 bsi = gsi_for_stmt (use->stmt);
6936 break;
6937
6938 default:
6939 gcc_unreachable ();
6940 }
6941
6942 if (!valid_gimple_rhs_p (comp)
6943 || (gimple_code (use->stmt) != GIMPLE_PHI
6944 /* We can't allow re-allocating the stmt as it might be pointed
6945 to still. */
6946 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6947 >= gimple_num_ops (gsi_stmt (bsi)))))
6948 {
6949 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6950 true, GSI_SAME_STMT);
6951 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6952 {
6953 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6954 /* As this isn't a plain copy we have to reset alignment
6955 information. */
6956 if (SSA_NAME_PTR_INFO (comp))
6957 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6958 }
6959 }
6960
6961 if (gimple_code (use->stmt) == GIMPLE_PHI)
6962 {
6963 ass = gimple_build_assign (tgt, comp);
6964 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6965
6966 bsi = gsi_for_stmt (use->stmt);
6967 remove_phi_node (&bsi, false);
6968 }
6969 else
6970 {
6971 gimple_assign_set_rhs_from_tree (&bsi, comp);
6972 use->stmt = gsi_stmt (bsi);
6973 }
6974 }
6975
6976 /* Performs a peephole optimization to reorder the iv update statement with
6977 a mem ref to enable instruction combining in later phases. The mem ref uses
6978 the iv value before the update, so the reordering transformation requires
6979 adjustment of the offset. CAND is the selected IV_CAND.
6980
6981 Example:
6982
6983 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6984 iv2 = iv1 + 1;
6985
6986 if (t < val) (1)
6987 goto L;
6988 goto Head;
6989
6990
6991 directly propagating t over to (1) will introduce overlapping live range
6992 thus increase register pressure. This peephole transform it into:
6993
6994
6995 iv2 = iv1 + 1;
6996 t = MEM_REF (base, iv2, 8, 8);
6997 if (t < val)
6998 goto L;
6999 goto Head;
7000 */
7001
7002 static void
7003 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
7004 {
7005 tree var_after;
7006 gimple *iv_update, *stmt;
7007 basic_block bb;
7008 gimple_stmt_iterator gsi, gsi_iv;
7009
7010 if (cand->pos != IP_NORMAL)
7011 return;
7012
7013 var_after = cand->var_after;
7014 iv_update = SSA_NAME_DEF_STMT (var_after);
7015
7016 bb = gimple_bb (iv_update);
7017 gsi = gsi_last_nondebug_bb (bb);
7018 stmt = gsi_stmt (gsi);
7019
7020 /* Only handle conditional statement for now. */
7021 if (gimple_code (stmt) != GIMPLE_COND)
7022 return;
7023
7024 gsi_prev_nondebug (&gsi);
7025 stmt = gsi_stmt (gsi);
7026 if (stmt != iv_update)
7027 return;
7028
7029 gsi_prev_nondebug (&gsi);
7030 if (gsi_end_p (gsi))
7031 return;
7032
7033 stmt = gsi_stmt (gsi);
7034 if (gimple_code (stmt) != GIMPLE_ASSIGN)
7035 return;
7036
7037 if (stmt != use->stmt)
7038 return;
7039
7040 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
7041 return;
7042
7043 if (dump_file && (dump_flags & TDF_DETAILS))
7044 {
7045 fprintf (dump_file, "Reordering \n");
7046 print_gimple_stmt (dump_file, iv_update, 0, 0);
7047 print_gimple_stmt (dump_file, use->stmt, 0, 0);
7048 fprintf (dump_file, "\n");
7049 }
7050
7051 gsi = gsi_for_stmt (use->stmt);
7052 gsi_iv = gsi_for_stmt (iv_update);
7053 gsi_move_before (&gsi_iv, &gsi);
7054
7055 cand->pos = IP_BEFORE_USE;
7056 cand->incremented_at = use->stmt;
7057 }
7058
7059 /* Rewrites USE (address that is an iv) using candidate CAND. */
7060
7061 static void
7062 rewrite_use_address_1 (struct ivopts_data *data,
7063 struct iv_use *use, struct iv_cand *cand)
7064 {
7065 aff_tree aff;
7066 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7067 tree base_hint = NULL_TREE;
7068 tree ref, iv;
7069 bool ok;
7070
7071 adjust_iv_update_pos (cand, use);
7072 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
7073 gcc_assert (ok);
7074 unshare_aff_combination (&aff);
7075
7076 /* To avoid undefined overflow problems, all IV candidates use unsigned
7077 integer types. The drawback is that this makes it impossible for
7078 create_mem_ref to distinguish an IV that is based on a memory object
7079 from one that represents simply an offset.
7080
7081 To work around this problem, we pass a hint to create_mem_ref that
7082 indicates which variable (if any) in aff is an IV based on a memory
7083 object. Note that we only consider the candidate. If this is not
7084 based on an object, the base of the reference is in some subexpression
7085 of the use -- but these will use pointer types, so they are recognized
7086 by the create_mem_ref heuristics anyway. */
7087 if (cand->iv->base_object)
7088 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
7089
7090 iv = var_at_stmt (data->current_loop, cand, use->stmt);
7091 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
7092 reference_alias_ptr_type (*use->op_p),
7093 iv, base_hint, data->speed);
7094 copy_ref_info (ref, *use->op_p);
7095 *use->op_p = ref;
7096 }
7097
7098 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7099 first use of a group, rewrites sub uses in the group too. */
7100
7101 static void
7102 rewrite_use_address (struct ivopts_data *data,
7103 struct iv_use *use, struct iv_cand *cand)
7104 {
7105 struct iv_use *next;
7106
7107 gcc_assert (use->sub_id == 0);
7108 rewrite_use_address_1 (data, use, cand);
7109 update_stmt (use->stmt);
7110
7111 for (next = use->next; next != NULL; next = next->next)
7112 {
7113 rewrite_use_address_1 (data, next, cand);
7114 update_stmt (next->stmt);
7115 }
7116
7117 return;
7118 }
7119
7120 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7121 candidate CAND. */
7122
7123 static void
7124 rewrite_use_compare (struct ivopts_data *data,
7125 struct iv_use *use, struct iv_cand *cand)
7126 {
7127 tree comp, *var_p, op, bound;
7128 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
7129 enum tree_code compare;
7130 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
7131 bool ok;
7132
7133 bound = cp->value;
7134 if (bound)
7135 {
7136 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
7137 tree var_type = TREE_TYPE (var);
7138 gimple_seq stmts;
7139
7140 if (dump_file && (dump_flags & TDF_DETAILS))
7141 {
7142 fprintf (dump_file, "Replacing exit test: ");
7143 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
7144 }
7145 compare = cp->comp;
7146 bound = unshare_expr (fold_convert (var_type, bound));
7147 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
7148 if (stmts)
7149 gsi_insert_seq_on_edge_immediate (
7150 loop_preheader_edge (data->current_loop),
7151 stmts);
7152
7153 gcond *cond_stmt = as_a <gcond *> (use->stmt);
7154 gimple_cond_set_lhs (cond_stmt, var);
7155 gimple_cond_set_code (cond_stmt, compare);
7156 gimple_cond_set_rhs (cond_stmt, op);
7157 return;
7158 }
7159
7160 /* The induction variable elimination failed; just express the original
7161 giv. */
7162 comp = get_computation (data->current_loop, use, cand);
7163 gcc_assert (comp != NULL_TREE);
7164
7165 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
7166 gcc_assert (ok);
7167
7168 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
7169 true, GSI_SAME_STMT);
7170 }
7171
7172 /* Rewrites USE using candidate CAND. */
7173
7174 static void
7175 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
7176 {
7177 switch (use->type)
7178 {
7179 case USE_NONLINEAR_EXPR:
7180 rewrite_use_nonlinear_expr (data, use, cand);
7181 break;
7182
7183 case USE_ADDRESS:
7184 rewrite_use_address (data, use, cand);
7185 break;
7186
7187 case USE_COMPARE:
7188 rewrite_use_compare (data, use, cand);
7189 break;
7190
7191 default:
7192 gcc_unreachable ();
7193 }
7194
7195 update_stmt (use->stmt);
7196 }
7197
7198 /* Rewrite the uses using the selected induction variables. */
7199
7200 static void
7201 rewrite_uses (struct ivopts_data *data)
7202 {
7203 unsigned i;
7204 struct iv_cand *cand;
7205 struct iv_use *use;
7206
7207 for (i = 0; i < n_iv_uses (data); i++)
7208 {
7209 use = iv_use (data, i);
7210 cand = use->selected;
7211 gcc_assert (cand);
7212
7213 rewrite_use (data, use, cand);
7214 }
7215 }
7216
7217 /* Removes the ivs that are not used after rewriting. */
7218
7219 static void
7220 remove_unused_ivs (struct ivopts_data *data)
7221 {
7222 unsigned j;
7223 bitmap_iterator bi;
7224 bitmap toremove = BITMAP_ALLOC (NULL);
7225
7226 /* Figure out an order in which to release SSA DEFs so that we don't
7227 release something that we'd have to propagate into a debug stmt
7228 afterwards. */
7229 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7230 {
7231 struct version_info *info;
7232
7233 info = ver_info (data, j);
7234 if (info->iv
7235 && !integer_zerop (info->iv->step)
7236 && !info->inv_id
7237 && !info->iv->have_use_for
7238 && !info->preserve_biv)
7239 {
7240 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7241
7242 tree def = info->iv->ssa_name;
7243
7244 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7245 {
7246 imm_use_iterator imm_iter;
7247 use_operand_p use_p;
7248 gimple *stmt;
7249 int count = 0;
7250
7251 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7252 {
7253 if (!gimple_debug_bind_p (stmt))
7254 continue;
7255
7256 /* We just want to determine whether to do nothing
7257 (count == 0), to substitute the computed
7258 expression into a single use of the SSA DEF by
7259 itself (count == 1), or to use a debug temp
7260 because the SSA DEF is used multiple times or as
7261 part of a larger expression (count > 1). */
7262 count++;
7263 if (gimple_debug_bind_get_value (stmt) != def)
7264 count++;
7265
7266 if (count > 1)
7267 BREAK_FROM_IMM_USE_STMT (imm_iter);
7268 }
7269
7270 if (!count)
7271 continue;
7272
7273 struct iv_use dummy_use;
7274 struct iv_cand *best_cand = NULL, *cand;
7275 unsigned i, best_pref = 0, cand_pref;
7276
7277 memset (&dummy_use, 0, sizeof (dummy_use));
7278 dummy_use.iv = info->iv;
7279 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7280 {
7281 cand = iv_use (data, i)->selected;
7282 if (cand == best_cand)
7283 continue;
7284 cand_pref = operand_equal_p (cand->iv->step,
7285 info->iv->step, 0)
7286 ? 4 : 0;
7287 cand_pref
7288 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7289 == TYPE_MODE (TREE_TYPE (info->iv->base))
7290 ? 2 : 0;
7291 cand_pref
7292 += TREE_CODE (cand->iv->base) == INTEGER_CST
7293 ? 1 : 0;
7294 if (best_cand == NULL || best_pref < cand_pref)
7295 {
7296 best_cand = cand;
7297 best_pref = cand_pref;
7298 }
7299 }
7300
7301 if (!best_cand)
7302 continue;
7303
7304 tree comp = get_computation_at (data->current_loop,
7305 &dummy_use, best_cand,
7306 SSA_NAME_DEF_STMT (def));
7307 if (!comp)
7308 continue;
7309
7310 if (count > 1)
7311 {
7312 tree vexpr = make_node (DEBUG_EXPR_DECL);
7313 DECL_ARTIFICIAL (vexpr) = 1;
7314 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7315 if (SSA_NAME_VAR (def))
7316 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7317 else
7318 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7319 gdebug *def_temp
7320 = gimple_build_debug_bind (vexpr, comp, NULL);
7321 gimple_stmt_iterator gsi;
7322
7323 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7324 gsi = gsi_after_labels (gimple_bb
7325 (SSA_NAME_DEF_STMT (def)));
7326 else
7327 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7328
7329 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7330 comp = vexpr;
7331 }
7332
7333 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7334 {
7335 if (!gimple_debug_bind_p (stmt))
7336 continue;
7337
7338 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7339 SET_USE (use_p, comp);
7340
7341 update_stmt (stmt);
7342 }
7343 }
7344 }
7345 }
7346
7347 release_defs_bitset (toremove);
7348
7349 BITMAP_FREE (toremove);
7350 }
7351
7352 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7353 for hash_map::traverse. */
7354
7355 bool
7356 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7357 {
7358 free (value);
7359 return true;
7360 }
7361
7362 /* Frees data allocated by the optimization of a single loop. */
7363
7364 static void
7365 free_loop_data (struct ivopts_data *data)
7366 {
7367 unsigned i, j;
7368 bitmap_iterator bi;
7369 tree obj;
7370
7371 if (data->niters)
7372 {
7373 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7374 delete data->niters;
7375 data->niters = NULL;
7376 }
7377
7378 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7379 {
7380 struct version_info *info;
7381
7382 info = ver_info (data, i);
7383 info->iv = NULL;
7384 info->has_nonlin_use = false;
7385 info->preserve_biv = false;
7386 info->inv_id = 0;
7387 }
7388 bitmap_clear (data->relevant);
7389 bitmap_clear (data->important_candidates);
7390
7391 for (i = 0; i < n_iv_uses (data); i++)
7392 {
7393 struct iv_use *use = iv_use (data, i);
7394 struct iv_use *pre = use, *sub = use->next;
7395
7396 while (sub)
7397 {
7398 gcc_assert (sub->related_cands == NULL);
7399 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7400
7401 pre = sub;
7402 sub = sub->next;
7403 free (pre);
7404 }
7405
7406 BITMAP_FREE (use->related_cands);
7407 for (j = 0; j < use->n_map_members; j++)
7408 if (use->cost_map[j].depends_on)
7409 BITMAP_FREE (use->cost_map[j].depends_on);
7410 free (use->cost_map);
7411 free (use);
7412 }
7413 data->iv_uses.truncate (0);
7414
7415 for (i = 0; i < n_iv_cands (data); i++)
7416 {
7417 struct iv_cand *cand = iv_cand (data, i);
7418
7419 if (cand->depends_on)
7420 BITMAP_FREE (cand->depends_on);
7421 free (cand);
7422 }
7423 data->iv_candidates.truncate (0);
7424
7425 if (data->version_info_size < num_ssa_names)
7426 {
7427 data->version_info_size = 2 * num_ssa_names;
7428 free (data->version_info);
7429 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7430 }
7431
7432 data->max_inv_id = 0;
7433
7434 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7435 SET_DECL_RTL (obj, NULL_RTX);
7436
7437 decl_rtl_to_reset.truncate (0);
7438
7439 data->inv_expr_tab->empty ();
7440 data->inv_expr_id = 0;
7441 }
7442
7443 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7444 loop tree. */
7445
7446 static void
7447 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7448 {
7449 free_loop_data (data);
7450 free (data->version_info);
7451 BITMAP_FREE (data->relevant);
7452 BITMAP_FREE (data->important_candidates);
7453
7454 decl_rtl_to_reset.release ();
7455 data->iv_uses.release ();
7456 data->iv_candidates.release ();
7457 delete data->inv_expr_tab;
7458 data->inv_expr_tab = NULL;
7459 free_affine_expand_cache (&data->name_expansion_cache);
7460 obstack_free (&data->iv_obstack, NULL);
7461 }
7462
7463 /* Returns true if the loop body BODY includes any function calls. */
7464
7465 static bool
7466 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7467 {
7468 gimple_stmt_iterator gsi;
7469 unsigned i;
7470
7471 for (i = 0; i < num_nodes; i++)
7472 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7473 {
7474 gimple *stmt = gsi_stmt (gsi);
7475 if (is_gimple_call (stmt)
7476 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7477 return true;
7478 }
7479 return false;
7480 }
7481
7482 /* Optimizes the LOOP. Returns true if anything changed. */
7483
7484 static bool
7485 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7486 {
7487 bool changed = false;
7488 struct iv_ca *iv_ca;
7489 edge exit = single_dom_exit (loop);
7490 basic_block *body;
7491
7492 gcc_assert (!data->niters);
7493 data->current_loop = loop;
7494 data->loop_loc = find_loop_location (loop);
7495 data->speed = optimize_loop_for_speed_p (loop);
7496
7497 if (dump_file && (dump_flags & TDF_DETAILS))
7498 {
7499 fprintf (dump_file, "Processing loop %d", loop->num);
7500 if (data->loop_loc != UNKNOWN_LOCATION)
7501 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7502 LOCATION_LINE (data->loop_loc));
7503 fprintf (dump_file, "\n");
7504
7505 if (exit)
7506 {
7507 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7508 exit->src->index, exit->dest->index);
7509 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7510 fprintf (dump_file, "\n");
7511 }
7512
7513 fprintf (dump_file, "\n");
7514 }
7515
7516 body = get_loop_body (loop);
7517 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7518 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7519 free (body);
7520
7521 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7522
7523 /* For each ssa name determines whether it behaves as an induction variable
7524 in some loop. */
7525 if (!find_induction_variables (data))
7526 goto finish;
7527
7528 /* Finds interesting uses (item 1). */
7529 find_interesting_uses (data);
7530 group_address_uses (data);
7531 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7532 goto finish;
7533
7534 /* Finds candidates for the induction variables (item 2). */
7535 find_iv_candidates (data);
7536
7537 /* Calculates the costs (item 3, part 1). */
7538 determine_iv_costs (data);
7539 determine_use_iv_costs (data);
7540 determine_set_costs (data);
7541
7542 /* Find the optimal set of induction variables (item 3, part 2). */
7543 iv_ca = find_optimal_iv_set (data);
7544 if (!iv_ca)
7545 goto finish;
7546 changed = true;
7547
7548 /* Create the new induction variables (item 4, part 1). */
7549 create_new_ivs (data, iv_ca);
7550 iv_ca_free (&iv_ca);
7551
7552 /* Rewrite the uses (item 4, part 2). */
7553 rewrite_uses (data);
7554
7555 /* Remove the ivs that are unused after rewriting. */
7556 remove_unused_ivs (data);
7557
7558 /* We have changed the structure of induction variables; it might happen
7559 that definitions in the scev database refer to some of them that were
7560 eliminated. */
7561 scev_reset ();
7562
7563 finish:
7564 free_loop_data (data);
7565
7566 return changed;
7567 }
7568
7569 /* Main entry point. Optimizes induction variables in loops. */
7570
7571 void
7572 tree_ssa_iv_optimize (void)
7573 {
7574 struct loop *loop;
7575 struct ivopts_data data;
7576
7577 tree_ssa_iv_optimize_init (&data);
7578
7579 /* Optimize the loops starting with the innermost ones. */
7580 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7581 {
7582 if (dump_file && (dump_flags & TDF_DETAILS))
7583 flow_loop_dump (loop, dump_file, NULL, 1);
7584
7585 tree_ssa_iv_optimize_loop (&data, loop);
7586 }
7587
7588 tree_ssa_iv_optimize_finalize (&data);
7589 }