/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, and it might give better performance and enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
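
/* As an illustration (not part of the pass itself), the classical
   strength-reduction transformation performed here turns an indexed
   access such as

     for (i = 0; i < n; i++)
       sum += a[i];

   into a form that advances a pointer directly,

     for (p = a; p != a + n; p++)
       sum += *p;

   replacing the per-iteration computation of &a[i] (base plus scaled
   offset) with a single pointer increment.  */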

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate-cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so that
         we do not create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
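
/* For instance (hypothetical trees), determine_base_object applied to
   &a[i] yields &a, applied to a POINTER_PLUS_EXPR such as p + 4 it
   recurses into the pointer p, and applied to an integer constant it
   yields NULL_TREE.  */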

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with the given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE except those with a DECL_P operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
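
/* For illustration, a typical biv as it appears in GIMPLE, with
   hypothetical SSA names:

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   For the phi defining i_1, determine_biv_step returns the step 1;
   for a phi whose result does not change within the loop it returns
   NULL_TREE.  */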

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv that was peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not reset by the caller, so that it
     does not show up in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
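
/* For example (hypothetical SSA names), given the condition i_3 < n_7
   where i_3 is an iv with nonzero step and n_7 is loop invariant,
   extract_cond_operands sets *CONTROL_VAR to the location of i_3 and
   *BOUND to the location of n_7, and returns true.  */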

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
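
/* For example (hypothetical SSA names), for TOP = x_1 * 8 + x_1 * 4 and
   BOT = x_1 this stores 12 to *MUL and returns true, while for
   TOP = x_1 + 1 with the same BOT it fails, since the constant 1 is not
   a constant multiple of x_1.  */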
1718
1719 /* Return true if memory reference REF with step STEP may be unaligned. */
1720
1721 static bool
1722 may_be_unaligned_p (tree ref, tree step)
1723 {
1724 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1725 thus they are not misaligned. */
1726 if (TREE_CODE (ref) == TARGET_MEM_REF)
1727 return false;
1728
1729 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1730 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1731 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1732
1733 unsigned HOST_WIDE_INT bitpos;
1734 unsigned int ref_align;
1735 get_object_alignment_1 (ref, &ref_align, &bitpos);
1736 if (ref_align < align
1737 || (bitpos % align) != 0
1738 || (bitpos % BITS_PER_UNIT) != 0)
1739 return true;
1740
1741 unsigned int trailing_zeros = tree_ctz (step);
1742 if (trailing_zeros < HOST_BITS_PER_INT
1743 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1744 return true;
1745
1746 return false;
1747 }
1748
1749 /* Return true if EXPR may be non-addressable. */
1750
1751 bool
1752 may_be_nonaddressable_p (tree expr)
1753 {
1754 switch (TREE_CODE (expr))
1755 {
1756 case TARGET_MEM_REF:
1757 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1758 target, thus they are always addressable. */
1759 return false;
1760
1761 case COMPONENT_REF:
1762 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1763 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1764
1765 case VIEW_CONVERT_EXPR:
1766 /* This kind of view-conversions may wrap non-addressable objects
1767 and make them look addressable. After some processing the
1768 non-addressability may be uncovered again, causing ADDR_EXPRs
1769 of inappropriate objects to be built. */
1770 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1771 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1772 return true;
1773
1774 /* ... fall through ... */
1775
1776 case ARRAY_REF:
1777 case ARRAY_RANGE_REF:
1778 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1779
1780 CASE_CONVERT:
1781 return true;
1782
1783 default:
1784 break;
1785 }
1786
1787 return false;
1788 }
1789
1790 /* Finds addresses in *OP_P inside STMT. */
1791
1792 static void
1793 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1794 {
1795 tree base = *op_p, step = size_zero_node;
1796 struct iv *civ;
1797 struct ifs_ivopts_data ifs_ivopts_data;
1798
1799 /* Do not play with volatile memory references. A bit too conservative,
1800 perhaps, but safe. */
1801 if (gimple_has_volatile_ops (stmt))
1802 goto fail;
1803
1804 /* Ignore bitfields for now. Not really something terribly complicated
1805 to handle. TODO. */
1806 if (TREE_CODE (base) == BIT_FIELD_REF)
1807 goto fail;
1808
1809 base = unshare_expr (base);
1810
1811 if (TREE_CODE (base) == TARGET_MEM_REF)
1812 {
1813 tree type = build_pointer_type (TREE_TYPE (base));
1814 tree astep;
1815
1816 if (TMR_BASE (base)
1817 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1818 {
1819 civ = get_iv (data, TMR_BASE (base));
1820 if (!civ)
1821 goto fail;
1822
1823 TMR_BASE (base) = civ->base;
1824 step = civ->step;
1825 }
1826 if (TMR_INDEX2 (base)
1827 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1828 {
1829 civ = get_iv (data, TMR_INDEX2 (base));
1830 if (!civ)
1831 goto fail;
1832
1833 TMR_INDEX2 (base) = civ->base;
1834 step = civ->step;
1835 }
1836 if (TMR_INDEX (base)
1837 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1838 {
1839 civ = get_iv (data, TMR_INDEX (base));
1840 if (!civ)
1841 goto fail;
1842
1843 TMR_INDEX (base) = civ->base;
1844 astep = civ->step;
1845
1846 if (astep)
1847 {
1848 if (TMR_STEP (base))
1849 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1850
1851 step = fold_build2 (PLUS_EXPR, type, step, astep);
1852 }
1853 }
1854
1855 if (integer_zerop (step))
1856 goto fail;
1857 base = tree_mem_ref_addr (type, base);
1858 }
1859 else
1860 {
1861 ifs_ivopts_data.ivopts_data = data;
1862 ifs_ivopts_data.stmt = stmt;
1863 ifs_ivopts_data.step = size_zero_node;
1864 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1865 || integer_zerop (ifs_ivopts_data.step))
1866 goto fail;
1867 step = ifs_ivopts_data.step;
1868
1869 /* Check that the base expression is addressable. This needs
1870 to be done after substituting bases of IVs into it. */
1871 if (may_be_nonaddressable_p (base))
1872 goto fail;
1873
1874 /* Moreover, on strict alignment platforms, check that it is
1875 sufficiently aligned. */
1876 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1877 goto fail;
1878
1879 base = build_fold_addr_expr (base);
1880
1881 /* Substituting bases of IVs into the base expression might
1882 have caused folding opportunities. */
1883 if (TREE_CODE (base) == ADDR_EXPR)
1884 {
1885 tree *ref = &TREE_OPERAND (base, 0);
1886 while (handled_component_p (*ref))
1887 ref = &TREE_OPERAND (*ref, 0);
1888 if (TREE_CODE (*ref) == MEM_REF)
1889 {
1890 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1891 TREE_OPERAND (*ref, 0),
1892 TREE_OPERAND (*ref, 1));
1893 if (tem)
1894 *ref = tem;
1895 }
1896 }
1897 }
1898
1899 civ = alloc_iv (base, step);
1900 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1901 return;
1902
1903 fail:
1904 for_each_index (op_p, idx_record_use, data);
1905 }
1906
1907 /* Finds and records invariants used in STMT. */
1908
1909 static void
1910 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1911 {
1912 ssa_op_iter iter;
1913 use_operand_p use_p;
1914 tree op;
1915
1916 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1917 {
1918 op = USE_FROM_PTR (use_p);
1919 record_invariant (data, op, false);
1920 }
1921 }
1922
1923 /* Finds interesting uses of induction variables in the statement STMT. */
1924
1925 static void
1926 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1927 {
1928 struct iv *iv;
1929 tree op, *lhs, *rhs;
1930 ssa_op_iter iter;
1931 use_operand_p use_p;
1932 enum tree_code code;
1933
1934 find_invariants_stmt (data, stmt);
1935
1936 if (gimple_code (stmt) == GIMPLE_COND)
1937 {
1938 find_interesting_uses_cond (data, stmt);
1939 return;
1940 }
1941
1942 if (is_gimple_assign (stmt))
1943 {
1944 lhs = gimple_assign_lhs_ptr (stmt);
1945 rhs = gimple_assign_rhs1_ptr (stmt);
1946
1947 if (TREE_CODE (*lhs) == SSA_NAME)
1948 {
1949 /* If the statement defines an induction variable, the uses are not
1950 interesting by themselves. */
1951
1952 iv = get_iv (data, *lhs);
1953
1954 if (iv && !integer_zerop (iv->step))
1955 return;
1956 }
1957
1958 code = gimple_assign_rhs_code (stmt);
1959 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1960 && (REFERENCE_CLASS_P (*rhs)
1961 || is_gimple_val (*rhs)))
1962 {
1963 if (REFERENCE_CLASS_P (*rhs))
1964 find_interesting_uses_address (data, stmt, rhs);
1965 else
1966 find_interesting_uses_op (data, *rhs);
1967
1968 if (REFERENCE_CLASS_P (*lhs))
1969 find_interesting_uses_address (data, stmt, lhs);
1970 return;
1971 }
1972 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1973 {
1974 find_interesting_uses_cond (data, stmt);
1975 return;
1976 }
1977
1978 /* TODO -- we should also handle address uses of type
1979
1980 memory = call (whatever);
1981
1982 and
1983
1984 call (memory). */
1985 }
1986
1987 if (gimple_code (stmt) == GIMPLE_PHI
1988 && gimple_bb (stmt) == data->current_loop->header)
1989 {
1990 iv = get_iv (data, PHI_RESULT (stmt));
1991
1992 if (iv && !integer_zerop (iv->step))
1993 return;
1994 }
1995
1996 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1997 {
1998 op = USE_FROM_PTR (use_p);
1999
2000 if (TREE_CODE (op) != SSA_NAME)
2001 continue;
2002
2003 iv = get_iv (data, op);
2004 if (!iv)
2005 continue;
2006
2007 find_interesting_uses_op (data, op);
2008 }
2009 }
2010
2011 /* Finds interesting uses of induction variables outside of loops
2012 on loop exit edge EXIT. */
2013
2014 static void
2015 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2016 {
2017 gphi *phi;
2018 gphi_iterator psi;
2019 tree def;
2020
2021 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2022 {
2023 phi = psi.phi ();
2024 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2025 if (!virtual_operand_p (def))
2026 find_interesting_uses_op (data, def);
2027 }
2028 }
2029
2030 /* Finds uses of the induction variables that are interesting. */
2031
2032 static void
2033 find_interesting_uses (struct ivopts_data *data)
2034 {
2035 basic_block bb;
2036 gimple_stmt_iterator bsi;
2037 basic_block *body = get_loop_body (data->current_loop);
2038 unsigned i;
2039 struct version_info *info;
2040 edge e;
2041
2042 if (dump_file && (dump_flags & TDF_DETAILS))
2043 fprintf (dump_file, "Uses:\n\n");
2044
2045 for (i = 0; i < data->current_loop->num_nodes; i++)
2046 {
2047 edge_iterator ei;
2048 bb = body[i];
2049
2050 FOR_EACH_EDGE (e, ei, bb->succs)
2051 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2052 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2053 find_interesting_uses_outside (data, e);
2054
2055 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2056 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2057 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2058 if (!is_gimple_debug (gsi_stmt (bsi)))
2059 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2060 }
2061
2062 if (dump_file && (dump_flags & TDF_DETAILS))
2063 {
2064 bitmap_iterator bi;
2065
2066 fprintf (dump_file, "\n");
2067
2068 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2069 {
2070 info = ver_info (data, i);
2071 if (info->inv_id)
2072 {
2073 fprintf (dump_file, " ");
2074 print_generic_expr (dump_file, info->name, TDF_SLIM);
2075 fprintf (dump_file, " is invariant (%d)%s\n",
2076 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2077 }
2078 }
2079
2080 fprintf (dump_file, "\n");
2081 }
2082
2083 free (body);
2084 }
2085
2086 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2087 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2088 we are at the top-level of the processed address. */
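/* For illustration (hypothetical example, not from the sources), with an
   array "a" of 4-byte ints: stripping offsets from &a[i + 3] yields the
   core &a[i] with *OFFSET == 12, and stripping p + 7 yields p with
   *OFFSET == 7.  */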
2089
2090 static tree
2091 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2092 HOST_WIDE_INT *offset)
2093 {
2094 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2095 enum tree_code code;
2096 tree type, orig_type = TREE_TYPE (expr);
2097 HOST_WIDE_INT off0, off1, st;
2098 tree orig_expr = expr;
2099
2100 STRIP_NOPS (expr);
2101
2102 type = TREE_TYPE (expr);
2103 code = TREE_CODE (expr);
2104 *offset = 0;
2105
2106 switch (code)
2107 {
2108 case INTEGER_CST:
2109 if (!cst_and_fits_in_hwi (expr)
2110 || integer_zerop (expr))
2111 return orig_expr;
2112
2113 *offset = int_cst_value (expr);
2114 return build_int_cst (orig_type, 0);
2115
2116 case POINTER_PLUS_EXPR:
2117 case PLUS_EXPR:
2118 case MINUS_EXPR:
2119 op0 = TREE_OPERAND (expr, 0);
2120 op1 = TREE_OPERAND (expr, 1);
2121
2122 op0 = strip_offset_1 (op0, false, false, &off0);
2123 op1 = strip_offset_1 (op1, false, false, &off1);
2124
2125 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2126 if (op0 == TREE_OPERAND (expr, 0)
2127 && op1 == TREE_OPERAND (expr, 1))
2128 return orig_expr;
2129
2130 if (integer_zerop (op1))
2131 expr = op0;
2132 else if (integer_zerop (op0))
2133 {
2134 if (code == MINUS_EXPR)
2135 expr = fold_build1 (NEGATE_EXPR, type, op1);
2136 else
2137 expr = op1;
2138 }
2139 else
2140 expr = fold_build2 (code, type, op0, op1);
2141
2142 return fold_convert (orig_type, expr);
2143
2144 case MULT_EXPR:
2145 op1 = TREE_OPERAND (expr, 1);
2146 if (!cst_and_fits_in_hwi (op1))
2147 return orig_expr;
2148
2149 op0 = TREE_OPERAND (expr, 0);
2150 op0 = strip_offset_1 (op0, false, false, &off0);
2151 if (op0 == TREE_OPERAND (expr, 0))
2152 return orig_expr;
2153
2154 *offset = off0 * int_cst_value (op1);
2155 if (integer_zerop (op0))
2156 expr = op0;
2157 else
2158 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2159
2160 return fold_convert (orig_type, expr);
2161
2162 case ARRAY_REF:
2163 case ARRAY_RANGE_REF:
2164 if (!inside_addr)
2165 return orig_expr;
2166
2167 step = array_ref_element_size (expr);
2168 if (!cst_and_fits_in_hwi (step))
2169 break;
2170
2171 st = int_cst_value (step);
2172 op1 = TREE_OPERAND (expr, 1);
2173 op1 = strip_offset_1 (op1, false, false, &off1);
2174 *offset = off1 * st;
2175
2176 if (top_compref
2177 && integer_zerop (op1))
2178 {
2179 /* Strip the component reference completely. */
2180 op0 = TREE_OPERAND (expr, 0);
2181 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2182 *offset += off0;
2183 return op0;
2184 }
2185 break;
2186
2187 case COMPONENT_REF:
2188 {
2189 tree field;
2190
2191 if (!inside_addr)
2192 return orig_expr;
2193
2194 tmp = component_ref_field_offset (expr);
2195 field = TREE_OPERAND (expr, 1);
2196 if (top_compref
2197 && cst_and_fits_in_hwi (tmp)
2198 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2199 {
2200 HOST_WIDE_INT boffset, abs_off;
2201
2202 /* Strip the component reference completely. */
2203 op0 = TREE_OPERAND (expr, 0);
2204 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2205 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2206 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2207 if (boffset < 0)
2208 abs_off = -abs_off;
2209
2210 *offset = off0 + int_cst_value (tmp) + abs_off;
2211 return op0;
2212 }
2213 }
2214 break;
2215
2216 case ADDR_EXPR:
2217 op0 = TREE_OPERAND (expr, 0);
2218 op0 = strip_offset_1 (op0, true, true, &off0);
2219 *offset += off0;
2220
2221 if (op0 == TREE_OPERAND (expr, 0))
2222 return orig_expr;
2223
2224 expr = build_fold_addr_expr (op0);
2225 return fold_convert (orig_type, expr);
2226
2227 case MEM_REF:
2228 /* ??? Offset operand? */
2229 inside_addr = false;
2230 break;
2231
2232 default:
2233 return orig_expr;
2234 }
2235
2236 /* Default handling of expressions for which we want to recurse into
2237 the first operand. */
2238 op0 = TREE_OPERAND (expr, 0);
2239 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2240 *offset += off0;
2241
2242 if (op0 == TREE_OPERAND (expr, 0)
2243 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2244 return orig_expr;
2245
2246 expr = copy_node (expr);
2247 TREE_OPERAND (expr, 0) = op0;
2248 if (op1)
2249 TREE_OPERAND (expr, 1) = op1;
2250
2251 /* Inside an address, we might strip the top-level component references,
2252 thus changing the type of the expression. Handling of ADDR_EXPR
2253 will fix that. */
2254 expr = fold_convert (orig_type, expr);
2255
2256 return expr;
2257 }
2258
2259 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2260
2261 static tree
2262 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2263 {
2264 HOST_WIDE_INT off;
2265 tree core = strip_offset_1 (expr, false, false, &off);
2266 *offset = off;
2267 return core;
2268 }
2269
2270 /* Returns a variant of TYPE that can be used as a base for different uses.
2271 We return an unsigned type with the same precision, which avoids problems
2272 with overflows. */
2273
2274 static tree
2275 generic_type_for (tree type)
2276 {
2277 if (POINTER_TYPE_P (type))
2278 return unsigned_type_for (type);
2279
2280 if (TYPE_UNSIGNED (type))
2281 return type;
2282
2283 return unsigned_type_for (type);
2284 }
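/* For instance, "int" becomes "unsigned int", and a pointer type becomes
   an unsigned integer type of the same precision; "unsigned long" is
   already overflow-safe and is returned unchanged.  */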
2285
2286 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2287 the bitmap in which we should store it. */
2288
2289 static struct ivopts_data *fd_ivopts_data;
2290 static tree
2291 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2292 {
2293 bitmap *depends_on = (bitmap *) data;
2294 struct version_info *info;
2295
2296 if (TREE_CODE (*expr_p) != SSA_NAME)
2297 return NULL_TREE;
2298 info = name_info (fd_ivopts_data, *expr_p);
2299
2300 if (!info->inv_id || info->has_nonlin_use)
2301 return NULL_TREE;
2302
2303 if (!*depends_on)
2304 *depends_on = BITMAP_ALLOC (NULL);
2305 bitmap_set_bit (*depends_on, info->inv_id);
2306
2307 return NULL_TREE;
2308 }
2309
2310 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2311 position to POS. If USE is not NULL, the candidate is set as related to
2312 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2313 replacement of the final value of the iv by a direct computation. */
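/* For example, if a candidate with base 0, step 4 and the same position
   already exists, the search loop below finds it and no new candidate is
   allocated; the existing one is merely marked important and/or related
   to USE as requested.  */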
2314
2315 static struct iv_cand *
2316 add_candidate_1 (struct ivopts_data *data,
2317 tree base, tree step, bool important, enum iv_position pos,
2318 struct iv_use *use, gimple incremented_at)
2319 {
2320 unsigned i;
2321 struct iv_cand *cand = NULL;
2322 tree type, orig_type;
2323
2324 /* For non-original variables, make sure their values are computed in a type
2325 that does not invoke undefined behavior on overflows (since in general,
2326 we cannot prove that these induction variables are non-wrapping). */
2327 if (pos != IP_ORIGINAL)
2328 {
2329 orig_type = TREE_TYPE (base);
2330 type = generic_type_for (orig_type);
2331 if (type != orig_type)
2332 {
2333 base = fold_convert (type, base);
2334 step = fold_convert (type, step);
2335 }
2336 }
2337
2338 for (i = 0; i < n_iv_cands (data); i++)
2339 {
2340 cand = iv_cand (data, i);
2341
2342 if (cand->pos != pos)
2343 continue;
2344
2345 if (cand->incremented_at != incremented_at
2346 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2347 && cand->ainc_use != use))
2348 continue;
2349
2350 if (!cand->iv)
2351 {
2352 if (!base && !step)
2353 break;
2354
2355 continue;
2356 }
2357
2358 if (!base && !step)
2359 continue;
2360
2361 if (operand_equal_p (base, cand->iv->base, 0)
2362 && operand_equal_p (step, cand->iv->step, 0)
2363 && (TYPE_PRECISION (TREE_TYPE (base))
2364 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2365 break;
2366 }
2367
2368 if (i == n_iv_cands (data))
2369 {
2370 cand = XCNEW (struct iv_cand);
2371 cand->id = i;
2372
2373 if (!base && !step)
2374 cand->iv = NULL;
2375 else
2376 cand->iv = alloc_iv (base, step);
2377
2378 cand->pos = pos;
2379 if (pos != IP_ORIGINAL && cand->iv)
2380 {
2381 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2382 cand->var_after = cand->var_before;
2383 }
2384 cand->important = important;
2385 cand->incremented_at = incremented_at;
2386 data->iv_candidates.safe_push (cand);
2387
2388 if (step
2389 && TREE_CODE (step) != INTEGER_CST)
2390 {
2391 fd_ivopts_data = data;
2392 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2393 }
2394
2395 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2396 cand->ainc_use = use;
2397 else
2398 cand->ainc_use = NULL;
2399
2400 if (dump_file && (dump_flags & TDF_DETAILS))
2401 dump_cand (dump_file, cand);
2402 }
2403
2404 if (important && !cand->important)
2405 {
2406 cand->important = true;
2407 if (dump_file && (dump_flags & TDF_DETAILS))
2408 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2409 }
2410
2411 if (use)
2412 {
2413 bitmap_set_bit (use->related_cands, i);
2414 if (dump_file && (dump_flags & TDF_DETAILS))
2415 fprintf (dump_file, "Candidate %d is related to use %d\n",
2416 cand->id, use->id);
2417 }
2418
2419 return cand;
2420 }
2421
2422 /* Returns true if incrementing the induction variable at the end of the LOOP
2423 is allowed.
2424
2425 The purpose is to avoid splitting the latch edge with a biv increment, thus
2426 creating a jump, possibly confusing other optimization passes and leaving
2427 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2428 is not available (so we do not have a better alternative), or if the latch
2429 edge is already nonempty. */
2430
2431 static bool
2432 allow_ip_end_pos_p (struct loop *loop)
2433 {
2434 if (!ip_normal_pos (loop))
2435 return true;
2436
2437 if (!empty_block_p (ip_end_pos (loop)))
2438 return true;
2439
2440 return false;
2441 }
2442
2443 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2444 Important field is set to IMPORTANT. */
2445
2446 static void
2447 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2448 bool important, struct iv_use *use)
2449 {
2450 basic_block use_bb = gimple_bb (use->stmt);
2451 machine_mode mem_mode;
2452 unsigned HOST_WIDE_INT cstepi;
2453
2454 /* If we insert the increment in any position other than the standard
2455 ones, we must ensure that it is incremented once per iteration.
2456 It must not be in an inner nested loop, nor on one side of an if
2457 statement. */
2458 if (use_bb->loop_father != data->current_loop
2459 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2460 || stmt_could_throw_p (use->stmt)
2461 || !cst_and_fits_in_hwi (step))
2462 return;
2463
2464 cstepi = int_cst_value (step);
2465
2466 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2467 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2468 || USE_STORE_PRE_INCREMENT (mem_mode))
2469 && GET_MODE_SIZE (mem_mode) == cstepi)
2470 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2471 || USE_STORE_PRE_DECREMENT (mem_mode))
2472 && GET_MODE_SIZE (mem_mode) == -cstepi))
2473 {
2474 enum tree_code code = MINUS_EXPR;
2475 tree new_base;
2476 tree new_step = step;
2477
2478 if (POINTER_TYPE_P (TREE_TYPE (base)))
2479 {
2480 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2481 code = POINTER_PLUS_EXPR;
2482 }
2483 else
2484 new_step = fold_convert (TREE_TYPE (base), new_step);
2485 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2486 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2487 use->stmt);
2488 }
2489 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2490 || USE_STORE_POST_INCREMENT (mem_mode))
2491 && GET_MODE_SIZE (mem_mode) == cstepi)
2492 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2493 || USE_STORE_POST_DECREMENT (mem_mode))
2494 && GET_MODE_SIZE (mem_mode) == -cstepi))
2495 {
2496 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2497 use->stmt);
2498 }
2499 }
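/* As an illustration, assume a hypothetical target with pre/post-increment
   addressing and a 4-byte SImode access *p with step 4: the IP_BEFORE_USE
   candidate is given base p - 4, matching the pre-increment form *(++p),
   while the IP_AFTER_USE candidate keeps base p, matching the
   post-increment form *(p++).  */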
2500
2501 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2502 position to POS. If USE is not NULL, the candidate is set as related to
2503 it. The candidate computation is scheduled on all available positions. */
2504
2505 static void
2506 add_candidate (struct ivopts_data *data,
2507 tree base, tree step, bool important, struct iv_use *use)
2508 {
2509 if (ip_normal_pos (data->current_loop))
2510 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2511 if (ip_end_pos (data->current_loop)
2512 && allow_ip_end_pos_p (data->current_loop))
2513 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2514
2515 if (use != NULL && use->type == USE_ADDRESS)
2516 add_autoinc_candidates (data, base, step, important, use);
2517 }
2518
2519 /* Adds standard iv candidates. */
2520
2521 static void
2522 add_standard_iv_candidates (struct ivopts_data *data)
2523 {
2524 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2525
2526 /* The same for the double-width long type if it is still fast enough. */
2527 if (TYPE_PRECISION
2528 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2529 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2530 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2531 build_int_cst (long_integer_type_node, 1), true, NULL);
2532
2533 /* Likewise for long long if it is wider than long and still fast enough. */
2534 if (TYPE_PRECISION
2535 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2536 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2537 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2538 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2539 }
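/* E.g. on a typical LP64 target this adds the counters {0, +, 1} in
   unsigned int and in unsigned long; the long long block is skipped
   there, since long long is no wider than long.  */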
2540
2541
2542 /* Adds candidates based on the old induction variable IV. */
2543
2544 static void
2545 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2546 {
2547 gimple phi;
2548 tree def;
2549 struct iv_cand *cand;
2550
2551 add_candidate (data, iv->base, iv->step, true, NULL);
2552
2553 /* The same, but with initial value zero. */
2554 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2555 add_candidate (data, size_int (0), iv->step, true, NULL);
2556 else
2557 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2558 iv->step, true, NULL);
2559
2560 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2561 if (gimple_code (phi) == GIMPLE_PHI)
2562 {
2563 /* Additionally record the possibility of leaving the original iv
2564 untouched. */
2565 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2566 /* Don't add candidate if it's from another PHI node because
2567 it's an affine iv appearing in the form of PEELED_CHREC. */
2568 phi = SSA_NAME_DEF_STMT (def);
2569 if (gimple_code (phi) != GIMPLE_PHI)
2570 {
2571 cand = add_candidate_1 (data,
2572 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2573 SSA_NAME_DEF_STMT (def));
2574 cand->var_before = iv->ssa_name;
2575 cand->var_after = def;
2576 }
2577 else
2578 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2579 }
2580 }
2581
2582 /* Adds candidates based on the old induction variables. */
2583
2584 static void
2585 add_old_ivs_candidates (struct ivopts_data *data)
2586 {
2587 unsigned i;
2588 struct iv *iv;
2589 bitmap_iterator bi;
2590
2591 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2592 {
2593 iv = ver_info (data, i)->iv;
2594 if (iv && iv->biv_p && !integer_zerop (iv->step))
2595 add_old_iv_candidates (data, iv);
2596 }
2597 }
2598
2599 /* Adds candidates based on the value of the induction variable IV and USE. */
2600
2601 static void
2602 add_iv_value_candidates (struct ivopts_data *data,
2603 struct iv *iv, struct iv_use *use)
2604 {
2605 unsigned HOST_WIDE_INT offset;
2606 tree base;
2607 tree basetype;
2608
2609 add_candidate (data, iv->base, iv->step, false, use);
2610
2611 /* The same, but with initial value zero. Make such a variable important,
2612 since it is generic enough that possibly many uses may be based
2613 on it. */
2614 basetype = TREE_TYPE (iv->base);
2615 if (POINTER_TYPE_P (basetype))
2616 basetype = sizetype;
2617 add_candidate (data, build_int_cst (basetype, 0),
2618 iv->step, true, use);
2619
2620 /* Third, try removing the constant offset. Make sure to even
2621 add a candidate for &a[0] vs. (T *)&a. */
2622 base = strip_offset (iv->base, &offset);
2623 if (offset
2624 || base != iv->base)
2625 add_candidate (data, base, iv->step, false, use);
2626 }
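/* For instance, a use with base &a[0] + 16 and step 4 also produces the
   candidate with base &a[0] (constant offset stripped) and the important
   zero-based candidate {0, +, 4}, so uses differing only in a constant
   offset can share a single induction variable.  */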
2627
2628 /* Adds candidates based on the uses. */
2629
2630 static void
2631 add_derived_ivs_candidates (struct ivopts_data *data)
2632 {
2633 unsigned i;
2634
2635 for (i = 0; i < n_iv_uses (data); i++)
2636 {
2637 struct iv_use *use = iv_use (data, i);
2638
2639 if (!use)
2640 continue;
2641
2642 switch (use->type)
2643 {
2644 case USE_NONLINEAR_EXPR:
2645 case USE_COMPARE:
2646 case USE_ADDRESS:
2647 /* Just add the ivs based on the value of the iv used here. */
2648 add_iv_value_candidates (data, use->iv, use);
2649 break;
2650
2651 default:
2652 gcc_unreachable ();
2653 }
2654 }
2655 }
2656
2657 /* Record important candidates and add them to related_cands bitmaps
2658 if needed. */
2659
2660 static void
2661 record_important_candidates (struct ivopts_data *data)
2662 {
2663 unsigned i;
2664 struct iv_use *use;
2665
2666 for (i = 0; i < n_iv_cands (data); i++)
2667 {
2668 struct iv_cand *cand = iv_cand (data, i);
2669
2670 if (cand->important)
2671 bitmap_set_bit (data->important_candidates, i);
2672 }
2673
2674 data->consider_all_candidates = (n_iv_cands (data)
2675 <= CONSIDER_ALL_CANDIDATES_BOUND);
2676
2677 if (data->consider_all_candidates)
2678 {
2679 /* We will not need "related_cands" bitmaps in this case,
2680 so release them to decrease peak memory consumption. */
2681 for (i = 0; i < n_iv_uses (data); i++)
2682 {
2683 use = iv_use (data, i);
2684 BITMAP_FREE (use->related_cands);
2685 }
2686 }
2687 else
2688 {
2689 /* Add important candidates to the related_cands bitmaps. */
2690 for (i = 0; i < n_iv_uses (data); i++)
2691 bitmap_ior_into (iv_use (data, i)->related_cands,
2692 data->important_candidates);
2693 }
2694 }
2695
2696 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2697 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2698 we allocate a simple list for every use. */
2699
2700 static void
2701 alloc_use_cost_map (struct ivopts_data *data)
2702 {
2703 unsigned i, size, s;
2704
2705 for (i = 0; i < n_iv_uses (data); i++)
2706 {
2707 struct iv_use *use = iv_use (data, i);
2708
2709 if (data->consider_all_candidates)
2710 size = n_iv_cands (data);
2711 else
2712 {
2713 s = bitmap_count_bits (use->related_cands);
2714
2715 /* Round up to the nearest power of two, so that taking modulo by it is fast. */
2716 size = s ? (1 << ceil_log2 (s)) : 1;
2717 }
2718
2719 use->n_map_members = size;
2720 use->cost_map = XCNEWVEC (struct cost_pair, size);
2721 }
2722 }
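/* E.g. a use with 5 related candidates gets an 8-entry map, so the probe
   in set_use_iv_cost can reduce a candidate id with a bitwise "and" by 7
   instead of a division.  */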
2723
2724 /* Returns description of computation cost of expression whose runtime
2725 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2726
2727 static comp_cost
2728 new_cost (unsigned runtime, unsigned complexity)
2729 {
2730 comp_cost cost;
2731
2732 cost.cost = runtime;
2733 cost.complexity = complexity;
2734
2735 return cost;
2736 }
2737
2738 /* Adds costs COST1 and COST2. */
2739
2740 static comp_cost
2741 add_costs (comp_cost cost1, comp_cost cost2)
2742 {
2743 cost1.cost += cost2.cost;
2744 cost1.complexity += cost2.complexity;
2745
2746 return cost1;
2747 }
2748 /* Subtracts costs COST1 and COST2. */
2749
2750 static comp_cost
2751 sub_costs (comp_cost cost1, comp_cost cost2)
2752 {
2753 cost1.cost -= cost2.cost;
2754 cost1.complexity -= cost2.complexity;
2755
2756 return cost1;
2757 }
2758
2759 /* Returns a negative number if COST1 < COST2, a positive number if
2760 COST1 > COST2, and 0 if COST1 = COST2. */
2761
2762 static int
2763 compare_costs (comp_cost cost1, comp_cost cost2)
2764 {
2765 if (cost1.cost == cost2.cost)
2766 return cost1.complexity - cost2.complexity;
2767
2768 return cost1.cost - cost2.cost;
2769 }
2770
2771 /* Returns true if COST is infinite. */
2772
2773 static bool
2774 infinite_cost_p (comp_cost cost)
2775 {
2776 return cost.cost == INFTY;
2777 }
2778
2779 /* Sets the cost of the (USE, CANDIDATE) pair to COST, records that it depends
2780 on the invariants DEPENDS_ON, that the value used in expressing it
2781 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2782
2783 static void
2784 set_use_iv_cost (struct ivopts_data *data,
2785 struct iv_use *use, struct iv_cand *cand,
2786 comp_cost cost, bitmap depends_on, tree value,
2787 enum tree_code comp, int inv_expr_id)
2788 {
2789 unsigned i, s;
2790
2791 if (infinite_cost_p (cost))
2792 {
2793 BITMAP_FREE (depends_on);
2794 return;
2795 }
2796
2797 if (data->consider_all_candidates)
2798 {
2799 use->cost_map[cand->id].cand = cand;
2800 use->cost_map[cand->id].cost = cost;
2801 use->cost_map[cand->id].depends_on = depends_on;
2802 use->cost_map[cand->id].value = value;
2803 use->cost_map[cand->id].comp = comp;
2804 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2805 return;
2806 }
2807
2808 /* n_map_members is a power of two, so this computes modulo. */
2809 s = cand->id & (use->n_map_members - 1);
2810 for (i = s; i < use->n_map_members; i++)
2811 if (!use->cost_map[i].cand)
2812 goto found;
2813 for (i = 0; i < s; i++)
2814 if (!use->cost_map[i].cand)
2815 goto found;
2816
2817 gcc_unreachable ();
2818
2819 found:
2820 use->cost_map[i].cand = cand;
2821 use->cost_map[i].cost = cost;
2822 use->cost_map[i].depends_on = depends_on;
2823 use->cost_map[i].value = value;
2824 use->cost_map[i].comp = comp;
2825 use->cost_map[i].inv_expr_id = inv_expr_id;
2826 }
2827
2828 /* Gets cost of (USE, CANDIDATE) pair. */
2829
2830 static struct cost_pair *
2831 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2832 struct iv_cand *cand)
2833 {
2834 unsigned i, s;
2835 struct cost_pair *ret;
2836
2837 if (!cand)
2838 return NULL;
2839
2840 if (data->consider_all_candidates)
2841 {
2842 ret = use->cost_map + cand->id;
2843 if (!ret->cand)
2844 return NULL;
2845
2846 return ret;
2847 }
2848
2849 /* n_map_members is a power of two, so this computes modulo. */
2850 s = cand->id & (use->n_map_members - 1);
2851 for (i = s; i < use->n_map_members; i++)
2852 if (use->cost_map[i].cand == cand)
2853 return use->cost_map + i;
2854 else if (use->cost_map[i].cand == NULL)
2855 return NULL;
2856 for (i = 0; i < s; i++)
2857 if (use->cost_map[i].cand == cand)
2858 return use->cost_map + i;
2859 else if (use->cost_map[i].cand == NULL)
2860 return NULL;
2861
2862 return NULL;
2863 }
2864
2865 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2866 static rtx
2867 produce_memory_decl_rtl (tree obj, int *regno)
2868 {
2869 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2870 machine_mode address_mode = targetm.addr_space.address_mode (as);
2871 rtx x;
2872
2873 gcc_assert (obj);
2874 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2875 {
2876 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2877 x = gen_rtx_SYMBOL_REF (address_mode, name);
2878 SET_SYMBOL_REF_DECL (x, obj);
2879 x = gen_rtx_MEM (DECL_MODE (obj), x);
2880 set_mem_addr_space (x, as);
2881 targetm.encode_section_info (obj, x, true);
2882 }
2883 else
2884 {
2885 x = gen_raw_REG (address_mode, (*regno)++);
2886 x = gen_rtx_MEM (DECL_MODE (obj), x);
2887 set_mem_addr_space (x, as);
2888 }
2889
2890 return x;
2891 }
2892
2893 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2894 walk_tree. DATA contains the actual fake register number. */
2895
2896 static tree
2897 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2898 {
2899 tree obj = NULL_TREE;
2900 rtx x = NULL_RTX;
2901 int *regno = (int *) data;
2902
2903 switch (TREE_CODE (*expr_p))
2904 {
2905 case ADDR_EXPR:
2906 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2907 handled_component_p (*expr_p);
2908 expr_p = &TREE_OPERAND (*expr_p, 0))
2909 continue;
2910 obj = *expr_p;
2911 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2912 x = produce_memory_decl_rtl (obj, regno);
2913 break;
2914
2915 case SSA_NAME:
2916 *ws = 0;
2917 obj = SSA_NAME_VAR (*expr_p);
2918 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2919 if (!obj)
2920 return NULL_TREE;
2921 if (!DECL_RTL_SET_P (obj))
2922 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2923 break;
2924
2925 case VAR_DECL:
2926 case PARM_DECL:
2927 case RESULT_DECL:
2928 *ws = 0;
2929 obj = *expr_p;
2930
2931 if (DECL_RTL_SET_P (obj))
2932 break;
2933
2934 if (DECL_MODE (obj) == BLKmode)
2935 x = produce_memory_decl_rtl (obj, regno);
2936 else
2937 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2938
2939 break;
2940
2941 default:
2942 break;
2943 }
2944
2945 if (x)
2946 {
2947 decl_rtl_to_reset.safe_push (obj);
2948 SET_DECL_RTL (obj, x);
2949 }
2950
2951 return NULL_TREE;
2952 }
2953
2954 /* Determines cost of the computation of EXPR. */
2955
2956 static unsigned
2957 computation_cost (tree expr, bool speed)
2958 {
2959 rtx_insn *seq;
2960 rtx rslt;
2961 tree type = TREE_TYPE (expr);
2962 unsigned cost;
2963 /* Avoid using hard regs in ways which may be unsupported. */
2964 int regno = LAST_VIRTUAL_REGISTER + 1;
2965 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2966 enum node_frequency real_frequency = node->frequency;
2967
2968 node->frequency = NODE_FREQUENCY_NORMAL;
2969 crtl->maybe_hot_insn_p = speed;
2970 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2971 start_sequence ();
2972 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2973 seq = get_insns ();
2974 end_sequence ();
2975 default_rtl_profile ();
2976 node->frequency = real_frequency;
2977
2978 cost = seq_cost (seq, speed);
2979 if (MEM_P (rslt))
2980 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2981 TYPE_ADDR_SPACE (type), speed);
2982 else if (!REG_P (rslt))
2983 cost += set_src_cost (rslt, speed);
2984
2985 return cost;
2986 }
2987
2988 /* Returns variable containing the value of candidate CAND at statement AT. */
2989
2990 static tree
2991 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2992 {
2993 if (stmt_after_increment (loop, cand, stmt))
2994 return cand->var_after;
2995 else
2996 return cand->var_before;
2997 }
2998
2999 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3000 same precision that is at least as wide as the precision of TYPE, stores
3001 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3002 type of A and B. */
3003
3004 static tree
3005 determine_common_wider_type (tree *a, tree *b)
3006 {
3007 tree wider_type = NULL;
3008 tree suba, subb;
3009 tree atype = TREE_TYPE (*a);
3010
3011 if (CONVERT_EXPR_P (*a))
3012 {
3013 suba = TREE_OPERAND (*a, 0);
3014 wider_type = TREE_TYPE (suba);
3015 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3016 return atype;
3017 }
3018 else
3019 return atype;
3020
3021 if (CONVERT_EXPR_P (*b))
3022 {
3023 subb = TREE_OPERAND (*b, 0);
3024 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3025 return atype;
3026 }
3027 else
3028 return atype;
3029
3030 *a = suba;
3031 *b = subb;
3032 return wider_type;
3033 }
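/* For instance, if A is (unsigned int) x and B is (unsigned int) y, where
   x and y have type unsigned long, *A and *B are replaced by x and y and
   "unsigned long" is returned, so the caller can fold the difference of
   the bases in the wider type.  */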
3034
3035 /* Determines the expression by which USE is expressed from induction variable
3036 CAND at statement AT in LOOP. The expression is stored in a decomposed
3037 form into AFF. Returns false if USE cannot be expressed using CAND. */
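/* The decomposition below follows from the affine forms
   use = ubase + i * ustep and var = cbase + i * cstep: since
   ustep == ratio * cstep, eliminating i gives
   use = ubase - ratio * cbase + ratio * var.  */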
3038
3039 static bool
3040 get_computation_aff (struct loop *loop,
3041 struct iv_use *use, struct iv_cand *cand, gimple at,
3042 struct aff_tree *aff)
3043 {
3044 tree ubase = use->iv->base;
3045 tree ustep = use->iv->step;
3046 tree cbase = cand->iv->base;
3047 tree cstep = cand->iv->step, cstep_common;
3048 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3049 tree common_type, var;
3050 tree uutype;
3051 aff_tree cbase_aff, var_aff;
3052 widest_int rat;
3053
3054 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3055 {
3056 /* We do not have enough precision to express the values of the use. */
3057 return false;
3058 }
3059
3060 var = var_at_stmt (loop, cand, at);
3061 uutype = unsigned_type_for (utype);
3062
3063 /* If the conversion is not a noop, perform it. */
3064 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3065 {
3066 cstep = fold_convert (uutype, cstep);
3067 cbase = fold_convert (uutype, cbase);
3068 var = fold_convert (uutype, var);
3069 }
3070
3071 if (!constant_multiple_of (ustep, cstep, &rat))
3072 return false;
3073
3074 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3075 type, we achieve better folding by computing their difference in this
3076 wider type, and cast the result to UUTYPE. We do not need to worry about
3077 overflows, as all the arithmetic will in the end be performed in UUTYPE
3078 anyway. */
3079 common_type = determine_common_wider_type (&ubase, &cbase);
3080
3081 /* use = ubase - ratio * cbase + ratio * var. */
3082 tree_to_aff_combination (ubase, common_type, aff);
3083 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3084 tree_to_aff_combination (var, uutype, &var_aff);
3085
3086 /* We need to shift the value if we are after the increment. */
3087 if (stmt_after_increment (loop, cand, at))
3088 {
3089 aff_tree cstep_aff;
3090
3091 if (common_type != uutype)
3092 cstep_common = fold_convert (common_type, cstep);
3093 else
3094 cstep_common = cstep;
3095
3096 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3097 aff_combination_add (&cbase_aff, &cstep_aff);
3098 }
3099
3100 aff_combination_scale (&cbase_aff, -rat);
3101 aff_combination_add (aff, &cbase_aff);
3102 if (common_type != uutype)
3103 aff_combination_convert (aff, uutype);
3104
3105 aff_combination_scale (&var_aff, rat);
3106 aff_combination_add (aff, &var_aff);
3107
3108 return true;
3109 }
3110
3111 /* Return the type of USE. */
3112
3113 static tree
3114 get_use_type (struct iv_use *use)
3115 {
3116 tree base_type = TREE_TYPE (use->iv->base);
3117 tree type;
3118
3119 if (use->type == USE_ADDRESS)
3120 {
3121 /* The base_type may be a void pointer. Create a pointer type based on
3122 the mem_ref instead. */
3123 type = build_pointer_type (TREE_TYPE (*use->op_p));
3124 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3125 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3126 }
3127 else
3128 type = base_type;
3129
3130 return type;
3131 }
3132
3133 /* Determines the expression by which USE is expressed from induction variable
3134 CAND at statement AT in LOOP. The computation is unshared. */
3135
3136 static tree
3137 get_computation_at (struct loop *loop,
3138 struct iv_use *use, struct iv_cand *cand, gimple at)
3139 {
3140 aff_tree aff;
3141 tree type = get_use_type (use);
3142
3143 if (!get_computation_aff (loop, use, cand, at, &aff))
3144 return NULL_TREE;
3145 unshare_aff_combination (&aff);
3146 return fold_convert (type, aff_combination_to_tree (&aff));
3147 }
3148
3149 /* Determines the expression by which USE is expressed from induction variable
3150 CAND in LOOP. The computation is unshared. */
3151
3152 static tree
3153 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3154 {
3155 return get_computation_at (loop, use, cand, use->stmt);
3156 }
3157
3158 /* Adjust the cost COST for being in loop setup rather than loop body.
3159 If we're optimizing for space, the loop setup overhead is constant;
3160 if we're optimizing for speed, amortize it over the per-iteration cost. */
3161 static unsigned
3162 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3163 {
3164 if (cost == INFTY)
3165 return cost;
3166 else if (optimize_loop_for_speed_p (data->current_loop))
3167 return cost / avg_loop_niter (data->current_loop);
3168 else
3169 return cost;
3170 }
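/* E.g. when optimizing for speed, a setup cost of 10 in a loop that
   averages 5 iterations is accounted as 2 per iteration.  */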
3171
3172 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3173 validity for a memory reference accessing memory of mode MODE in
3174 address space AS. */
3175
3176
3177 bool
3178 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3179 addr_space_t as)
3180 {
3181 #define MAX_RATIO 128
3182 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3183 static vec<sbitmap> valid_mult_list;
3184 sbitmap valid_mult;
3185
3186 if (data_index >= valid_mult_list.length ())
3187 valid_mult_list.safe_grow_cleared (data_index + 1);
3188
3189 valid_mult = valid_mult_list[data_index];
3190 if (!valid_mult)
3191 {
3192 machine_mode address_mode = targetm.addr_space.address_mode (as);
3193 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3194 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3195 rtx addr, scaled;
3196 HOST_WIDE_INT i;
3197
3198 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3199 bitmap_clear (valid_mult);
3200 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3201 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3202 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3203 {
3204 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3205 if (memory_address_addr_space_p (mode, addr, as)
3206 || memory_address_addr_space_p (mode, scaled, as))
3207 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3208 }
3209
3210 if (dump_file && (dump_flags & TDF_DETAILS))
3211 {
3212 fprintf (dump_file, " allowed multipliers:");
3213 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3214 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3215 fprintf (dump_file, " %d", (int) i);
3216 fprintf (dump_file, "\n");
3217 fprintf (dump_file, "\n");
3218 }
3219
3220 valid_mult_list[data_index] = valid_mult;
3221 }
3222
3223 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3224 return false;
3225
3226 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3227 }
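/* E.g. on an x86-like target the probe above would find the ratios 1, 2,
   4 and 8 valid for SImode, so an index scaled by 4 is free inside the
   address, while a scale of 3 must be computed separately.  */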
3228
3229 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3230 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3231 variable is omitted. Compute the cost for a memory reference that accesses
3232 a memory location of mode MEM_MODE in address space AS.
3233
3234 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3235 size of MEM_MODE / RATIO) is available. To make this determination, we
3236 look at the size of the increment to be made, which is given in CSTEP.
3237 CSTEP may be zero if the step is unknown.
3238 STMT_AFTER_INC is true iff the statement we're looking at is after the
3239 increment of the original biv.
3240
3241 TODO -- there must be some better way. This all is quite crude. */
3242
3243 enum ainc_type
3244 {
3245 AINC_PRE_INC, /* Pre increment. */
3246 AINC_PRE_DEC, /* Pre decrement. */
3247 AINC_POST_INC, /* Post increment. */
3248 AINC_POST_DEC, /* Post decrement. */
3249 AINC_NONE /* Also the number of auto increment types. */
3250 };
3251
3252 typedef struct address_cost_data_s
3253 {
3254 HOST_WIDE_INT min_offset, max_offset;
3255 unsigned costs[2][2][2][2];
3256 unsigned ainc_costs[AINC_NONE];
3257 } *address_cost_data;
3258
3259
3260 static comp_cost
3261 get_address_cost (bool symbol_present, bool var_present,
3262 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3263 HOST_WIDE_INT cstep, machine_mode mem_mode,
3264 addr_space_t as, bool speed,
3265 bool stmt_after_inc, bool *may_autoinc)
3266 {
3267 machine_mode address_mode = targetm.addr_space.address_mode (as);
3268 static vec<address_cost_data> address_cost_data_list;
3269 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3270 address_cost_data data;
3271 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3272 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3273 unsigned cost, acost, complexity;
3274 enum ainc_type autoinc_type;
3275 bool offset_p, ratio_p, autoinc;
3276 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3277 unsigned HOST_WIDE_INT mask;
3278 unsigned bits;
3279
3280 if (data_index >= address_cost_data_list.length ())
3281 address_cost_data_list.safe_grow_cleared (data_index + 1);
3282
3283 data = address_cost_data_list[data_index];
3284 if (!data)
3285 {
3286 HOST_WIDE_INT i;
3287 HOST_WIDE_INT rat, off = 0;
3288 int old_cse_not_expected, width;
3289 unsigned sym_p, var_p, off_p, rat_p, add_c;
3290 rtx_insn *seq;
3291 rtx addr, base;
3292 rtx reg0, reg1;
3293
3294 data = (address_cost_data) xcalloc (1, sizeof (*data));
3295
3296 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3297
3298 width = GET_MODE_BITSIZE (address_mode) - 1;
3299 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3300 width = HOST_BITS_PER_WIDE_INT - 1;
3301 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3302
3303 for (i = width; i >= 0; i--)
3304 {
3305 off = -((unsigned HOST_WIDE_INT) 1 << i);
3306 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3307 if (memory_address_addr_space_p (mem_mode, addr, as))
3308 break;
3309 }
3310 data->min_offset = (i == -1? 0 : off);
3311
3312 for (i = width; i >= 0; i--)
3313 {
3314 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3315 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3316 if (memory_address_addr_space_p (mem_mode, addr, as))
3317 break;
3318 /* For some targets, like ARM Thumb-1, the offset must be naturally
3319 aligned. Try an aligned offset if address_mode is not QImode. */
3320 off = (address_mode == QImode)
3321 ? 0
3322 : ((unsigned HOST_WIDE_INT) 1 << i)
3323 - GET_MODE_SIZE (address_mode);
3324 if (off > 0)
3325 {
3326 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3327 if (memory_address_addr_space_p (mem_mode, addr, as))
3328 break;
3329 }
3330 }
3331 if (i == -1)
3332 off = 0;
3333 data->max_offset = off;
3334
3335 if (dump_file && (dump_flags & TDF_DETAILS))
3336 {
3337 fprintf (dump_file, "get_address_cost:\n");
3338 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3339 GET_MODE_NAME (mem_mode),
3340 data->min_offset);
3341 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3342 GET_MODE_NAME (mem_mode),
3343 data->max_offset);
3344 }
3345
3346 rat = 1;
3347 for (i = 2; i <= MAX_RATIO; i++)
3348 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3349 {
3350 rat = i;
3351 break;
3352 }
3353
3354 /* Compute the cost of various addressing modes. */
3355 acost = 0;
3356 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3357 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3358
3359 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3360 || USE_STORE_PRE_DECREMENT (mem_mode))
3361 {
3362 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3363 has_predec[mem_mode]
3364 = memory_address_addr_space_p (mem_mode, addr, as);
3365
3366 if (has_predec[mem_mode])
3367 data->ainc_costs[AINC_PRE_DEC]
3368 = address_cost (addr, mem_mode, as, speed);
3369 }
3370 if (USE_LOAD_POST_DECREMENT (mem_mode)
3371 || USE_STORE_POST_DECREMENT (mem_mode))
3372 {
3373 addr = gen_rtx_POST_DEC (address_mode, reg0);
3374 has_postdec[mem_mode]
3375 = memory_address_addr_space_p (mem_mode, addr, as);
3376
3377 if (has_postdec[mem_mode])
3378 data->ainc_costs[AINC_POST_DEC]
3379 = address_cost (addr, mem_mode, as, speed);
3380 }
3381 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3382 || USE_STORE_PRE_INCREMENT (mem_mode))
3383 {
3384 addr = gen_rtx_PRE_INC (address_mode, reg0);
3385 has_preinc[mem_mode]
3386 = memory_address_addr_space_p (mem_mode, addr, as);
3387
3388 if (has_preinc[mem_mode])
3389 data->ainc_costs[AINC_PRE_INC]
3390 = address_cost (addr, mem_mode, as, speed);
3391 }
3392 if (USE_LOAD_POST_INCREMENT (mem_mode)
3393 || USE_STORE_POST_INCREMENT (mem_mode))
3394 {
3395 addr = gen_rtx_POST_INC (address_mode, reg0);
3396 has_postinc[mem_mode]
3397 = memory_address_addr_space_p (mem_mode, addr, as);
3398
3399 if (has_postinc[mem_mode])
3400 data->ainc_costs[AINC_POST_INC]
3401 = address_cost (addr, mem_mode, as, speed);
3402 }
3403 for (i = 0; i < 16; i++)
3404 {
3405 sym_p = i & 1;
3406 var_p = (i >> 1) & 1;
3407 off_p = (i >> 2) & 1;
3408 rat_p = (i >> 3) & 1;
3409
3410 addr = reg0;
3411 if (rat_p)
3412 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3413 gen_int_mode (rat, address_mode));
3414
3415 if (var_p)
3416 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3417
3418 if (sym_p)
3419 {
3420 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3421 /* ??? We can run into trouble with some backends by presenting
3422 it with symbols which haven't been properly passed through
3423 targetm.encode_section_info. By setting the local bit, we
3424 enhance the probability of things working. */
3425 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3426
3427 if (off_p)
3428 base = gen_rtx_fmt_e (CONST, address_mode,
3429 gen_rtx_fmt_ee
3430 (PLUS, address_mode, base,
3431 gen_int_mode (off, address_mode)));
3432 }
3433 else if (off_p)
3434 base = gen_int_mode (off, address_mode);
3435 else
3436 base = NULL_RTX;
3437
3438 if (base)
3439 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3440
3441 start_sequence ();
3442 /* To avoid splitting addressing modes, pretend that no cse will
3443 follow. */
3444 old_cse_not_expected = cse_not_expected;
3445 cse_not_expected = true;
3446 addr = memory_address_addr_space (mem_mode, addr, as);
3447 cse_not_expected = old_cse_not_expected;
3448 seq = get_insns ();
3449 end_sequence ();
3450
3451 acost = seq_cost (seq, speed);
3452 acost += address_cost (addr, mem_mode, as, speed);
3453
3454 if (!acost)
3455 acost = 1;
3456 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3457 }
3458
3459 /* On some targets, it is quite expensive to load a symbol into a register,
3460 which makes addresses that contain symbols look much more expensive.
3461 However, the symbol will have to be loaded in any case before the
3462 loop (and quite likely we have it in a register already), so it does not
3463 make much sense to penalize them too heavily. So make some final
3464 tweaks for the SYMBOL_PRESENT modes:
3465
3466 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3467 var is cheaper, use this mode with small penalty.
3468 If VAR_PRESENT is true, try whether the mode with
3469 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3470 if this is the case, use it. */
3471 add_c = add_cost (speed, address_mode);
3472 for (i = 0; i < 8; i++)
3473 {
3474 var_p = i & 1;
3475 off_p = (i >> 1) & 1;
3476 rat_p = (i >> 2) & 1;
3477
3478 acost = data->costs[0][1][off_p][rat_p] + 1;
3479 if (var_p)
3480 acost += add_c;
3481
3482 if (acost < data->costs[1][var_p][off_p][rat_p])
3483 data->costs[1][var_p][off_p][rat_p] = acost;
3484 }
3485
3486 if (dump_file && (dump_flags & TDF_DETAILS))
3487 {
3488 fprintf (dump_file, "Address costs:\n");
3489
3490 for (i = 0; i < 16; i++)
3491 {
3492 sym_p = i & 1;
3493 var_p = (i >> 1) & 1;
3494 off_p = (i >> 2) & 1;
3495 rat_p = (i >> 3) & 1;
3496
3497 fprintf (dump_file, " ");
3498 if (sym_p)
3499 fprintf (dump_file, "sym + ");
3500 if (var_p)
3501 fprintf (dump_file, "var + ");
3502 if (off_p)
3503 fprintf (dump_file, "cst + ");
3504 if (rat_p)
3505 fprintf (dump_file, "rat * ");
3506
3507 acost = data->costs[sym_p][var_p][off_p][rat_p];
3508 fprintf (dump_file, "index costs %d\n", acost);
3509 }
3510 if (has_predec[mem_mode] || has_postdec[mem_mode]
3511 || has_preinc[mem_mode] || has_postinc[mem_mode])
3512 fprintf (dump_file, " May include autoinc/dec\n");
3513 fprintf (dump_file, "\n");
3514 }
3515
3516 address_cost_data_list[data_index] = data;
3517 }
3518
3519 bits = GET_MODE_BITSIZE (address_mode);
3520 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3521 offset &= mask;
3522 if ((offset >> (bits - 1) & 1))
3523 offset |= ~mask;
3524 s_offset = offset;
3525
3526 autoinc = false;
3527 autoinc_type = AINC_NONE;
3528 msize = GET_MODE_SIZE (mem_mode);
3529 autoinc_offset = offset;
3530 if (stmt_after_inc)
3531 autoinc_offset += ratio * cstep;
3532 if (symbol_present || var_present || ratio != 1)
3533 autoinc = false;
3534 else
3535 {
3536 if (has_postinc[mem_mode] && autoinc_offset == 0
3537 && msize == cstep)
3538 autoinc_type = AINC_POST_INC;
3539 else if (has_postdec[mem_mode] && autoinc_offset == 0
3540 && msize == -cstep)
3541 autoinc_type = AINC_POST_DEC;
3542 else if (has_preinc[mem_mode] && autoinc_offset == msize
3543 && msize == cstep)
3544 autoinc_type = AINC_PRE_INC;
3545 else if (has_predec[mem_mode] && autoinc_offset == -msize
3546 && msize == -cstep)
3547 autoinc_type = AINC_PRE_DEC;
3548
3549 if (autoinc_type != AINC_NONE)
3550 autoinc = true;
3551 }
3552
3553 cost = 0;
3554 offset_p = (s_offset != 0
3555 && data->min_offset <= s_offset
3556 && s_offset <= data->max_offset);
3557 ratio_p = (ratio != 1
3558 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3559
3560 if (ratio != 1 && !ratio_p)
3561 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3562
3563 if (s_offset && !offset_p && !symbol_present)
3564 cost += add_cost (speed, address_mode);
3565
3566 if (may_autoinc)
3567 *may_autoinc = autoinc;
3568 if (autoinc)
3569 acost = data->ainc_costs[autoinc_type];
3570 else
3571 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3572 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3573 return new_cost (cost + acost, complexity);
3574 }
3575
3576 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3577 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3578 calculating the operands of EXPR. Returns true if successful, and returns
3579 the cost in COST. */
3580
3581 static bool
3582 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3583 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3584 {
3585 comp_cost res;
3586 tree op1 = TREE_OPERAND (expr, 1);
3587 tree cst = TREE_OPERAND (mult, 1);
3588 tree multop = TREE_OPERAND (mult, 0);
3589 int m = exact_log2 (int_cst_value (cst));
3590 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3591 int sa_cost;
3592 bool equal_p = false;
3593
3594 if (!(m >= 0 && m < maxm))
3595 return false;
3596
3597 if (operand_equal_p (op1, mult, 0))
3598 equal_p = true;
3599
3600 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3601 ? shiftadd_cost (speed, mode, m)
3602 : (equal_p
3603 ? shiftsub1_cost (speed, mode, m)
3604 : shiftsub0_cost (speed, mode, m)));
3605 res = new_cost (sa_cost, 0);
3606 res = add_costs (res, equal_p ? cost0 : cost1);
3607
3608 STRIP_NOPS (multop);
3609 if (!is_gimple_val (multop))
3610 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3611
3612 *cost = res;
3613 return true;
3614 }
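/* E.g. for a + b * 8 the multiplication is costed as the shift b << 3
   (m == 3) combined with the addition, which on most targets is cheaper
   than a full multiply followed by an add.  */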
3615
3616 /* Estimates cost of forcing expression EXPR into a variable. */
3617
3618 static comp_cost
3619 force_expr_to_var_cost (tree expr, bool speed)
3620 {
3621 static bool costs_initialized = false;
3622 static unsigned integer_cost [2];
3623 static unsigned symbol_cost [2];
3624 static unsigned address_cost [2];
3625 tree op0, op1;
3626 comp_cost cost0, cost1, cost;
3627 machine_mode mode;
3628
3629 if (!costs_initialized)
3630 {
3631 tree type = build_pointer_type (integer_type_node);
3632 tree var, addr;
3633 rtx x;
3634 int i;
3635
3636 var = create_tmp_var_raw (integer_type_node, "test_var");
3637 TREE_STATIC (var) = 1;
3638 x = produce_memory_decl_rtl (var, NULL);
3639 SET_DECL_RTL (var, x);
3640
3641 addr = build1 (ADDR_EXPR, type, var);
3642
3643
3644 for (i = 0; i < 2; i++)
3645 {
3646 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3647 2000), i);
3648
3649 symbol_cost[i] = computation_cost (addr, i) + 1;
3650
3651 address_cost[i]
3652 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3653 if (dump_file && (dump_flags & TDF_DETAILS))
3654 {
3655 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3656 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3657 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3658 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3659 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3660 fprintf (dump_file, "\n");
3661 }
3662 }
3663
3664 costs_initialized = true;
3665 }
3666
3667 STRIP_NOPS (expr);
3668
3669 if (SSA_VAR_P (expr))
3670 return no_cost;
3671
3672 if (is_gimple_min_invariant (expr))
3673 {
3674 if (TREE_CODE (expr) == INTEGER_CST)
3675 return new_cost (integer_cost [speed], 0);
3676
3677 if (TREE_CODE (expr) == ADDR_EXPR)
3678 {
3679 tree obj = TREE_OPERAND (expr, 0);
3680
3681 if (TREE_CODE (obj) == VAR_DECL
3682 || TREE_CODE (obj) == PARM_DECL
3683 || TREE_CODE (obj) == RESULT_DECL)
3684 return new_cost (symbol_cost [speed], 0);
3685 }
3686
3687 return new_cost (address_cost [speed], 0);
3688 }
3689
3690 switch (TREE_CODE (expr))
3691 {
3692 case POINTER_PLUS_EXPR:
3693 case PLUS_EXPR:
3694 case MINUS_EXPR:
3695 case MULT_EXPR:
3696 op0 = TREE_OPERAND (expr, 0);
3697 op1 = TREE_OPERAND (expr, 1);
3698 STRIP_NOPS (op0);
3699 STRIP_NOPS (op1);
3700 break;
3701
3702 CASE_CONVERT:
3703 case NEGATE_EXPR:
3704 op0 = TREE_OPERAND (expr, 0);
3705 STRIP_NOPS (op0);
3706 op1 = NULL_TREE;
3707 break;
3708
3709 default:
3710 /* Just an arbitrary value, FIXME. */
3711 return new_cost (target_spill_cost[speed], 0);
3712 }
3713
3714 if (op0 == NULL_TREE
3715 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3716 cost0 = no_cost;
3717 else
3718 cost0 = force_expr_to_var_cost (op0, speed);
3719
3720 if (op1 == NULL_TREE
3721 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3722 cost1 = no_cost;
3723 else
3724 cost1 = force_expr_to_var_cost (op1, speed);
3725
3726 mode = TYPE_MODE (TREE_TYPE (expr));
3727 switch (TREE_CODE (expr))
3728 {
3729 case POINTER_PLUS_EXPR:
3730 case PLUS_EXPR:
3731 case MINUS_EXPR:
3732 case NEGATE_EXPR:
3733 cost = new_cost (add_cost (speed, mode), 0);
3734 if (TREE_CODE (expr) != NEGATE_EXPR)
3735 {
3736 tree mult = NULL_TREE;
3737 comp_cost sa_cost;
3738 if (TREE_CODE (op1) == MULT_EXPR)
3739 mult = op1;
3740 else if (TREE_CODE (op0) == MULT_EXPR)
3741 mult = op0;
3742
3743 if (mult != NULL_TREE
3744 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3745 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3746 speed, &sa_cost))
3747 return sa_cost;
3748 }
3749 break;
3750
3751 CASE_CONVERT:
3752 {
3753 tree inner_mode, outer_mode;
3754 outer_mode = TREE_TYPE (expr);
3755 inner_mode = TREE_TYPE (op0);
3756 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3757 TYPE_MODE (inner_mode), speed), 0);
3758 }
3759 break;
3760
3761 case MULT_EXPR:
3762 if (cst_and_fits_in_hwi (op0))
3763 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3764 mode, speed), 0);
3765 else if (cst_and_fits_in_hwi (op1))
3766 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3767 mode, speed), 0);
3768 else
3769 return new_cost (target_spill_cost [speed], 0);
3770 break;
3771
3772 default:
3773 gcc_unreachable ();
3774 }
3775
3776 cost = add_costs (cost, cost0);
3777 cost = add_costs (cost, cost1);
3778
3779 /* Bound the cost by target_spill_cost. The parts of complicated
3780 computations often are either loop invariant or at least can
3781 be shared between several iv uses, so letting this grow without
3782 limits would not give reasonable results. */
3783 if (cost.cost > (int) target_spill_cost [speed])
3784 cost.cost = target_spill_cost [speed];
3785
3786 return cost;
3787 }
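/* For example, the cost of (a + b) * 4, with a and b SSA names, is the
   cost of a multiplication by the constant 4 plus the cost of one
   addition, capped by target_spill_cost.  */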
3788
3789 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3790 invariants the computation depends on. */
3791
3792 static comp_cost
3793 force_var_cost (struct ivopts_data *data,
3794 tree expr, bitmap *depends_on)
3795 {
3796 if (depends_on)
3797 {
3798 fd_ivopts_data = data;
3799 walk_tree (&expr, find_depends, depends_on, NULL);
3800 }
3801
3802 return force_expr_to_var_cost (expr, data->speed);
3803 }
3804
3805 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3806 value of offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3807 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3808 invariants the computation depends on. */
3809
3810 static comp_cost
3811 split_address_cost (struct ivopts_data *data,
3812 tree addr, bool *symbol_present, bool *var_present,
3813 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3814 {
3815 tree core;
3816 HOST_WIDE_INT bitsize;
3817 HOST_WIDE_INT bitpos;
3818 tree toffset;
3819 machine_mode mode;
3820 int unsignedp, volatilep;
3821
3822 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3823 &unsignedp, &volatilep, false);
3824
3825 if (toffset != 0
3826 || bitpos % BITS_PER_UNIT != 0
3827 || TREE_CODE (core) != VAR_DECL)
3828 {
3829 *symbol_present = false;
3830 *var_present = true;
3831 fd_ivopts_data = data;
3832 walk_tree (&addr, find_depends, depends_on, NULL);
3833 return new_cost (target_spill_cost[data->speed], 0);
3834 }
3835
3836 *offset += bitpos / BITS_PER_UNIT;
3837 if (TREE_STATIC (core)
3838 || DECL_EXTERNAL (core))
3839 {
3840 *symbol_present = true;
3841 *var_present = false;
3842 return no_cost;
3843 }
3844
3845 *symbol_present = false;
3846 *var_present = true;
3847 return no_cost;
3848 }
3849
3850 /* Estimates cost of expressing difference of addresses E1 - E2 as
3851 var + symbol + offset. The value of offset is added to OFFSET;
3852 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3853 part is missing. DEPENDS_ON is a set of the invariants the computation
3854 depends on. */
3855
3856 static comp_cost
3857 ptr_difference_cost (struct ivopts_data *data,
3858 tree e1, tree e2, bool *symbol_present, bool *var_present,
3859 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3860 {
3861 HOST_WIDE_INT diff = 0;
3862 aff_tree aff_e1, aff_e2;
3863 tree type;
3864
3865 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3866
3867 if (ptr_difference_const (e1, e2, &diff))
3868 {
3869 *offset += diff;
3870 *symbol_present = false;
3871 *var_present = false;
3872 return no_cost;
3873 }
3874
3875 if (integer_zerop (e2))
3876 return split_address_cost (data, TREE_OPERAND (e1, 0),
3877 symbol_present, var_present, offset, depends_on);
3878
3879 *symbol_present = false;
3880 *var_present = true;
3881
3882 type = signed_type_for (TREE_TYPE (e1));
3883 tree_to_aff_combination (e1, type, &aff_e1);
3884 tree_to_aff_combination (e2, type, &aff_e2);
3885 aff_combination_scale (&aff_e2, -1);
3886 aff_combination_add (&aff_e1, &aff_e2);
3887
3888 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3889 }
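/* For instance, for e1 == &a[4] and e2 == &a[1] with 4-byte elements,
   ptr_difference_const succeeds, *OFFSET grows by 12 and no runtime
   computation is needed at all.  */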
3890
3891 /* Estimates cost of expressing difference E1 - E2 as
3892 var + symbol + offset. The value of offset is added to OFFSET;
3893 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3894 part is missing. DEPENDS_ON is a set of the invariants the computation
3895 depends on. */
3896
3897 static comp_cost
3898 difference_cost (struct ivopts_data *data,
3899 tree e1, tree e2, bool *symbol_present, bool *var_present,
3900 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3901 {
3902 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3903 unsigned HOST_WIDE_INT off1, off2;
3904 aff_tree aff_e1, aff_e2;
3905 tree type;
3906
3907 e1 = strip_offset (e1, &off1);
3908 e2 = strip_offset (e2, &off2);
3909 *offset += off1 - off2;
3910
3911 STRIP_NOPS (e1);
3912 STRIP_NOPS (e2);
3913
3914 if (TREE_CODE (e1) == ADDR_EXPR)
3915 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3916 offset, depends_on);
3917 *symbol_present = false;
3918
3919 if (operand_equal_p (e1, e2, 0))
3920 {
3921 *var_present = false;
3922 return no_cost;
3923 }
3924
3925 *var_present = true;
3926
3927 if (integer_zerop (e2))
3928 return force_var_cost (data, e1, depends_on);
3929
3930 if (integer_zerop (e1))
3931 {
3932 comp_cost cost = force_var_cost (data, e2, depends_on);
3933 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3934 return cost;
3935 }
3936
3937 type = signed_type_for (TREE_TYPE (e1));
3938 tree_to_aff_combination (e1, type, &aff_e1);
3939 tree_to_aff_combination (e2, type, &aff_e2);
3940 aff_combination_scale (&aff_e2, -1);
3941 aff_combination_add (&aff_e1, &aff_e2);
3942
3943 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3944 }
3945
3946 /* Returns true if AFF1 and AFF2 are identical. */
3947
3948 static bool
3949 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3950 {
3951 unsigned i;
3952
3953 if (aff1->n != aff2->n)
3954 return false;
3955
3956 for (i = 0; i < aff1->n; i++)
3957 {
3958 if (aff1->elts[i].coef != aff2->elts[i].coef)
3959 return false;
3960
3961 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3962 return false;
3963 }
3964 return true;
3965 }
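
/* Aside (illustrative): an aff_tree denotes CST + coef_1 * val_1 + ...
   + coef_N * val_N; e.g. &a[2 * i + 3] with 4-byte elements corresponds
   to the combination 12 + 8 * i + 1 * &a.  The element-wise comparison
   above may report false for equal values whose elements are ordered
   differently, which is safe for its use as a conservative equality
   test.  */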
3966
3967 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3968
3969 static int
3970 get_expr_id (struct ivopts_data *data, tree expr)
3971 {
3972 struct iv_inv_expr_ent ent;
3973 struct iv_inv_expr_ent **slot;
3974
3975 ent.expr = expr;
3976 ent.hash = iterative_hash_expr (expr, 0);
3977 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3978 if (*slot)
3979 return (*slot)->id;
3980
3981 *slot = XNEW (struct iv_inv_expr_ent);
3982 (*slot)->expr = expr;
3983 (*slot)->hash = ent.hash;
3984 (*slot)->id = data->inv_expr_id++;
3985 return (*slot)->id;
3986 }
3987
3988 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3989 requires a new compiler generated temporary. Returns -1 otherwise.
3990 ADDRESS_P is a flag indicating if the expression is for address
3991 computation. */
3992
3993 static int
3994 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3995 tree cbase, HOST_WIDE_INT ratio,
3996 bool address_p)
3997 {
3998 aff_tree ubase_aff, cbase_aff;
3999 tree expr, ub, cb;
4000
4001 STRIP_NOPS (ubase);
4002 STRIP_NOPS (cbase);
4003 ub = ubase;
4004 cb = cbase;
4005
4006 if ((TREE_CODE (ubase) == INTEGER_CST)
4007 && (TREE_CODE (cbase) == INTEGER_CST))
4008 return -1;
4009
4010 /* Strips the constant part. */
4011 if (TREE_CODE (ubase) == PLUS_EXPR
4012 || TREE_CODE (ubase) == MINUS_EXPR
4013 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4014 {
4015 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4016 ubase = TREE_OPERAND (ubase, 0);
4017 }
4018
4019 /* Strips the constant part. */
4020 if (TREE_CODE (cbase) == PLUS_EXPR
4021 || TREE_CODE (cbase) == MINUS_EXPR
4022 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4023 {
4024 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4025 cbase = TREE_OPERAND (cbase, 0);
4026 }
4027
4028 if (address_p)
4029 {
4030 if (((TREE_CODE (ubase) == SSA_NAME)
4031 || (TREE_CODE (ubase) == ADDR_EXPR
4032 && is_gimple_min_invariant (ubase)))
4033 && (TREE_CODE (cbase) == INTEGER_CST))
4034 return -1;
4035
4036 if (((TREE_CODE (cbase) == SSA_NAME)
4037 || (TREE_CODE (cbase) == ADDR_EXPR
4038 && is_gimple_min_invariant (cbase)))
4039 && (TREE_CODE (ubase) == INTEGER_CST))
4040 return -1;
4041 }
4042
4043 if (ratio == 1)
4044 {
4045 if (operand_equal_p (ubase, cbase, 0))
4046 return -1;
4047
4048 if (TREE_CODE (ubase) == ADDR_EXPR
4049 && TREE_CODE (cbase) == ADDR_EXPR)
4050 {
4051 tree usym, csym;
4052
4053 usym = TREE_OPERAND (ubase, 0);
4054 csym = TREE_OPERAND (cbase, 0);
4055 if (TREE_CODE (usym) == ARRAY_REF)
4056 {
4057 tree ind = TREE_OPERAND (usym, 1);
4058 if (TREE_CODE (ind) == INTEGER_CST
4059 && tree_fits_shwi_p (ind)
4060 && tree_to_shwi (ind) == 0)
4061 usym = TREE_OPERAND (usym, 0);
4062 }
4063 if (TREE_CODE (csym) == ARRAY_REF)
4064 {
4065 tree ind = TREE_OPERAND (csym, 1);
4066 if (TREE_CODE (ind) == INTEGER_CST
4067 && tree_fits_shwi_p (ind)
4068 && tree_to_shwi (ind) == 0)
4069 csym = TREE_OPERAND (csym, 0);
4070 }
4071 if (operand_equal_p (usym, csym, 0))
4072 return -1;
4073 }
4074 /* Now do a more complex comparison.  */
4075 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4076 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4077 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4078 return -1;
4079 }
4080
4081 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4082 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4083
4084 aff_combination_scale (&cbase_aff, -1 * ratio);
4085 aff_combination_add (&ubase_aff, &cbase_aff);
4086 expr = aff_combination_to_tree (&ubase_aff);
4087 return get_expr_id (data, expr);
4088 }
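
/* For illustration only: with UBASE == x + 8, CBASE == x and RATIO == 1,
   the constant parts are stripped, the remaining bases compare equal and
   -1 is returned -- no new temporary is needed.  For unrelated bases the
   function instead builds UBASE - RATIO * CBASE and returns its id.  */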
4089
4090
4091
4092 /* Determines the cost of the computation by which USE is expressed
4093 from induction variable CAND. If ADDRESS_P is true, we just need
4094 to create an address from it, otherwise we want to get it into a
4095 register. A set of invariants we depend on is stored in
4096 DEPENDS_ON. AT is the statement at which the value is computed.
4097 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4098 addressing is likely. */
4099
4100 static comp_cost
4101 get_computation_cost_at (struct ivopts_data *data,
4102 struct iv_use *use, struct iv_cand *cand,
4103 bool address_p, bitmap *depends_on, gimple at,
4104 bool *can_autoinc,
4105 int *inv_expr_id)
4106 {
4107 tree ubase = use->iv->base, ustep = use->iv->step;
4108 tree cbase, cstep;
4109 tree utype = TREE_TYPE (ubase), ctype;
4110 unsigned HOST_WIDE_INT cstepi, offset = 0;
4111 HOST_WIDE_INT ratio, aratio;
4112 bool var_present, symbol_present, stmt_is_after_inc;
4113 comp_cost cost;
4114 widest_int rat;
4115 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4116 machine_mode mem_mode = (address_p
4117 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4118 : VOIDmode);
4119
4120 *depends_on = NULL;
4121
4122 /* Only consider real candidates. */
4123 if (!cand->iv)
4124 return infinite_cost;
4125
4126 cbase = cand->iv->base;
4127 cstep = cand->iv->step;
4128 ctype = TREE_TYPE (cbase);
4129
4130 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4131 {
4132 /* We do not have a precision to express the values of use. */
4133 return infinite_cost;
4134 }
4135
4136 if (address_p
4137 || (use->iv->base_object
4138 && cand->iv->base_object
4139 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4140 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4141 {
4142 /* Do not try to express address of an object with computation based
4143 on address of a different object. This may cause problems in rtl
4144 level alias analysis (that does not expect this to be happening,
4145 as this is illegal in C), and would be unlikely to be useful
4146 anyway. */
4147 if (use->iv->base_object
4148 && cand->iv->base_object
4149 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4150 return infinite_cost;
4151 }
4152
4153 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4154 {
4155 /* TODO -- add direct handling of this case. */
4156 goto fallback;
4157 }
4158
4159 /* CSTEPI is removed from the offset in case the statement is after the
4160 increment. If the step is not constant, we use zero instead.
4161 This is a bit imprecise (there is the extra addition), but
4162 redundancy elimination is likely to transform the code so that
4163 it uses the value of the variable before the increment anyway,
4164 so this is not too unrealistic. */
4165 if (cst_and_fits_in_hwi (cstep))
4166 cstepi = int_cst_value (cstep);
4167 else
4168 cstepi = 0;
4169
4170 if (!constant_multiple_of (ustep, cstep, &rat))
4171 return infinite_cost;
4172
4173 if (wi::fits_shwi_p (rat))
4174 ratio = rat.to_shwi ();
4175 else
4176 return infinite_cost;
4177
4178 STRIP_NOPS (cbase);
4179 ctype = TREE_TYPE (cbase);
4180
4181 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4182
4183 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4184 or ratio == 1, it is better to handle this like
4185
4186 ubase - ratio * cbase + ratio * var
4187
4188 (this also holds in the case ratio == -1; handling that case is a TODO). */
4189
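/* For illustration only: with ustep == 8 and cstep == 4, RATIO == 2
   and the use value is (ubase - 2 * cbase) + 2 * var, so only the
   loop-invariant part ubase - 2 * cbase needs to be set up before
   the loop.  */
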
4190 if (cst_and_fits_in_hwi (cbase))
4191 {
4192 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4193 cost = difference_cost (data,
4194 ubase, build_int_cst (utype, 0),
4195 &symbol_present, &var_present, &offset,
4196 depends_on);
4197 cost.cost /= avg_loop_niter (data->current_loop);
4198 }
4199 else if (ratio == 1)
4200 {
4201 tree real_cbase = cbase;
4202
4203 /* Check to see if any adjustment is needed. */
4204 if (cstepi == 0 && stmt_is_after_inc)
4205 {
4206 aff_tree real_cbase_aff;
4207 aff_tree cstep_aff;
4208
4209 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4210 &real_cbase_aff);
4211 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4212
4213 aff_combination_add (&real_cbase_aff, &cstep_aff);
4214 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4215 }
4216
4217 cost = difference_cost (data,
4218 ubase, real_cbase,
4219 &symbol_present, &var_present, &offset,
4220 depends_on);
4221 cost.cost /= avg_loop_niter (data->current_loop);
4222 }
4223 else if (address_p
4224 && !POINTER_TYPE_P (ctype)
4225 && multiplier_allowed_in_address_p
4226 (ratio, mem_mode,
4227 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4228 {
4229 cbase
4230 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4231 cost = difference_cost (data,
4232 ubase, cbase,
4233 &symbol_present, &var_present, &offset,
4234 depends_on);
4235 cost.cost /= avg_loop_niter (data->current_loop);
4236 }
4237 else
4238 {
4239 cost = force_var_cost (data, cbase, depends_on);
4240 cost = add_costs (cost,
4241 difference_cost (data,
4242 ubase, build_int_cst (utype, 0),
4243 &symbol_present, &var_present,
4244 &offset, depends_on));
4245 cost.cost /= avg_loop_niter (data->current_loop);
4246 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4247 }
4248
4249 if (inv_expr_id)
4250 {
4251 *inv_expr_id =
4252 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4253 /* Clear depends on. */
4254 if (*inv_expr_id != -1 && depends_on && *depends_on)
4255 bitmap_clear (*depends_on);
4256 }
4257
4258 /* If we are after the increment, the value of the candidate is higher by
4259 one iteration. */
4260 if (stmt_is_after_inc)
4261 offset -= ratio * cstepi;
4262
4263 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4264 (symbol/var1/const parts may be omitted). If we are looking for an
4265 address, find the cost of addressing this. */
4266 if (address_p)
4267 return add_costs (cost,
4268 get_address_cost (symbol_present, var_present,
4269 offset, ratio, cstepi,
4270 mem_mode,
4271 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4272 speed, stmt_is_after_inc,
4273 can_autoinc));
4274
4275 /* Otherwise estimate the costs for computing the expression. */
4276 if (!symbol_present && !var_present && !offset)
4277 {
4278 if (ratio != 1)
4279 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4280 return cost;
4281 }
4282
4283 /* Symbol + offset should be compile-time computable so consider that they
4284 are added once to the variable, if present. */
4285 if (var_present && (symbol_present || offset))
4286 cost.cost += adjust_setup_cost (data,
4287 add_cost (speed, TYPE_MODE (ctype)));
4288
4289 /* Having offset does not affect runtime cost in case it is added to
4290 symbol, but it increases complexity. */
4291 if (offset)
4292 cost.complexity++;
4293
4294 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4295
4296 aratio = ratio > 0 ? ratio : -ratio;
4297 if (aratio != 1)
4298 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4299 return cost;
4300
4301 fallback:
4302 if (can_autoinc)
4303 *can_autoinc = false;
4304
4305 {
4306 /* Just get the expression, expand it and measure the cost. */
4307 tree comp = get_computation_at (data->current_loop, use, cand, at);
4308
4309 if (!comp)
4310 return infinite_cost;
4311
4312 if (address_p)
4313 comp = build_simple_mem_ref (comp);
4314
4315 return new_cost (computation_cost (comp, speed), 0);
4316 }
4317 }
4318
4319 /* Determines the cost of the computation by which USE is expressed
4320 from induction variable CAND. If ADDRESS_P is true, we just need
4321 to create an address from it, otherwise we want to get it into a
4322 register. A set of invariants we depend on is stored in
4323 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4324 autoinc addressing is likely. */
4325
4326 static comp_cost
4327 get_computation_cost (struct ivopts_data *data,
4328 struct iv_use *use, struct iv_cand *cand,
4329 bool address_p, bitmap *depends_on,
4330 bool *can_autoinc, int *inv_expr_id)
4331 {
4332 return get_computation_cost_at (data,
4333 use, cand, address_p, depends_on, use->stmt,
4334 can_autoinc, inv_expr_id);
4335 }
4336
4337 /* Determines cost of basing replacement of USE on CAND in a generic
4338 expression. */
4339
4340 static bool
4341 determine_use_iv_cost_generic (struct ivopts_data *data,
4342 struct iv_use *use, struct iv_cand *cand)
4343 {
4344 bitmap depends_on;
4345 comp_cost cost;
4346 int inv_expr_id = -1;
4347
4348 /* The simple case first -- if we need to express value of the preserved
4349 original biv, the cost is 0. This also prevents us from counting the
4350 cost of increment twice -- once at this use and once in the cost of
4351 the candidate. */
4352 if (cand->pos == IP_ORIGINAL
4353 && cand->incremented_at == use->stmt)
4354 {
4355 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4356 ERROR_MARK, -1);
4357 return true;
4358 }
4359
4360 cost = get_computation_cost (data, use, cand, false, &depends_on,
4361 NULL, &inv_expr_id);
4362
4363 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4364 inv_expr_id);
4365
4366 return !infinite_cost_p (cost);
4367 }
4368
4369 /* Determines cost of basing replacement of USE on CAND in an address. */
4370
4371 static bool
4372 determine_use_iv_cost_address (struct ivopts_data *data,
4373 struct iv_use *use, struct iv_cand *cand)
4374 {
4375 bitmap depends_on;
4376 bool can_autoinc;
4377 int inv_expr_id = -1;
4378 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4379 &can_autoinc, &inv_expr_id);
4380
4381 if (cand->ainc_use == use)
4382 {
4383 if (can_autoinc)
4384 cost.cost -= cand->cost_step;
4385 /* If we generated the candidate solely for exploiting autoincrement
4386 opportunities, and it turns out it can't be used, set the cost to
4387 infinity to make sure we ignore it. */
4388 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4389 cost = infinite_cost;
4390 }
4391 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4392 inv_expr_id);
4393
4394 return !infinite_cost_p (cost);
4395 }
4396
4397 /* Computes value of candidate CAND at position AT in iteration NITER, and
4398 stores it to VAL. */
4399
4400 static void
4401 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4402 aff_tree *val)
4403 {
4404 aff_tree step, delta, nit;
4405 struct iv *iv = cand->iv;
4406 tree type = TREE_TYPE (iv->base);
4407 tree steptype = unsigned_type_for (type);
4411
4412 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4413 aff_combination_convert (&step, steptype);
4414 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4415 aff_combination_convert (&nit, steptype);
4416 aff_combination_mult (&nit, &step, &delta);
4417 if (stmt_after_increment (loop, cand, at))
4418 aff_combination_add (&delta, &step);
4419
4420 tree_to_aff_combination (iv->base, type, val);
4421 if (!POINTER_TYPE_P (type))
4422 aff_combination_convert (val, steptype);
4423 aff_combination_add (val, &delta);
4424 }
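
/* For illustration only: for a candidate with base 16 and step 4, the
   value after NITER iterations is 16 + 4 * NITER, plus one more step
   (16 + 4 * (NITER + 1)) when AT lies after the increment.  */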
4425
4426 /* Returns period of induction variable iv. */
4427
4428 static tree
4429 iv_period (struct iv *iv)
4430 {
4431 tree step = iv->step, period, type;
4432 tree pow2div;
4433
4434 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4435
4436 type = unsigned_type_for (TREE_TYPE (step));
4437 /* Period of the iv is lcm (step, type_range)/step - 1,
4438 i.e., N*type_range/step - 1. Since the type range is a power
4439 of two, N == step >> num_of_ending_zeros_binary (step),
4440 so the final result is
4441 
4442 (type_range >> num_of_ending_zeros_binary (step)) - 1
4443 
4444 */
4445 pow2div = num_ending_zeros (step);
4446
4447 period = build_low_bits_mask (type,
4448 (TYPE_PRECISION (type)
4449 - tree_to_uhwi (pow2div)));
4450
4451 return period;
4452 }
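
/* For illustration only: a 32-bit unsigned iv with step 12 has two
   trailing zero bits in the step, so iv_period returns
   (2^32 >> 2) - 1 == 0x3fffffff; the iv takes pairwise distinct values
   for period + 1 consecutive iterations.  */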
4453
4454 /* Returns the comparison operator used when eliminating the iv USE. */
4455
4456 static enum tree_code
4457 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4458 {
4459 struct loop *loop = data->current_loop;
4460 basic_block ex_bb;
4461 edge exit;
4462
4463 ex_bb = gimple_bb (use->stmt);
4464 exit = EDGE_SUCC (ex_bb, 0);
4465 if (flow_bb_inside_loop_p (loop, exit->dest))
4466 exit = EDGE_SUCC (ex_bb, 1);
4467
4468 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4469 }
4470
4471 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4472 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4473 calculation is performed in non-wrapping type.
4474
4475 TODO: More generally, we could test for the situation that
4476 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4477 This would require knowing the sign of OFFSET. */
4478
4479 static bool
4480 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4481 {
4482 enum tree_code code;
4483 tree e1, e2;
4484 aff_tree aff_e1, aff_e2, aff_offset;
4485
4486 if (!nowrap_type_p (TREE_TYPE (base)))
4487 return false;
4488
4489 base = expand_simple_operations (base);
4490
4491 if (TREE_CODE (base) == SSA_NAME)
4492 {
4493 gimple stmt = SSA_NAME_DEF_STMT (base);
4494
4495 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4496 return false;
4497
4498 code = gimple_assign_rhs_code (stmt);
4499 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4500 return false;
4501
4502 e1 = gimple_assign_rhs1 (stmt);
4503 e2 = gimple_assign_rhs2 (stmt);
4504 }
4505 else
4506 {
4507 code = TREE_CODE (base);
4508 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4509 return false;
4510 e1 = TREE_OPERAND (base, 0);
4511 e2 = TREE_OPERAND (base, 1);
4512 }
4513
4514 /* Use affine expansion as deeper inspection to prove the equality. */
4515 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4516 &aff_e2, &data->name_expansion_cache);
4517 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4518 &aff_offset, &data->name_expansion_cache);
4519 aff_combination_scale (&aff_offset, -1);
4520 switch (code)
4521 {
4522 case PLUS_EXPR:
4523 aff_combination_add (&aff_e2, &aff_offset);
4524 if (aff_combination_zero_p (&aff_e2))
4525 return true;
4526
4527 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4528 &aff_e1, &data->name_expansion_cache);
4529 aff_combination_add (&aff_e1, &aff_offset);
4530 return aff_combination_zero_p (&aff_e1);
4531
4532 case POINTER_PLUS_EXPR:
4533 aff_combination_add (&aff_e2, &aff_offset);
4534 return aff_combination_zero_p (&aff_e2);
4535
4536 default:
4537 return false;
4538 }
4539 }
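
/* For illustration only: if BASE is defined as p_0 = base + a
   (a POINTER_PLUS_EXPR in a non-wrapping type) and OFFSET expands to the
   same affine combination as a, then aff_e2 - aff_offset above is zero
   and BASE - OFFSET == base is proved not to overflow.  */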
4540
4541 /* Tries to replace the loop exit test by one formulated in terms of a LT_EXPR
4542 comparison with CAND. NITER describes the number of iterations of
4543 the loop. If successful, the comparison in COMP_P is altered accordingly.
4544
4545 We aim to handle the following situation:
4546
4547 sometype *base, *p;
4548 int a, b, i;
4549
4550 i = a;
4551 p = p_0 = base + a;
4552
4553 do
4554 {
4555 bla (*p);
4556 p++;
4557 i++;
4558 }
4559 while (i < b);
4560
4561 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4562 We aim to optimize this to
4563
4564 p = p_0 = base + a;
4565 do
4566 {
4567 bla (*p);
4568 p++;
4569 }
4570 while (p < p_0 - a + b);
4571
4572 This preserves correctness, since the pointer arithmetic does not
4573 overflow. More precisely:
4574
4575 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4576 overflow in computing it or the values of p.
4577 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4578 overflow. To prove this, we use the fact that p_0 = base + a. */
4579
4580 static bool
4581 iv_elimination_compare_lt (struct ivopts_data *data,
4582 struct iv_cand *cand, enum tree_code *comp_p,
4583 struct tree_niter_desc *niter)
4584 {
4585 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4586 struct aff_tree nit, tmpa, tmpb;
4587 enum tree_code comp;
4588 HOST_WIDE_INT step;
4589
4590 /* We need to know that the candidate induction variable does not overflow.
4591 While more complex analysis may be used to prove this, for now just
4592 check that the variable appears in the original program and that it
4593 is computed in a type that guarantees no overflows. */
4594 cand_type = TREE_TYPE (cand->iv->base);
4595 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4596 return false;
4597
4598 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4599 the calculation of the BOUND could overflow, making the comparison
4600 invalid. */
4601 if (!data->loop_single_exit_p)
4602 return false;
4603
4604 /* We need to be able to decide whether candidate is increasing or decreasing
4605 in order to choose the right comparison operator. */
4606 if (!cst_and_fits_in_hwi (cand->iv->step))
4607 return false;
4608 step = int_cst_value (cand->iv->step);
4609
4610 /* Check that the number of iterations matches the expected pattern:
4611 a + 1 > b ? 0 : b - a - 1. */
4612 mbz = niter->may_be_zero;
4613 if (TREE_CODE (mbz) == GT_EXPR)
4614 {
4615 /* Handle a + 1 > b. */
4616 tree op0 = TREE_OPERAND (mbz, 0);
4617 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4618 {
4619 a = TREE_OPERAND (op0, 0);
4620 b = TREE_OPERAND (mbz, 1);
4621 }
4622 else
4623 return false;
4624 }
4625 else if (TREE_CODE (mbz) == LT_EXPR)
4626 {
4627 tree op1 = TREE_OPERAND (mbz, 1);
4628
4629 /* Handle b < a + 1. */
4630 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4631 {
4632 a = TREE_OPERAND (op1, 0);
4633 b = TREE_OPERAND (mbz, 0);
4634 }
4635 else
4636 return false;
4637 }
4638 else
4639 return false;
4640
4641 /* Expected number of iterations is B - A - 1. Check that it matches
4642 the actual number, i.e., that B - A - NITER = 1. */
4643 tree_to_aff_combination (niter->niter, nit_type, &nit);
4644 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4645 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4646 aff_combination_scale (&nit, -1);
4647 aff_combination_scale (&tmpa, -1);
4648 aff_combination_add (&tmpb, &tmpa);
4649 aff_combination_add (&tmpb, &nit);
4650 if (tmpb.n != 0 || tmpb.offset != 1)
4651 return false;
4652
4653 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4654 overflow. */
4655 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4656 cand->iv->step,
4657 fold_convert (TREE_TYPE (cand->iv->step), a));
4658 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4659 return false;
4660
4661 /* Determine the new comparison operator. */
4662 comp = step < 0 ? GT_EXPR : LT_EXPR;
4663 if (*comp_p == NE_EXPR)
4664 *comp_p = comp;
4665 else if (*comp_p == EQ_EXPR)
4666 *comp_p = invert_tree_comparison (comp, false);
4667 else
4668 gcc_unreachable ();
4669
4670 return true;
4671 }
4672
4673 /* Check whether it is possible to express the condition in USE by comparison
4674 of candidate CAND. If so, store the value compared with to BOUND, and the
4675 comparison operator to COMP. */
4676
4677 static bool
4678 may_eliminate_iv (struct ivopts_data *data,
4679 struct iv_use *use, struct iv_cand *cand, tree *bound,
4680 enum tree_code *comp)
4681 {
4682 basic_block ex_bb;
4683 edge exit;
4684 tree period;
4685 struct loop *loop = data->current_loop;
4686 aff_tree bnd;
4687 struct tree_niter_desc *desc = NULL;
4688
4689 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4690 return false;
4691
4692 /* For now works only for exits that dominate the loop latch.
4693 TODO: extend to other conditions inside loop body. */
4694 ex_bb = gimple_bb (use->stmt);
4695 if (use->stmt != last_stmt (ex_bb)
4696 || gimple_code (use->stmt) != GIMPLE_COND
4697 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4698 return false;
4699
4700 exit = EDGE_SUCC (ex_bb, 0);
4701 if (flow_bb_inside_loop_p (loop, exit->dest))
4702 exit = EDGE_SUCC (ex_bb, 1);
4703 if (flow_bb_inside_loop_p (loop, exit->dest))
4704 return false;
4705
4706 desc = niter_for_exit (data, exit);
4707 if (!desc)
4708 return false;
4709
4710 /* Determine whether we can use the variable to test the exit condition.
4711 This is the case iff the period of the induction variable is greater
4712 than the number of iterations for which the exit condition is true. */
4713 period = iv_period (cand->iv);
4714
4715 /* If the number of iterations is constant, compare against it directly. */
4716 if (TREE_CODE (desc->niter) == INTEGER_CST)
4717 {
4718 /* See cand_value_at. */
4719 if (stmt_after_increment (loop, cand, use->stmt))
4720 {
4721 if (!tree_int_cst_lt (desc->niter, period))
4722 return false;
4723 }
4724 else
4725 {
4726 if (tree_int_cst_lt (period, desc->niter))
4727 return false;
4728 }
4729 }
4730
4731 /* If not, and if this is the only possible exit of the loop, see whether
4732 we can get a conservative estimate on the number of iterations of the
4733 entire loop and compare against that instead. */
4734 else
4735 {
4736 widest_int period_value, max_niter;
4737
4738 max_niter = desc->max;
4739 if (stmt_after_increment (loop, cand, use->stmt))
4740 max_niter += 1;
4741 period_value = wi::to_widest (period);
4742 if (wi::gtu_p (max_niter, period_value))
4743 {
4744 /* See if we can take advantage of inferred loop bound information. */
4745 if (data->loop_single_exit_p)
4746 {
4747 if (!max_loop_iterations (loop, &max_niter))
4748 return false;
4749 /* The loop bound is already adjusted by adding 1. */
4750 if (wi::gtu_p (max_niter, period_value))
4751 return false;
4752 }
4753 else
4754 return false;
4755 }
4756 }
4757
4758 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4759
4760 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4761 aff_combination_to_tree (&bnd));
4762 *comp = iv_elimination_compare (data, use);
4763
4764 /* It is unlikely that computing the number of iterations using division
4765 would be more profitable than keeping the original induction variable. */
4766 if (expression_expensive_p (*bound))
4767 return false;
4768
4769 /* Sometimes, it is possible to handle the situation that the number of
4770 iterations may be zero unless additional assumptions hold, by using <
4771 instead of != in the exit condition.
4772
4773 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4774 base the exit condition on it. However, that is often too
4775 expensive. */
4776 if (!integer_zerop (desc->may_be_zero))
4777 return iv_elimination_compare_lt (data, cand, comp, desc);
4778
4779 return true;
4780 }
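
/* For illustration only: an 8-bit unsigned candidate with step 2 has
   period 127, so it must not be used to test the exit of a loop that may
   iterate 200 times; the period/niter checks above reject such pairs.  */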
4781
4782 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4783 be copied, if it is used in the loop body and DATA->body_includes_call. */
4784
4785 static int
4786 parm_decl_cost (struct ivopts_data *data, tree bound)
4787 {
4788 tree sbound = bound;
4789 STRIP_NOPS (sbound);
4790
4791 if (TREE_CODE (sbound) == SSA_NAME
4792 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4793 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4794 && data->body_includes_call)
4795 return COSTS_N_INSNS (1);
4796
4797 return 0;
4798 }
4799
4800 /* Determines cost of basing replacement of USE on CAND in a condition. */
4801
4802 static bool
4803 determine_use_iv_cost_condition (struct ivopts_data *data,
4804 struct iv_use *use, struct iv_cand *cand)
4805 {
4806 tree bound = NULL_TREE;
4807 struct iv *cmp_iv;
4808 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4809 comp_cost elim_cost, express_cost, cost, bound_cost;
4810 bool ok;
4811 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4812 tree *control_var, *bound_cst;
4813 enum tree_code comp = ERROR_MARK;
4814
4815 /* Only consider real candidates. */
4816 if (!cand->iv)
4817 {
4818 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4819 ERROR_MARK, -1);
4820 return false;
4821 }
4822
4823 /* Try iv elimination. */
4824 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4825 {
4826 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4827 if (elim_cost.cost == 0)
4828 elim_cost.cost = parm_decl_cost (data, bound);
4829 else if (TREE_CODE (bound) == INTEGER_CST)
4830 elim_cost.cost = 0;
4831 /* If we replace a loop condition 'i < n' with 'p < base + n',
4832 depends_on_elim will have 'base' and 'n' set, which implies
4833 that both 'base' and 'n' will be live during the loop. More likely,
4834 'base + n' will be loop invariant, resulting in only one live value
4835 during the loop. So in that case we clear depends_on_elim and set
4836 elim_inv_expr_id instead. */
4837 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4838 {
4839 elim_inv_expr_id = get_expr_id (data, bound);
4840 bitmap_clear (depends_on_elim);
4841 }
4842 /* The bound is a loop invariant, so it will only be computed
4843 once. */
4844 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4845 }
4846 else
4847 elim_cost = infinite_cost;
4848
4849 /* Try expressing the original giv. If it is compared with an invariant,
4850 note that we cannot get rid of it. */
4851 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4852 NULL, &cmp_iv);
4853 gcc_assert (ok);
4854
4855 /* When the condition is a comparison of the candidate IV against
4856 zero, prefer this IV.
4857
4858 TODO: The constant that we're subtracting from the cost should
4859 be target-dependent. This information should be added to the
4860 target costs for each backend. */
4861 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4862 && integer_zerop (*bound_cst)
4863 && (operand_equal_p (*control_var, cand->var_after, 0)
4864 || operand_equal_p (*control_var, cand->var_before, 0)))
4865 elim_cost.cost -= 1;
4866
4867 express_cost = get_computation_cost (data, use, cand, false,
4868 &depends_on_express, NULL,
4869 &express_inv_expr_id);
4870 fd_ivopts_data = data;
4871 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4872
4873 /* Count the cost of the original bound as well. */
4874 bound_cost = force_var_cost (data, *bound_cst, NULL);
4875 if (bound_cost.cost == 0)
4876 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4877 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4878 bound_cost.cost = 0;
4879 express_cost.cost += bound_cost.cost;
4880
4881 /* Choose the better approach, preferring the eliminated IV. */
4882 if (compare_costs (elim_cost, express_cost) <= 0)
4883 {
4884 cost = elim_cost;
4885 depends_on = depends_on_elim;
4886 depends_on_elim = NULL;
4887 inv_expr_id = elim_inv_expr_id;
4888 }
4889 else
4890 {
4891 cost = express_cost;
4892 depends_on = depends_on_express;
4893 depends_on_express = NULL;
4894 bound = NULL_TREE;
4895 comp = ERROR_MARK;
4896 inv_expr_id = express_inv_expr_id;
4897 }
4898
4899 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4900
4901 if (depends_on_elim)
4902 BITMAP_FREE (depends_on_elim);
4903 if (depends_on_express)
4904 BITMAP_FREE (depends_on_express);
4905
4906 return !infinite_cost_p (cost);
4907 }
4908
4909 /* Determines cost of basing replacement of USE on CAND. Returns false
4910 if USE cannot be based on CAND. */
4911
4912 static bool
4913 determine_use_iv_cost (struct ivopts_data *data,
4914 struct iv_use *use, struct iv_cand *cand)
4915 {
4916 switch (use->type)
4917 {
4918 case USE_NONLINEAR_EXPR:
4919 return determine_use_iv_cost_generic (data, use, cand);
4920
4921 case USE_ADDRESS:
4922 return determine_use_iv_cost_address (data, use, cand);
4923
4924 case USE_COMPARE:
4925 return determine_use_iv_cost_condition (data, use, cand);
4926
4927 default:
4928 gcc_unreachable ();
4929 }
4930 }
4931
4932 /* Return true if get_computation_cost indicates that autoincrement is
4933 a possibility for the pair of USE and CAND, false otherwise. */
4934
4935 static bool
4936 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4937 struct iv_cand *cand)
4938 {
4939 bitmap depends_on;
4940 bool can_autoinc;
4941 comp_cost cost;
4942
4943 if (use->type != USE_ADDRESS)
4944 return false;
4945
4946 cost = get_computation_cost (data, use, cand, true, &depends_on,
4947 &can_autoinc, NULL);
4948
4949 BITMAP_FREE (depends_on);
4950
4951 return !infinite_cost_p (cost) && can_autoinc;
4952 }
4953
4954 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4955 use that allows autoincrement, and set their AINC_USE if possible. */
4956
4957 static void
4958 set_autoinc_for_original_candidates (struct ivopts_data *data)
4959 {
4960 unsigned i, j;
4961
4962 for (i = 0; i < n_iv_cands (data); i++)
4963 {
4964 struct iv_cand *cand = iv_cand (data, i);
4965 struct iv_use *closest_before = NULL;
4966 struct iv_use *closest_after = NULL;
4967 if (cand->pos != IP_ORIGINAL)
4968 continue;
4969
4970 for (j = 0; j < n_iv_uses (data); j++)
4971 {
4972 struct iv_use *use = iv_use (data, j);
4973 unsigned uid = gimple_uid (use->stmt);
4974
4975 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4976 continue;
4977
4978 if (uid < gimple_uid (cand->incremented_at)
4979 && (closest_before == NULL
4980 || uid > gimple_uid (closest_before->stmt)))
4981 closest_before = use;
4982
4983 if (uid > gimple_uid (cand->incremented_at)
4984 && (closest_after == NULL
4985 || uid < gimple_uid (closest_after->stmt)))
4986 closest_after = use;
4987 }
4988
4989 if (closest_before != NULL
4990 && autoinc_possible_for_pair (data, closest_before, cand))
4991 cand->ainc_use = closest_before;
4992 else if (closest_after != NULL
4993 && autoinc_possible_for_pair (data, closest_after, cand))
4994 cand->ainc_use = closest_after;
4995 }
4996 }
4997
4998 /* Finds the candidates for the induction variables. */
4999
5000 static void
5001 find_iv_candidates (struct ivopts_data *data)
5002 {
5003 /* Add commonly used ivs. */
5004 add_standard_iv_candidates (data);
5005
5006 /* Add old induction variables. */
5007 add_old_ivs_candidates (data);
5008
5009 /* Add induction variables derived from uses. */
5010 add_derived_ivs_candidates (data);
5011
5012 set_autoinc_for_original_candidates (data);
5013
5014 /* Record the important candidates. */
5015 record_important_candidates (data);
5016 }
5017
5018 /* Determines costs of basing the use of the iv on an iv candidate. */
5019
5020 static void
5021 determine_use_iv_costs (struct ivopts_data *data)
5022 {
5023 unsigned i, j;
5024 struct iv_use *use;
5025 struct iv_cand *cand;
5026 bitmap to_clear = BITMAP_ALLOC (NULL);
5027
5028 alloc_use_cost_map (data);
5029
5030 for (i = 0; i < n_iv_uses (data); i++)
5031 {
5032 use = iv_use (data, i);
5033
5034 if (data->consider_all_candidates)
5035 {
5036 for (j = 0; j < n_iv_cands (data); j++)
5037 {
5038 cand = iv_cand (data, j);
5039 determine_use_iv_cost (data, use, cand);
5040 }
5041 }
5042 else
5043 {
5044 bitmap_iterator bi;
5045
5046 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5047 {
5048 cand = iv_cand (data, j);
5049 if (!determine_use_iv_cost (data, use, cand))
5050 bitmap_set_bit (to_clear, j);
5051 }
5052
5053 /* Remove the candidates for which the cost is infinite from
5054 the list of related candidates. */
5055 bitmap_and_compl_into (use->related_cands, to_clear);
5056 bitmap_clear (to_clear);
5057 }
5058 }
5059
5060 BITMAP_FREE (to_clear);
5061
5062 if (dump_file && (dump_flags & TDF_DETAILS))
5063 {
5064 fprintf (dump_file, "Use-candidate costs:\n");
5065
5066 for (i = 0; i < n_iv_uses (data); i++)
5067 {
5068 use = iv_use (data, i);
5069
5070 fprintf (dump_file, "Use %d:\n", i);
5071 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5072 for (j = 0; j < use->n_map_members; j++)
5073 {
5074 if (!use->cost_map[j].cand
5075 || infinite_cost_p (use->cost_map[j].cost))
5076 continue;
5077
5078 fprintf (dump_file, " %d\t%d\t%d\t",
5079 use->cost_map[j].cand->id,
5080 use->cost_map[j].cost.cost,
5081 use->cost_map[j].cost.complexity);
5082 if (use->cost_map[j].depends_on)
5083 bitmap_print (dump_file,
5084 use->cost_map[j].depends_on, "","");
5085 if (use->cost_map[j].inv_expr_id != -1)
5086 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5087 fprintf (dump_file, "\n");
5088 }
5089
5090 fprintf (dump_file, "\n");
5091 }
5092 fprintf (dump_file, "\n");
5093 }
5094 }
5095
5096 /* Determines cost of the candidate CAND. */
5097
5098 static void
5099 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5100 {
5101 comp_cost cost_base;
5102 unsigned cost, cost_step;
5103 tree base;
5104
5105 if (!cand->iv)
5106 {
5107 cand->cost = 0;
5108 return;
5109 }
5110
5111 /* There are two costs associated with the candidate -- its increment
5112 and its initialization. The second is almost negligible for any loop
5113 that rolls enough, so we give it only little weight. */
5114
5115 base = cand->iv->base;
5116 cost_base = force_var_cost (data, base, NULL);
5117 /* It would be exceptional for the iv register to happen to be initialized
5118 with the proper value at no cost. In general, there will at least be a
5119 regcopy or a constant set. */
5120 if (cost_base.cost == 0)
5121 cost_base.cost = COSTS_N_INSNS (1);
5122 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5123
5124 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5125
5126 /* Prefer the original ivs unless we may gain something by replacing them;
5127 the reason is to make debugging simpler. This is not relevant for
5128 artificial ivs created by other optimization passes. */
5129 if (cand->pos != IP_ORIGINAL
5130 || !SSA_NAME_VAR (cand->var_before)
5131 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5132 cost++;
5133
5134 /* Prefer not to insert statements into latch unless there are some
5135 already (so that we do not create unnecessary jumps). */
5136 if (cand->pos == IP_END
5137 && empty_block_p (ip_end_pos (data->current_loop)))
5138 cost++;
5139
5140 cand->cost = cost;
5141 cand->cost_step = cost_step;
5142 }
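
/* For illustration only: a candidate is charged its per-iteration
   increment (cost_step) plus its setup cost scaled down by
   adjust_setup_cost; the cost++ adjustments above slightly penalize
   candidates that are not preserved original ivs and candidates that
   would force statements into an otherwise empty latch.  */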
5143
5144 /* Determines costs of computation of the candidates. */
5145
5146 static void
5147 determine_iv_costs (struct ivopts_data *data)
5148 {
5149 unsigned i;
5150
5151 if (dump_file && (dump_flags & TDF_DETAILS))
5152 {
5153 fprintf (dump_file, "Candidate costs:\n");
5154 fprintf (dump_file, " cand\tcost\n");
5155 }
5156
5157 for (i = 0; i < n_iv_cands (data); i++)
5158 {
5159 struct iv_cand *cand = iv_cand (data, i);
5160
5161 determine_iv_cost (data, cand);
5162
5163 if (dump_file && (dump_flags & TDF_DETAILS))
5164 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5165 }
5166
5167 if (dump_file && (dump_flags & TDF_DETAILS))
5168 fprintf (dump_file, "\n");
5169 }
5170
5171 /* Calculates cost for having SIZE induction variables. */
5172
5173 static unsigned
5174 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5175 {
5176 /* We add size to the cost, so that we prefer eliminating ivs
5177 if possible. */
5178 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5179 data->body_includes_call);
5180 }
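
/* For illustration only: with SIZE == 3, the penalty is 3 plus whatever
   estimate_reg_pressure_cost charges for keeping three extra values live
   across the loop, given that DATA->regs_used registers are already
   occupied by other loop-carried values.  */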
5181
5182 /* For each size of the induction variable set determine the penalty. */
5183
5184 static void
5185 determine_set_costs (struct ivopts_data *data)
5186 {
5187 unsigned j, n;
5188 gphi *phi;
5189 gphi_iterator psi;
5190 tree op;
5191 struct loop *loop = data->current_loop;
5192 bitmap_iterator bi;
5193
5194 if (dump_file && (dump_flags & TDF_DETAILS))
5195 {
5196 fprintf (dump_file, "Global costs:\n");
5197 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5198 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5199 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5200 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5201 }
5202
5203 n = 0;
5204 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5205 {
5206 phi = psi.phi ();
5207 op = PHI_RESULT (phi);
5208
5209 if (virtual_operand_p (op))
5210 continue;
5211
5212 if (get_iv (data, op))
5213 continue;
5214
5215 n++;
5216 }
5217
5218 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5219 {
5220 struct version_info *info = ver_info (data, j);
5221
5222 if (info->inv_id && info->has_nonlin_use)
5223 n++;
5224 }
5225
5226 data->regs_used = n;
5227 if (dump_file && (dump_flags & TDF_DETAILS))
5228 fprintf (dump_file, " regs_used %d\n", n);
5229
5230 if (dump_file && (dump_flags & TDF_DETAILS))
5231 {
5232 fprintf (dump_file, " cost for size:\n");
5233 fprintf (dump_file, " ivs\tcost\n");
5234 for (j = 0; j <= 2 * target_avail_regs; j++)
5235 fprintf (dump_file, " %d\t%d\n", j,
5236 ivopts_global_cost_for_size (data, j));
5237 fprintf (dump_file, "\n");
5238 }
5239 }
5240
5241 /* Returns true if A is a cheaper cost pair than B. */
5242
5243 static bool
5244 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5245 {
5246 int cmp;
5247
5248 if (!a)
5249 return false;
5250
5251 if (!b)
5252 return true;
5253
5254 cmp = compare_costs (a->cost, b->cost);
5255 if (cmp < 0)
5256 return true;
5257
5258 if (cmp > 0)
5259 return false;
5260
5261 /* In case the costs are the same, prefer the cheaper candidate. */
5262 if (a->cand->cost < b->cand->cost)
5263 return true;
5264
5265 return false;
5266 }
5267
5268
5269 /* Returns the candidate by which USE is expressed in IVS. */
5270
5271 static struct cost_pair *
5272 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5273 {
5274 return ivs->cand_for_use[use->id];
5275 }
5276
5277 /* Computes the cost field of IVS structure. */
5278
5279 static void
5280 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5281 {
5282 comp_cost cost = ivs->cand_use_cost;
5283
5284 cost.cost += ivs->cand_cost;
5285
5286 cost.cost += ivopts_global_cost_for_size (data,
5287 ivs->n_regs + ivs->num_used_inv_expr);
5288
5289 ivs->cost = cost;
5290 }
5291
5292 /* Remove invariants in set INVS from set IVS. */
5293
5294 static void
5295 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5296 {
5297 bitmap_iterator bi;
5298 unsigned iid;
5299
5300 if (!invs)
5301 return;
5302
5303 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5304 {
5305 ivs->n_invariant_uses[iid]--;
5306 if (ivs->n_invariant_uses[iid] == 0)
5307 ivs->n_regs--;
5308 }
5309 }
5310
5311 /* Set USE not to be expressed by any candidate in IVS. */
5312
5313 static void
5314 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5315 struct iv_use *use)
5316 {
5317 unsigned uid = use->id, cid;
5318 struct cost_pair *cp;
5319
5320 cp = ivs->cand_for_use[uid];
5321 if (!cp)
5322 return;
5323 cid = cp->cand->id;
5324
5325 ivs->bad_uses++;
5326 ivs->cand_for_use[uid] = NULL;
5327 ivs->n_cand_uses[cid]--;
5328
5329 if (ivs->n_cand_uses[cid] == 0)
5330 {
5331 bitmap_clear_bit (ivs->cands, cid);
5332 /* Do not count the pseudocandidates. */
5333 if (cp->cand->iv)
5334 ivs->n_regs--;
5335 ivs->n_cands--;
5336 ivs->cand_cost -= cp->cand->cost;
5337
5338 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5339 }
5340
5341 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5342
5343 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5344
5345 if (cp->inv_expr_id != -1)
5346 {
5347 ivs->used_inv_expr[cp->inv_expr_id]--;
5348 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5349 ivs->num_used_inv_expr--;
5350 }
5351 iv_ca_recount_cost (data, ivs);
5352 }
5353
5354 /* Add invariants in set INVS to set IVS. */
5355
5356 static void
5357 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5358 {
5359 bitmap_iterator bi;
5360 unsigned iid;
5361
5362 if (!invs)
5363 return;
5364
5365 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5366 {
5367 ivs->n_invariant_uses[iid]++;
5368 if (ivs->n_invariant_uses[iid] == 1)
5369 ivs->n_regs++;
5370 }
5371 }
5372
5373 /* Set cost pair for USE in set IVS to CP. */
5374
5375 static void
5376 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5377 struct iv_use *use, struct cost_pair *cp)
5378 {
5379 unsigned uid = use->id, cid;
5380
5381 if (ivs->cand_for_use[uid] == cp)
5382 return;
5383
5384 if (ivs->cand_for_use[uid])
5385 iv_ca_set_no_cp (data, ivs, use);
5386
5387 if (cp)
5388 {
5389 cid = cp->cand->id;
5390
5391 ivs->bad_uses--;
5392 ivs->cand_for_use[uid] = cp;
5393 ivs->n_cand_uses[cid]++;
5394 if (ivs->n_cand_uses[cid] == 1)
5395 {
5396 bitmap_set_bit (ivs->cands, cid);
5397 /* Do not count the pseudocandidates. */
5398 if (cp->cand->iv)
5399 ivs->n_regs++;
5400 ivs->n_cands++;
5401 ivs->cand_cost += cp->cand->cost;
5402
5403 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5404 }
5405
5406 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5407 iv_ca_set_add_invariants (ivs, cp->depends_on);
5408
5409 if (cp->inv_expr_id != -1)
5410 {
5411 ivs->used_inv_expr[cp->inv_expr_id]++;
5412 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5413 ivs->num_used_inv_expr++;
5414 }
5415 iv_ca_recount_cost (data, ivs);
5416 }
5417 }
5418
5419 /* Extend set IVS by expressing USE by some of the candidates in it
5420 if possible. Consider all important candidates if candidates in
5421 set IVS don't give any result. */
5422
5423 static void
5424 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5425 struct iv_use *use)
5426 {
5427 struct cost_pair *best_cp = NULL, *cp;
5428 bitmap_iterator bi;
5429 unsigned i;
5430 struct iv_cand *cand;
5431
5432 gcc_assert (ivs->upto >= use->id);
5433 ivs->upto++;
5434 ivs->bad_uses++;
5435
5436 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5437 {
5438 cand = iv_cand (data, i);
5439 cp = get_use_iv_cost (data, use, cand);
5440 if (cheaper_cost_pair (cp, best_cp))
5441 best_cp = cp;
5442 }
5443
5444 if (best_cp == NULL)
5445 {
5446 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5447 {
5448 cand = iv_cand (data, i);
5449 cp = get_use_iv_cost (data, use, cand);
5450 if (cheaper_cost_pair (cp, best_cp))
5451 best_cp = cp;
5452 }
5453 }
5454
5455 iv_ca_set_cp (data, ivs, use, best_cp);
5456 }
5457
5458 /* Get cost for assignment IVS. */
5459
5460 static comp_cost
5461 iv_ca_cost (struct iv_ca *ivs)
5462 {
5463 /* This was a conditional expression but it triggered a bug in
5464 Sun C 5.5. */
5465 if (ivs->bad_uses)
5466 return infinite_cost;
5467 else
5468 return ivs->cost;
5469 }
5470
5471 /* Returns true if all dependences of CP are among invariants in IVS. */
5472
5473 static bool
5474 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5475 {
5476 unsigned i;
5477 bitmap_iterator bi;
5478
5479 if (!cp->depends_on)
5480 return true;
5481
5482 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5483 {
5484 if (ivs->n_invariant_uses[i] == 0)
5485 return false;
5486 }
5487
5488 return true;
5489 }
5490
5491 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5492 it before NEXT_CHANGE. */
5493
5494 static struct iv_ca_delta *
5495 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5496 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5497 {
5498 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5499
5500 change->use = use;
5501 change->old_cp = old_cp;
5502 change->new_cp = new_cp;
5503 change->next_change = next_change;
5504
5505 return change;
5506 }
5507
5508 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5509 are rewritten. */
5510
5511 static struct iv_ca_delta *
5512 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5513 {
5514 struct iv_ca_delta *last;
5515
5516 if (!l2)
5517 return l1;
5518
5519 if (!l1)
5520 return l2;
5521
5522 for (last = l1; last->next_change; last = last->next_change)
5523 continue;
5524 last->next_change = l2;
5525
5526 return l1;
5527 }
5528
5529 /* Reverse the list of changes DELTA, forming the inverse to it. */
5530
5531 static struct iv_ca_delta *
5532 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5533 {
5534 struct iv_ca_delta *act, *next, *prev = NULL;
5535 struct cost_pair *tmp;
5536
5537 for (act = delta; act; act = next)
5538 {
5539 next = act->next_change;
5540 act->next_change = prev;
5541 prev = act;
5542
5543 tmp = act->old_cp;
5544 act->old_cp = act->new_cp;
5545 act->new_cp = tmp;
5546 }
5547
5548 return prev;
5549 }
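
/* For illustration only: reversing the delta list
   USE1: A -> B, USE2: C -> D yields USE2: D -> C, USE1: B -> A,
   i.e. exactly the change list that undoes the original delta when
   committed.  */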
5550
5551 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5552 reverted instead. */
5553
5554 static void
5555 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5556 struct iv_ca_delta *delta, bool forward)
5557 {
5558 struct cost_pair *from, *to;
5559 struct iv_ca_delta *act;
5560
5561 if (!forward)
5562 delta = iv_ca_delta_reverse (delta);
5563
5564 for (act = delta; act; act = act->next_change)
5565 {
5566 from = act->old_cp;
5567 to = act->new_cp;
5568 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5569 iv_ca_set_cp (data, ivs, act->use, to);
5570 }
5571
5572 if (!forward)
5573 iv_ca_delta_reverse (delta);
5574 }
5575
5576 /* Returns true if CAND is used in IVS. */
5577
5578 static bool
5579 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5580 {
5581 return ivs->n_cand_uses[cand->id] > 0;
5582 }
5583
5584 /* Returns number of induction variable candidates in the set IVS. */
5585
5586 static unsigned
5587 iv_ca_n_cands (struct iv_ca *ivs)
5588 {
5589 return ivs->n_cands;
5590 }
5591
5592 /* Free the list of changes DELTA. */
5593
5594 static void
5595 iv_ca_delta_free (struct iv_ca_delta **delta)
5596 {
5597 struct iv_ca_delta *act, *next;
5598
5599 for (act = *delta; act; act = next)
5600 {
5601 next = act->next_change;
5602 free (act);
5603 }
5604
5605 *delta = NULL;
5606 }
5607
5608 /* Allocates a new iv candidate assignment. */
5609
5610 static struct iv_ca *
5611 iv_ca_new (struct ivopts_data *data)
5612 {
5613 struct iv_ca *nw = XNEW (struct iv_ca);
5614
5615 nw->upto = 0;
5616 nw->bad_uses = 0;
5617 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5618 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5619 nw->cands = BITMAP_ALLOC (NULL);
5620 nw->n_cands = 0;
5621 nw->n_regs = 0;
5622 nw->cand_use_cost = no_cost;
5623 nw->cand_cost = 0;
5624 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5625 nw->cost = no_cost;
5626 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5627 nw->num_used_inv_expr = 0;
5628
5629 return nw;
5630 }
5631
5632 /* Free memory occupied by the set IVS. */
5633
5634 static void
5635 iv_ca_free (struct iv_ca **ivs)
5636 {
5637 free ((*ivs)->cand_for_use);
5638 free ((*ivs)->n_cand_uses);
5639 BITMAP_FREE ((*ivs)->cands);
5640 free ((*ivs)->n_invariant_uses);
5641 free ((*ivs)->used_inv_expr);
5642 free (*ivs);
5643 *ivs = NULL;
5644 }
5645
5646 /* Dumps IVS to FILE. */
5647
5648 static void
5649 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5650 {
5651 const char *pref = " invariants ";
5652 unsigned i;
5653 comp_cost cost = iv_ca_cost (ivs);
5654
5655 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5656 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5657 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5658 bitmap_print (file, ivs->cands, " candidates: ","\n");
5659
5660 for (i = 0; i < ivs->upto; i++)
5661 {
5662 struct iv_use *use = iv_use (data, i);
5663 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5664 if (cp)
5665 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5666 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5667 else
5668 fprintf (file, " use:%d --> ??\n", use->id);
5669 }
5670
5671 for (i = 1; i <= data->max_inv_id; i++)
5672 if (ivs->n_invariant_uses[i])
5673 {
5674 fprintf (file, "%s%d", pref, i);
5675 pref = ", ";
5676 }
5677 fprintf (file, "\n\n");
5678 }
5679
5680 /* Try changing the candidate in IVS to CAND for each use. Return cost of the
5681 new set, and store differences in DELTA. Number of induction variables
5682 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5683 the function tries to find a solution with a minimal number of iv candidates. */
5684
5685 static comp_cost
5686 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5687 struct iv_cand *cand, struct iv_ca_delta **delta,
5688 unsigned *n_ivs, bool min_ncand)
5689 {
5690 unsigned i;
5691 comp_cost cost;
5692 struct iv_use *use;
5693 struct cost_pair *old_cp, *new_cp;
5694
5695 *delta = NULL;
5696 for (i = 0; i < ivs->upto; i++)
5697 {
5698 use = iv_use (data, i);
5699 old_cp = iv_ca_cand_for_use (ivs, use);
5700
5701 if (old_cp
5702 && old_cp->cand == cand)
5703 continue;
5704
5705 new_cp = get_use_iv_cost (data, use, cand);
5706 if (!new_cp)
5707 continue;
5708
5709 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5710 continue;
5711
5712 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5713 continue;
5714
5715 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5716 }
5717
5718 iv_ca_delta_commit (data, ivs, *delta, true);
5719 cost = iv_ca_cost (ivs);
5720 if (n_ivs)
5721 *n_ivs = iv_ca_n_cands (ivs);
5722 iv_ca_delta_commit (data, ivs, *delta, false);
5723
5724 return cost;
5725 }
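
/* Note the trial pattern above, used throughout this file: commit the
   delta, read the cost of the modified assignment, then commit the
   reversed delta to restore it, so alternatives are evaluated without
   copying the whole iv_ca structure.  */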
5726
5727 /* Try narrowing set IVS by removing CAND. Return the cost of
5728 the new set and store the differences in DELTA. START is
5729 the candidate with which we start narrowing. */
5730
5731 static comp_cost
5732 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5733 struct iv_cand *cand, struct iv_cand *start,
5734 struct iv_ca_delta **delta)
5735 {
5736 unsigned i, ci;
5737 struct iv_use *use;
5738 struct cost_pair *old_cp, *new_cp, *cp;
5739 bitmap_iterator bi;
5740 struct iv_cand *cnd;
5741 comp_cost cost, best_cost, acost;
5742
5743 *delta = NULL;
5744 for (i = 0; i < n_iv_uses (data); i++)
5745 {
5746 use = iv_use (data, i);
5747
5748 old_cp = iv_ca_cand_for_use (ivs, use);
5749 if (old_cp->cand != cand)
5750 continue;
5751
5752 best_cost = iv_ca_cost (ivs);
5753 /* Start narrowing with START. */
5754 new_cp = get_use_iv_cost (data, use, start);
5755
5756 if (data->consider_all_candidates)
5757 {
5758 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5759 {
5760 if (ci == cand->id || (start && ci == start->id))
5761 continue;
5762
5763 cnd = iv_cand (data, ci);
5764
5765 cp = get_use_iv_cost (data, use, cnd);
5766 if (!cp)
5767 continue;
5768
5769 iv_ca_set_cp (data, ivs, use, cp);
5770 acost = iv_ca_cost (ivs);
5771
5772 if (compare_costs (acost, best_cost) < 0)
5773 {
5774 best_cost = acost;
5775 new_cp = cp;
5776 }
5777 }
5778 }
5779 else
5780 {
5781 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5782 {
5783 if (ci == cand->id || (start && ci == start->id))
5784 continue;
5785
5786 cnd = iv_cand (data, ci);
5787
5788 cp = get_use_iv_cost (data, use, cnd);
5789 if (!cp)
5790 continue;
5791
5792 iv_ca_set_cp (data, ivs, use, cp);
5793 acost = iv_ca_cost (ivs);
5794
5795 if (compare_costs (acost, best_cost) < 0)
5796 {
5797 best_cost = acost;
5798 new_cp = cp;
5799 }
5800 }
5801 }
5802 /* Restore to old cp for use. */
5803 iv_ca_set_cp (data, ivs, use, old_cp);
5804
5805 if (!new_cp)
5806 {
5807 iv_ca_delta_free (delta);
5808 return infinite_cost;
5809 }
5810
5811 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5812 }
5813
5814 iv_ca_delta_commit (data, ivs, *delta, true);
5815 cost = iv_ca_cost (ivs);
5816 iv_ca_delta_commit (data, ivs, *delta, false);
5817
5818 return cost;
5819 }
5820
5821 /* Try optimizing the set of candidates IVS by removing candidates other
5822 than EXCEPT_CAND from it. Return cost of the new set, and store
5823 differences in DELTA. */
5824
5825 static comp_cost
5826 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5827 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5828 {
5829 bitmap_iterator bi;
5830 struct iv_ca_delta *act_delta, *best_delta;
5831 unsigned i;
5832 comp_cost best_cost, acost;
5833 struct iv_cand *cand;
5834
5835 best_delta = NULL;
5836 best_cost = iv_ca_cost (ivs);
5837
5838 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5839 {
5840 cand = iv_cand (data, i);
5841
5842 if (cand == except_cand)
5843 continue;
5844
5845 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5846
5847 if (compare_costs (acost, best_cost) < 0)
5848 {
5849 best_cost = acost;
5850 iv_ca_delta_free (&best_delta);
5851 best_delta = act_delta;
5852 }
5853 else
5854 iv_ca_delta_free (&act_delta);
5855 }
5856
5857 if (!best_delta)
5858 {
5859 *delta = NULL;
5860 return best_cost;
5861 }
5862
5863 /* Recurse to possibly remove other unnecessary ivs. */
5864 iv_ca_delta_commit (data, ivs, best_delta, true);
5865 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5866 iv_ca_delta_commit (data, ivs, best_delta, false);
5867 *delta = iv_ca_delta_join (best_delta, *delta);
5868 return best_cost;
5869 }
5870
5871 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
5872 cheaper local cost for USE than BEST_CP. If so, return a pointer to
5873 its cost_pair; otherwise just return BEST_CP. */
5874
5875 static struct cost_pair*
5876 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5877 unsigned int cand_idx, struct iv_cand *old_cand,
5878 struct cost_pair *best_cp)
5879 {
5880 struct iv_cand *cand;
5881 struct cost_pair *cp;
5882
5883 gcc_assert (old_cand != NULL && best_cp != NULL);
5884 if (cand_idx == old_cand->id)
5885 return best_cp;
5886
5887 cand = iv_cand (data, cand_idx);
5888 cp = get_use_iv_cost (data, use, cand);
5889 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5890 return cp;
5891
5892 return best_cp;
5893 }
5894
5895 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
5896 that are used by more than one iv use. For each such candidate, this
5897 function tries to represent the iv uses under it using other candidates
5898 with lower local cost, then tries to prune the new set. If the new set
5899 has lower cost, it returns the new cost after recording the candidate
5900 replacements in list DELTA. */
5901
5902 static comp_cost
5903 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5904 struct iv_ca_delta **delta)
5905 {
5906 bitmap_iterator bi, bj;
5907 unsigned int i, j, k;
5908 struct iv_use *use;
5909 struct iv_cand *cand;
5910 comp_cost orig_cost, acost;
5911 struct iv_ca_delta *act_delta, *tmp_delta;
5912 struct cost_pair *old_cp, *best_cp = NULL;
5913
5914 *delta = NULL;
5915 orig_cost = iv_ca_cost (ivs);
5916
5917 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5918 {
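/* A candidate with a single use has nothing to redistribute, and one
with too many uses would make the replacement attempt too costly. */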
5919 if (ivs->n_cand_uses[i] == 1
5920 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5921 continue;
5922
5923 cand = iv_cand (data, i);
5924
5925 act_delta = NULL;
5926 /* Represent uses under the current candidate using other candidates
5927 with lower local cost. */
5928 for (j = 0; j < ivs->upto; j++)
5929 {
5930 use = iv_use (data, j);
5931 old_cp = iv_ca_cand_for_use (ivs, use);
5932
5933 if (old_cp->cand != cand)
5934 continue;
5935
5936 best_cp = old_cp;
5937 if (data->consider_all_candidates)
5938 for (k = 0; k < n_iv_cands (data); k++)
5939 best_cp = cheaper_cost_with_cand (data, use, k,
5940 old_cp->cand, best_cp);
5941 else
5942 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5943 best_cp = cheaper_cost_with_cand (data, use, k,
5944 old_cp->cand, best_cp);
5945
5946 if (best_cp == old_cp)
5947 continue;
5948
5949 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
5950 }
5951 /* Nothing was replaced for this candidate; no pruning needed. */
5952 if (!act_delta)
5953 continue;
5954
5955 /* Prune the new candidate set. */
5956 iv_ca_delta_commit (data, ivs, act_delta, true);
5957 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
5958 iv_ca_delta_commit (data, ivs, act_delta, false);
5959 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5960
5961 if (compare_costs (acost, orig_cost) < 0)
5962 {
5963 *delta = act_delta;
5964 return acost;
5965 }
5966 else
5967 iv_ca_delta_free (&act_delta);
5968 }
5969
5970 return orig_cost;
5971 }
5972
5973 /* Tries to extend the set IVS in the best possible way in order
5974 to express the USE. If ORIGINALP is true, prefer candidates from
5975 the original set of IVs; otherwise favor important candidates not
5976 based on any memory object. */
5977
5978 static bool
5979 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5980 struct iv_use *use, bool originalp)
5981 {
5982 comp_cost best_cost, act_cost;
5983 unsigned i;
5984 bitmap_iterator bi;
5985 struct iv_cand *cand;
5986 struct iv_ca_delta *best_delta = NULL, *act_delta;
5987 struct cost_pair *cp;
5988
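/* iv_ca_add_use assigns the cheapest applicable candidate already in
IVS to the use, if any; record that choice as the baseline delta,
then detach the use again so the alternatives below can be measured. */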
5989 iv_ca_add_use (data, ivs, use);
5990 best_cost = iv_ca_cost (ivs);
5991 cp = iv_ca_cand_for_use (ivs, use);
5992 if (cp)
5993 {
5994 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5995 iv_ca_set_no_cp (data, ivs, use);
5996 }
5997
5998 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5999 first try important candidates not based on any memory object. Only if
6000 this fails do we try the specific ones. Rationale -- in loops with many
6001 variables the best choice often is to use just one generic biv. If we
6002 added many use-specific ivs here, the optimization algorithm later would
6003 be likely to get stuck in a local minimum, thus causing us to create
6004 too many ivs. The approach of going from few ivs to more seems more
6005 likely to be successful -- starting from few ivs, replacing an expensive
6006 use by a specific iv should always be a win. */
6007 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6008 {
6009 cand = iv_cand (data, i);
6010
6011 if (originalp && cand->pos != IP_ORIGINAL)
6012 continue;
6013
6014 if (!originalp && cand->iv->base_object != NULL_TREE)
6015 continue;
6016
6017 if (iv_ca_cand_used_p (ivs, cand))
6018 continue;
6019
6020 cp = get_use_iv_cost (data, use, cand);
6021 if (!cp)
6022 continue;
6023
6024 iv_ca_set_cp (data, ivs, use, cp);
6025 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6026 true);
6027 iv_ca_set_no_cp (data, ivs, use);
6028 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6029
6030 if (compare_costs (act_cost, best_cost) < 0)
6031 {
6032 best_cost = act_cost;
6033
6034 iv_ca_delta_free (&best_delta);
6035 best_delta = act_delta;
6036 }
6037 else
6038 iv_ca_delta_free (&act_delta);
6039 }
6040
6041 if (infinite_cost_p (best_cost))
6042 {
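/* The important candidates were not enough; fall back to every
candidate recorded in this use's cost map. */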
6043 for (i = 0; i < use->n_map_members; i++)
6044 {
6045 cp = use->cost_map + i;
6046 cand = cp->cand;
6047 if (!cand)
6048 continue;
6049
6050 /* Already tried this. */
6051 if (cand->important)
6052 {
6053 if (originalp && cand->pos == IP_ORIGINAL)
6054 continue;
6055 if (!originalp && cand->iv->base_object == NULL_TREE)
6056 continue;
6057 }
6058
6059 if (iv_ca_cand_used_p (ivs, cand))
6060 continue;
6061
6062 act_delta = NULL;
6063 iv_ca_set_cp (data, ivs, use, cp);
6064 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6065 iv_ca_set_no_cp (data, ivs, use);
6066 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6067 cp, act_delta);
6068
6069 if (compare_costs (act_cost, best_cost) < 0)
6070 {
6071 best_cost = act_cost;
6072
6073 if (best_delta)
6074 iv_ca_delta_free (&best_delta);
6075 best_delta = act_delta;
6076 }
6077 else
6078 iv_ca_delta_free (&act_delta);
6079 }
6080 }
6081
6082 iv_ca_delta_commit (data, ivs, best_delta, true);
6083 iv_ca_delta_free (&best_delta);
6084
6085 return !infinite_cost_p (best_cost);
6086 }
6087
6088 /* Finds an initial assignment of candidates to uses. */
6089
6090 static struct iv_ca *
6091 get_initial_solution (struct ivopts_data *data, bool originalp)
6092 {
6093 struct iv_ca *ivs = iv_ca_new (data);
6094 unsigned i;
6095
6096 for (i = 0; i < n_iv_uses (data); i++)
6097 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6098 {
6099 iv_ca_free (&ivs);
6100 return NULL;
6101 }
6102
6103 return ivs;
6104 }
6105
6106 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6107 points to a bool variable; if it is true, this function also tries to
6108 break the local optimal fixed-point by replacing candidates in IVS. */
6109
6110 static bool
6111 try_improve_iv_set (struct ivopts_data *data,
6112 struct iv_ca *ivs, bool *try_replace_p)
6113 {
6114 unsigned i, n_ivs;
6115 comp_cost acost, best_cost = iv_ca_cost (ivs);
6116 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6117 struct iv_cand *cand;
6118
6119 /* Try extending the set of induction variables by one. */
6120 for (i = 0; i < n_iv_cands (data); i++)
6121 {
6122 cand = iv_cand (data, i);
6123
6124 if (iv_ca_cand_used_p (ivs, cand))
6125 continue;
6126
6127 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6128 if (!act_delta)
6129 continue;
6130
6131 /* If we successfully added the candidate and the set is small enough,
6132 try optimizing it by removing other candidates. */
6133 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6134 {
6135 iv_ca_delta_commit (data, ivs, act_delta, true);
6136 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6137 iv_ca_delta_commit (data, ivs, act_delta, false);
6138 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6139 }
6140
6141 if (compare_costs (acost, best_cost) < 0)
6142 {
6143 best_cost = acost;
6144 iv_ca_delta_free (&best_delta);
6145 best_delta = act_delta;
6146 }
6147 else
6148 iv_ca_delta_free (&act_delta);
6149 }
6150
6151 if (!best_delta)
6152 {
6153 /* Try removing the candidates from the set instead. */
6154 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6155
6156 if (!best_delta && *try_replace_p)
6157 {
6158 *try_replace_p = false;
6159 /* So far the candidate-selection algorithm tends to choose fewer IVs
6160 so that it can handle loops that have many variables but for which
6161 the best choice is often to use only one general biv. One weakness
6162 is that it can't handle the opposite case, in which different
6163 candidates should be chosen for different uses. To solve this
6164 problem, we replace candidates in the manner described in the
6165 comment before iv_ca_replace, thus giving the general algorithm a
6166 chance to break the local optimal fixed-point in these cases. */
6167 best_cost = iv_ca_replace (data, ivs, &best_delta);
6168 }
6169
6170 if (!best_delta)
6171 return false;
6172 }
6173
6174 iv_ca_delta_commit (data, ivs, best_delta, true);
6175 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6176 iv_ca_delta_free (&best_delta);
6177 return true;
6178 }
6179
6180 /* Attempts to find the optimal set of induction variables. We use a simple
6181 greedy heuristic: we try to replace at most one candidate in the selected
6182 solution and remove the unused ivs while this improves the cost. */
6183
6184 static struct iv_ca *
6185 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6186 {
6187 struct iv_ca *set;
6188 bool try_replace_p = true;
6189
6190 /* Get the initial solution. */
6191 set = get_initial_solution (data, originalp);
6192 if (!set)
6193 {
6194 if (dump_file && (dump_flags & TDF_DETAILS))
6195 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6196 return NULL;
6197 }
6198
6199 if (dump_file && (dump_flags & TDF_DETAILS))
6200 {
6201 fprintf (dump_file, "Initial set of candidates:\n");
6202 iv_ca_dump (data, dump_file, set);
6203 }
6204
6205 while (try_improve_iv_set (data, set, &try_replace_p))
6206 {
6207 if (dump_file && (dump_flags & TDF_DETAILS))
6208 {
6209 fprintf (dump_file, "Improved to:\n");
6210 iv_ca_dump (data, dump_file, set);
6211 }
6212 }
6213
6214 return set;
6215 }
6216
6217 static struct iv_ca *
6218 find_optimal_iv_set (struct ivopts_data *data)
6219 {
6220 unsigned i;
6221 struct iv_ca *set, *origset;
6222 struct iv_use *use;
6223 comp_cost cost, origcost;
6224
6225 /* Determine the cost using a strategy that starts with the original IVs,
6226 then try again using a strategy that prefers candidates not based on
6227 any memory object. Keep whichever solution is cheaper. */
6228 origset = find_optimal_iv_set_1 (data, true);
6229 set = find_optimal_iv_set_1 (data, false);
6230
6231 if (!origset && !set)
6232 return NULL;
6233
6234 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6235 cost = set ? iv_ca_cost (set) : infinite_cost;
6236
6237 if (dump_file && (dump_flags & TDF_DETAILS))
6238 {
6239 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6240 origcost.cost, origcost.complexity);
6241 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6242 cost.cost, cost.complexity);
6243 }
6244
6245 /* Choose the one with the best cost. */
6246 if (compare_costs (origcost, cost) <= 0)
6247 {
6248 if (set)
6249 iv_ca_free (&set);
6250 set = origset;
6251 }
6252 else if (origset)
6253 iv_ca_free (&origset);
6254
6255 for (i = 0; i < n_iv_uses (data); i++)
6256 {
6257 use = iv_use (data, i);
6258 use->selected = iv_ca_cand_for_use (set, use)->cand;
6259 }
6260
6261 return set;
6262 }
6263
6264 /* Creates a new induction variable corresponding to CAND. */
6265
6266 static void
6267 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6268 {
6269 gimple_stmt_iterator incr_pos;
6270 tree base;
6271 bool after = false;
6272
6273 if (!cand->iv)
6274 return;
6275
6276 switch (cand->pos)
6277 {
6278 case IP_NORMAL:
6279 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6280 break;
6281
6282 case IP_END:
6283 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6284 after = true;
6285 break;
6286
6287 case IP_AFTER_USE:
6288 after = true;
6289 /* fall through */
6290 case IP_BEFORE_USE:
6291 incr_pos = gsi_for_stmt (cand->incremented_at);
6292 break;
6293
6294 case IP_ORIGINAL:
6295 /* Mark that the iv is preserved. */
6296 name_info (data, cand->var_before)->preserve_biv = true;
6297 name_info (data, cand->var_after)->preserve_biv = true;
6298
6299 /* Rewrite the increment so that it uses var_before directly. */
6300 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6301 return;
6302 }
6303
6304 gimple_add_tmp_var (cand->var_before);
6305
6306 base = unshare_expr (cand->iv->base);
6307
6308 create_iv (base, unshare_expr (cand->iv->step),
6309 cand->var_before, data->current_loop,
6310 &incr_pos, after, &cand->var_before, &cand->var_after);
6311 }
6312
6313 /* Creates new induction variables described in SET. */
6314
6315 static void
6316 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6317 {
6318 unsigned i;
6319 struct iv_cand *cand;
6320 bitmap_iterator bi;
6321
6322 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6323 {
6324 cand = iv_cand (data, i);
6325 create_new_iv (data, cand);
6326 }
6327
6328 if (dump_file && (dump_flags & TDF_DETAILS))
6329 {
6330 fprintf (dump_file, "\nSelected IV set: \n");
6331 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6332 {
6333 cand = iv_cand (data, i);
6334 dump_cand (dump_file, cand);
6335 }
6336 fprintf (dump_file, "\n");
6337 }
6338 }
6339
6340 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
6341 using candidate CAND. */
6342
6343 static void
6344 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6345 struct iv_use *use, struct iv_cand *cand)
6346 {
6347 tree comp;
6348 tree op, tgt;
6349 gassign *ass;
6350 gimple_stmt_iterator bsi;
6351
6352 /* An important special case -- if we are asked to express the value of
6353 the original iv by itself, just exit; there is no need to
6354 introduce a new computation (that might also need casting the
6355 variable to unsigned and back). */
6356 if (cand->pos == IP_ORIGINAL
6357 && cand->incremented_at == use->stmt)
6358 {
6359 enum tree_code stmt_code;
6360
6361 gcc_assert (is_gimple_assign (use->stmt));
6362 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6363
6364 /* Check whether we may leave the computation unchanged.
6365 This is the case only if it does not rely on other
6366 computations in the loop -- otherwise, the computation
6367 we rely upon may be removed in remove_unused_ivs,
6368 thus leading to an ICE. */
6369 stmt_code = gimple_assign_rhs_code (use->stmt);
6370 if (stmt_code == PLUS_EXPR
6371 || stmt_code == MINUS_EXPR
6372 || stmt_code == POINTER_PLUS_EXPR)
6373 {
6374 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6375 op = gimple_assign_rhs2 (use->stmt);
6376 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6377 op = gimple_assign_rhs1 (use->stmt);
6378 else
6379 op = NULL_TREE;
6380 }
6381 else
6382 op = NULL_TREE;
6383
6384 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6385 return;
6386 }
6387
6388 comp = get_computation (data->current_loop, use, cand);
6389 gcc_assert (comp != NULL_TREE);
6390
6391 switch (gimple_code (use->stmt))
6392 {
6393 case GIMPLE_PHI:
6394 tgt = PHI_RESULT (use->stmt);
6395
6396 /* If we should keep the biv, do not replace it. */
6397 if (name_info (data, tgt)->preserve_biv)
6398 return;
6399
6400 bsi = gsi_after_labels (gimple_bb (use->stmt));
6401 break;
6402
6403 case GIMPLE_ASSIGN:
6404 tgt = gimple_assign_lhs (use->stmt);
6405 bsi = gsi_for_stmt (use->stmt);
6406 break;
6407
6408 default:
6409 gcc_unreachable ();
6410 }
6411
6412 if (!valid_gimple_rhs_p (comp)
6413 || (gimple_code (use->stmt) != GIMPLE_PHI
6414 /* We can't allow re-allocating the stmt as it might still be
6415 pointed to. */
6416 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6417 >= gimple_num_ops (gsi_stmt (bsi)))))
6418 {
6419 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6420 true, GSI_SAME_STMT);
6421 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6422 {
6423 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6424 /* As this isn't a plain copy we have to reset alignment
6425 information. */
6426 if (SSA_NAME_PTR_INFO (comp))
6427 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6428 }
6429 }
6430
6431 if (gimple_code (use->stmt) == GIMPLE_PHI)
6432 {
6433 ass = gimple_build_assign (tgt, comp);
6434 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6435
6436 bsi = gsi_for_stmt (use->stmt);
6437 remove_phi_node (&bsi, false);
6438 }
6439 else
6440 {
6441 gimple_assign_set_rhs_from_tree (&bsi, comp);
6442 use->stmt = gsi_stmt (bsi);
6443 }
6444 }
6445
6446 /* Performs a peephole optimization to reorder the iv update statement with
6447 a mem ref to enable instruction combining in later phases. The mem ref uses
6448 the iv value before the update, so the reordering transformation requires
6449 adjustment of the offset. CAND is the selected IV_CAND.
6450
6451 Example:
6452
6453 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6454 iv2 = iv1 + 1;
6455
6456 if (t < val) (1)
6457 goto L;
6458 goto Head;
6459
6460
6461 Directly propagating t over to (1) would introduce an overlapping live
6462 range and thus increase register pressure. This peephole transforms it into:
6463
6464
6465 iv2 = iv1 + 1;
6466 t = MEM_REF (base, iv2, 8, 8);
6467 if (t < val)
6468 goto L;
6469 goto Head;
6470 */
6471
6472 static void
6473 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6474 {
6475 tree var_after;
6476 gimple iv_update, stmt;
6477 basic_block bb;
6478 gimple_stmt_iterator gsi, gsi_iv;
6479
6480 if (cand->pos != IP_NORMAL)
6481 return;
6482
6483 var_after = cand->var_after;
6484 iv_update = SSA_NAME_DEF_STMT (var_after);
6485
6486 bb = gimple_bb (iv_update);
6487 gsi = gsi_last_nondebug_bb (bb);
6488 stmt = gsi_stmt (gsi);
6489
6490 /* Only handle conditional statements for now. */
6491 if (gimple_code (stmt) != GIMPLE_COND)
6492 return;
6493
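/* The iv update must be the statement immediately preceding the exit
condition. */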
6494 gsi_prev_nondebug (&gsi);
6495 stmt = gsi_stmt (gsi);
6496 if (stmt != iv_update)
6497 return;
6498
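/* And the use statement must in turn immediately precede the iv
update. */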
6499 gsi_prev_nondebug (&gsi);
6500 if (gsi_end_p (gsi))
6501 return;
6502
6503 stmt = gsi_stmt (gsi);
6504 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6505 return;
6506
6507 if (stmt != use->stmt)
6508 return;
6509
6510 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6511 return;
6512
6513 if (dump_file && (dump_flags & TDF_DETAILS))
6514 {
6515 fprintf (dump_file, "Reordering \n");
6516 print_gimple_stmt (dump_file, iv_update, 0, 0);
6517 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6518 fprintf (dump_file, "\n");
6519 }
6520
6521 gsi = gsi_for_stmt (use->stmt);
6522 gsi_iv = gsi_for_stmt (iv_update);
6523 gsi_move_before (&gsi_iv, &gsi);
6524
6525 cand->pos = IP_BEFORE_USE;
6526 cand->incremented_at = use->stmt;
6527 }
6528
6529 /* Rewrites USE (an address that is an iv) using candidate CAND. */
6530
6531 static void
6532 rewrite_use_address (struct ivopts_data *data,
6533 struct iv_use *use, struct iv_cand *cand)
6534 {
6535 aff_tree aff;
6536 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6537 tree base_hint = NULL_TREE;
6538 tree ref, iv;
6539 bool ok;
6540
6541 adjust_iv_update_pos (cand, use);
6542 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6543 gcc_assert (ok);
6544 unshare_aff_combination (&aff);
6545
6546 /* To avoid undefined overflow problems, all IV candidates use unsigned
6547 integer types. The drawback is that this makes it impossible for
6548 create_mem_ref to distinguish an IV that is based on a memory object
6549 from one that represents simply an offset.
6550
6551 To work around this problem, we pass a hint to create_mem_ref that
6552 indicates which variable (if any) in aff is an IV based on a memory
6553 object. Note that we only consider the candidate. If this is not
6554 based on an object, the base of the reference is in some subexpression
6555 of the use -- but these will use pointer types, so they are recognized
6556 by the create_mem_ref heuristics anyway. */
6557 if (cand->iv->base_object)
6558 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6559
6560 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6561 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6562 reference_alias_ptr_type (*use->op_p),
6563 iv, base_hint, data->speed);
6564 copy_ref_info (ref, *use->op_p);
6565 *use->op_p = ref;
6566 }
6567
6568 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6569 candidate CAND. */
6570
6571 static void
6572 rewrite_use_compare (struct ivopts_data *data,
6573 struct iv_use *use, struct iv_cand *cand)
6574 {
6575 tree comp, *var_p, op, bound;
6576 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6577 enum tree_code compare;
6578 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6579 bool ok;
6580
6581 bound = cp->value;
6582 if (bound)
6583 {
6584 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6585 tree var_type = TREE_TYPE (var);
6586 gimple_seq stmts;
6587
6588 if (dump_file && (dump_flags & TDF_DETAILS))
6589 {
6590 fprintf (dump_file, "Replacing exit test: ");
6591 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6592 }
6593 compare = cp->comp;
6594 bound = unshare_expr (fold_convert (var_type, bound));
6595 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6596 if (stmts)
6597 gsi_insert_seq_on_edge_immediate (
6598 loop_preheader_edge (data->current_loop),
6599 stmts);
6600
6601 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6602 gimple_cond_set_lhs (cond_stmt, var);
6603 gimple_cond_set_code (cond_stmt, compare);
6604 gimple_cond_set_rhs (cond_stmt, op);
6605 return;
6606 }
6607
6608 /* The induction variable elimination failed; just express the original
6609 giv. */
6610 comp = get_computation (data->current_loop, use, cand);
6611 gcc_assert (comp != NULL_TREE);
6612
6613 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6614 gcc_assert (ok);
6615
6616 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6617 true, GSI_SAME_STMT);
6618 }
6619
6620 /* Rewrites USE using candidate CAND. */
6621
6622 static void
6623 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6624 {
6625 switch (use->type)
6626 {
6627 case USE_NONLINEAR_EXPR:
6628 rewrite_use_nonlinear_expr (data, use, cand);
6629 break;
6630
6631 case USE_ADDRESS:
6632 rewrite_use_address (data, use, cand);
6633 break;
6634
6635 case USE_COMPARE:
6636 rewrite_use_compare (data, use, cand);
6637 break;
6638
6639 default:
6640 gcc_unreachable ();
6641 }
6642
6643 update_stmt (use->stmt);
6644 }
6645
6646 /* Rewrite the uses using the selected induction variables. */
6647
6648 static void
6649 rewrite_uses (struct ivopts_data *data)
6650 {
6651 unsigned i;
6652 struct iv_cand *cand;
6653 struct iv_use *use;
6654
6655 for (i = 0; i < n_iv_uses (data); i++)
6656 {
6657 use = iv_use (data, i);
6658 cand = use->selected;
6659 gcc_assert (cand);
6660
6661 rewrite_use (data, use, cand);
6662 }
6663 }
6664
6665 /* Removes the ivs that are not used after rewriting. */
6666
6667 static void
6668 remove_unused_ivs (struct ivopts_data *data)
6669 {
6670 unsigned j;
6671 bitmap_iterator bi;
6672 bitmap toremove = BITMAP_ALLOC (NULL);
6673
6674 /* Figure out an order in which to release SSA DEFs so that we don't
6675 release something that we'd have to propagate into a debug stmt
6676 afterwards. */
6677 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6678 {
6679 struct version_info *info;
6680
6681 info = ver_info (data, j);
6682 if (info->iv
6683 && !integer_zerop (info->iv->step)
6684 && !info->inv_id
6685 && !info->iv->have_use_for
6686 && !info->preserve_biv)
6687 {
6688 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6689
6690 tree def = info->iv->ssa_name;
6691
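/* Before the iv is released, try to rewrite any debug binds that
reference it in terms of one of the selected candidates, so the
debug info survives the removal. */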
6692 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6693 {
6694 imm_use_iterator imm_iter;
6695 use_operand_p use_p;
6696 gimple stmt;
6697 int count = 0;
6698
6699 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6700 {
6701 if (!gimple_debug_bind_p (stmt))
6702 continue;
6703
6704 /* We just want to determine whether to do nothing
6705 (count == 0), to substitute the computed
6706 expression into a single use of the SSA DEF by
6707 itself (count == 1), or to use a debug temp
6708 because the SSA DEF is used multiple times or as
6709 part of a larger expression (count > 1). */
6710 count++;
6711 if (gimple_debug_bind_get_value (stmt) != def)
6712 count++;
6713
6714 if (count > 1)
6715 BREAK_FROM_IMM_USE_STMT (imm_iter);
6716 }
6717
6718 if (!count)
6719 continue;
6720
6721 struct iv_use dummy_use;
6722 struct iv_cand *best_cand = NULL, *cand;
6723 unsigned i, best_pref = 0, cand_pref;
6724
6725 memset (&dummy_use, 0, sizeof (dummy_use));
6726 dummy_use.iv = info->iv;
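/* Score each selected candidate: +4 if its step matches the dead
iv's, +2 if the base modes match, +1 if its base is a constant;
keep the highest-scoring one. */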
6727 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6728 {
6729 cand = iv_use (data, i)->selected;
6730 if (cand == best_cand)
6731 continue;
6732 cand_pref = operand_equal_p (cand->iv->step,
6733 info->iv->step, 0)
6734 ? 4 : 0;
6735 cand_pref
6736 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6737 == TYPE_MODE (TREE_TYPE (info->iv->base))
6738 ? 2 : 0;
6739 cand_pref
6740 += TREE_CODE (cand->iv->base) == INTEGER_CST
6741 ? 1 : 0;
6742 if (best_cand == NULL || best_pref < cand_pref)
6743 {
6744 best_cand = cand;
6745 best_pref = cand_pref;
6746 }
6747 }
6748
6749 if (!best_cand)
6750 continue;
6751
6752 tree comp = get_computation_at (data->current_loop,
6753 &dummy_use, best_cand,
6754 SSA_NAME_DEF_STMT (def));
6755 if (!comp)
6756 continue;
6757
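/* The value is used by several binds, or as part of a larger
expression; emit it once into a debug temporary and let the
binds reference that. */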
6758 if (count > 1)
6759 {
6760 tree vexpr = make_node (DEBUG_EXPR_DECL);
6761 DECL_ARTIFICIAL (vexpr) = 1;
6762 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6763 if (SSA_NAME_VAR (def))
6764 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6765 else
6766 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6767 gdebug *def_temp
6768 = gimple_build_debug_bind (vexpr, comp, NULL);
6769 gimple_stmt_iterator gsi;
6770
6771 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6772 gsi = gsi_after_labels (gimple_bb
6773 (SSA_NAME_DEF_STMT (def)));
6774 else
6775 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6776
6777 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6778 comp = vexpr;
6779 }
6780
6781 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6782 {
6783 if (!gimple_debug_bind_p (stmt))
6784 continue;
6785
6786 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6787 SET_USE (use_p, comp);
6788
6789 update_stmt (stmt);
6790 }
6791 }
6792 }
6793 }
6794
6795 release_defs_bitset (toremove);
6796
6797 BITMAP_FREE (toremove);
6798 }
6799
6800 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6801 for hash_map::traverse. */
6802
6803 bool
6804 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6805 {
6806 free (value);
6807 return true;
6808 }
6809
6810 /* Frees data allocated by the optimization of a single loop. */
6811
6812 static void
6813 free_loop_data (struct ivopts_data *data)
6814 {
6815 unsigned i, j;
6816 bitmap_iterator bi;
6817 tree obj;
6818
6819 if (data->niters)
6820 {
6821 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6822 delete data->niters;
6823 data->niters = NULL;
6824 }
6825
6826 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6827 {
6828 struct version_info *info;
6829
6830 info = ver_info (data, i);
6831 free (info->iv);
6832 info->iv = NULL;
6833 info->has_nonlin_use = false;
6834 info->preserve_biv = false;
6835 info->inv_id = 0;
6836 }
6837 bitmap_clear (data->relevant);
6838 bitmap_clear (data->important_candidates);
6839
6840 for (i = 0; i < n_iv_uses (data); i++)
6841 {
6842 struct iv_use *use = iv_use (data, i);
6843
6844 free (use->iv);
6845 BITMAP_FREE (use->related_cands);
6846 for (j = 0; j < use->n_map_members; j++)
6847 if (use->cost_map[j].depends_on)
6848 BITMAP_FREE (use->cost_map[j].depends_on);
6849 free (use->cost_map);
6850 free (use);
6851 }
6852 data->iv_uses.truncate (0);
6853
6854 for (i = 0; i < n_iv_cands (data); i++)
6855 {
6856 struct iv_cand *cand = iv_cand (data, i);
6857
6858 free (cand->iv);
6859 if (cand->depends_on)
6860 BITMAP_FREE (cand->depends_on);
6861 free (cand);
6862 }
6863 data->iv_candidates.truncate (0);
6864
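/* New SSA names may have been created while processing this loop;
grow version_info so it covers all of them. */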
6865 if (data->version_info_size < num_ssa_names)
6866 {
6867 data->version_info_size = 2 * num_ssa_names;
6868 free (data->version_info);
6869 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6870 }
6871
6872 data->max_inv_id = 0;
6873
6874 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6875 SET_DECL_RTL (obj, NULL_RTX);
6876
6877 decl_rtl_to_reset.truncate (0);
6878
6879 data->inv_expr_tab->empty ();
6880 data->inv_expr_id = 0;
6881 }
6882
6883 /* Finalizes the data structures used by the iv optimization
6884 pass. */
6885
6886 static void
6887 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6888 {
6889 free_loop_data (data);
6890 free (data->version_info);
6891 BITMAP_FREE (data->relevant);
6892 BITMAP_FREE (data->important_candidates);
6893
6894 decl_rtl_to_reset.release ();
6895 data->iv_uses.release ();
6896 data->iv_candidates.release ();
6897 delete data->inv_expr_tab;
6898 data->inv_expr_tab = NULL;
6899 free_affine_expand_cache (&data->name_expansion_cache);
6900 }
6901
6902 /* Returns true if the loop body BODY (with NUM_NODES blocks) contains any function call, ignoring inexpensive builtins. */
6903
6904 static bool
6905 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6906 {
6907 gimple_stmt_iterator gsi;
6908 unsigned i;
6909
6910 for (i = 0; i < num_nodes; i++)
6911 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6912 {
6913 gimple stmt = gsi_stmt (gsi);
6914 if (is_gimple_call (stmt)
6915 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6916 return true;
6917 }
6918 return false;
6919 }
6920
6921 /* Optimizes the LOOP. Returns true if anything changed. */
6922
6923 static bool
6924 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6925 {
6926 bool changed = false;
6927 struct iv_ca *iv_ca;
6928 edge exit = single_dom_exit (loop);
6929 basic_block *body;
6930
6931 gcc_assert (!data->niters);
6932 data->current_loop = loop;
6933 data->speed = optimize_loop_for_speed_p (loop);
6934
6935 if (dump_file && (dump_flags & TDF_DETAILS))
6936 {
6937 fprintf (dump_file, "Processing loop %d\n", loop->num);
6938
6939 if (exit)
6940 {
6941 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6942 exit->src->index, exit->dest->index);
6943 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6944 fprintf (dump_file, "\n");
6945 }
6946
6947 fprintf (dump_file, "\n");
6948 }
6949
6950 body = get_loop_body (loop);
6951 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6952 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6953 free (body);
6954
6955 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6956
6957 /* For each ssa name, determine whether it behaves as an induction
6958 variable in this loop. */
6959 if (!find_induction_variables (data))
6960 goto finish;
6961
6962 /* Finds interesting uses (item 1). */
6963 find_interesting_uses (data);
6964 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6965 goto finish;
6966
6967 /* Finds candidates for the induction variables (item 2). */
6968 find_iv_candidates (data);
6969
6970 /* Calculates the costs (item 3, part 1). */
6971 determine_iv_costs (data);
6972 determine_use_iv_costs (data);
6973 determine_set_costs (data);
6974
6975 /* Find the optimal set of induction variables (item 3, part 2). */
6976 iv_ca = find_optimal_iv_set (data);
6977 if (!iv_ca)
6978 goto finish;
6979 changed = true;
6980
6981 /* Create the new induction variables (item 4, part 1). */
6982 create_new_ivs (data, iv_ca);
6983 iv_ca_free (&iv_ca);
6984
6985 /* Rewrite the uses (item 4, part 2). */
6986 rewrite_uses (data);
6987
6988 /* Remove the ivs that are unused after rewriting. */
6989 remove_unused_ivs (data);
6990
6991 /* We have changed the structure of induction variables; it might happen
6992 that definitions in the scev database refer to some of them that were
6993 eliminated. */
6994 scev_reset ();
6995
6996 finish:
6997 free_loop_data (data);
6998
6999 return changed;
7000 }
7001
7002 /* Main entry point. Optimizes induction variables in loops. */
7003
7004 void
7005 tree_ssa_iv_optimize (void)
7006 {
7007 struct loop *loop;
7008 struct ivopts_data data;
7009
7010 tree_ssa_iv_optimize_init (&data);
7011
7012 /* Optimize the loops starting with the innermost ones. */
7013 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7014 {
7015 if (dump_file && (dump_flags & TDF_DETAILS))
7016 flow_loop_dump (loop, dump_file, NULL, 1);
7017
7018 tree_ssa_iv_optimize_loop (&data, loop);
7019 }
7020
7021 tree_ssa_iv_optimize_finalize (&data);
7022 }