/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above
38
   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

   All the costs are defined in a machine-specific way, using the target
   hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
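
/* As an illustrative sketch (not taken from any particular test case),
   the strength reduction performed here can rewrite an index-based loop

     for (i = 0; i < n; i++)
       a[i] = 0;

   where the address a + 4 * i is recomputed from i in every iteration
   (assuming 4-byte elements), into a form that steps a pointer directly,

     for (p = a; p < a + n; p++)
       *p = 0;

   using a single induction variable incremented by the element size.
   The cost model below decides whether such a rewrite actually pays off
   on the target.  */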
63
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
112
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}
137
/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};
160
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
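
/* For instance (a schematic sketch, not taken from a dump), with
   IP_NORMAL the increment of a candidate is placed just before the exit
   test,

     i_2 = i_1 + 1;
     if (i_2 < n_3) goto loop; else goto exit;

   whereas with IP_END it is placed at the end of the latch block, after
   the exit test has already been emitted.  */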
224
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
341
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;
425
static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
475
/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
562
/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
640
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}
675
/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
723
/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
808
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so we
         reject them here to avoid creating overlapping live ranges for
         them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
862
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
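
/* For example (a hand-written sketch, not from a dump): for
   EXPR = &a[0] the ADDR_EXPR case applies and &a is returned as the
   base object; for EXPR = p_1 + 4, the POINTER_PLUS_EXPR is looked
   through and the default case returns p_1 itself, converted to
   ptr_type_node; for an integer constant, NULL_TREE is returned.  */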
930
/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
                                             &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
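
/* As an illustration (a schematic example, not copied from a dump), a
   typical biv looks like

     # i_1 = PHI <0(preheader), i_2(latch)>
     ...
     i_2 = i_1 + 1;

   for which determine_biv_step returns the step 1, while a phi whose
   result does not form a simple affine progression yields NULL_TREE.  */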
1026
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv that was peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
1114
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}
1231
/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, so
     that it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}
1258
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}
1330
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
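
/* For instance (an illustrative sketch): for the exit test

     if (i_1 < n_2)

   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   CONTROL_VAR is set to point to i_1, BOUND to n_2 and true is
   returned; for a compare of two invariants the locations are still
   filled in, but false is returned.  */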
1395
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}
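
/* E.g. (a schematic example): given a nest

     for (i = ...)      <- loop 1
       for (j = ...)    <- loop 2
         ... n_5 ...

   where n_5 is defined before loop 1, asking about n_5 relative to
   loop 2 yields loop 1, the outermost loop in which n_5 is invariant;
   an expression involving j_3 yields NULL, since j_3 is defined inside
   loop 2 itself.  */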
1464
/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};
1512
static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
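
/* As a worked example (hand-constructed): for a reference a[i_1] where
   i_1 is an iv with step 1 and the elements of a are 4 bytes wide, the
   index is replaced by the initial value of i_1 and 4 * 1 = 4 is added
   to DTA->step, i.e. the address as a whole advances by 4 bytes per
   iteration.  */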
1590
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
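
/* For example (hand-checked): for TOP = n_1 * 4 - n_1 and BOT = n_1,
   the MULT_EXPR operand contributes 4 and the plain n_1 contributes 1,
   so the MINUS_EXPR case sets MUL to 3 and returns true; for
   TOP = n_1 * 4 and BOT = m_2 the recursion bottoms out in the default
   case and false is returned.  */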
1670
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}
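
/* E.g. (an illustrative case): on a STRICT_ALIGNMENT target, a 4-byte
   access whose step has 2 as its highest power-of-two factor may land
   on a 2-byte boundary in some iteration, so the reference is
   conservatively reported as possibly unaligned even when its base is
   sufficiently aligned.  */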
1718
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}
1759
/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
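
/* As a concrete illustration (schematic, not from a dump): for a store
   a[i_1] = 0 with i_1 stepping by 1 over 4-byte elements, the recorded
   address use gets an iv with base &a[initial value of i_1] and step 4;
   when anything goes wrong (volatile operands, a possibly unaligned or
   non-addressable base), we fall back to recording the indices
   themselves as ordinary uses.  */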
1876
/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2055
2056 /* Strips constant offsets from EXPR and stores them in *OFFSET. If INSIDE_ADDR
2057 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2058 we are at the top level of the processed address. */
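/* For illustration: given the address &a[i + 3] where the elements of A
   are 4 bytes wide, strip_offset_1 rewrites the expression to &a[i] and
   stores 3 * 4 == 12 in *OFFSET.  The INSIDE_ADDR and TOP_COMPREF flags
   are what allow the ARRAY_REF and COMPONENT_REF cases below to fold the
   constant parts of the reference into the offset.  */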
2059
2060 static tree
2061 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2062 HOST_WIDE_INT *offset)
2063 {
2064 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2065 enum tree_code code;
2066 tree type, orig_type = TREE_TYPE (expr);
2067 HOST_WIDE_INT off0, off1, st;
2068 tree orig_expr = expr;
2069
2070 STRIP_NOPS (expr);
2071
2072 type = TREE_TYPE (expr);
2073 code = TREE_CODE (expr);
2074 *offset = 0;
2075
2076 switch (code)
2077 {
2078 case INTEGER_CST:
2079 if (!cst_and_fits_in_hwi (expr)
2080 || integer_zerop (expr))
2081 return orig_expr;
2082
2083 *offset = int_cst_value (expr);
2084 return build_int_cst (orig_type, 0);
2085
2086 case POINTER_PLUS_EXPR:
2087 case PLUS_EXPR:
2088 case MINUS_EXPR:
2089 op0 = TREE_OPERAND (expr, 0);
2090 op1 = TREE_OPERAND (expr, 1);
2091
2092 op0 = strip_offset_1 (op0, false, false, &off0);
2093 op1 = strip_offset_1 (op1, false, false, &off1);
2094
2095 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2096 if (op0 == TREE_OPERAND (expr, 0)
2097 && op1 == TREE_OPERAND (expr, 1))
2098 return orig_expr;
2099
2100 if (integer_zerop (op1))
2101 expr = op0;
2102 else if (integer_zerop (op0))
2103 {
2104 if (code == MINUS_EXPR)
2105 expr = fold_build1 (NEGATE_EXPR, type, op1);
2106 else
2107 expr = op1;
2108 }
2109 else
2110 expr = fold_build2 (code, type, op0, op1);
2111
2112 return fold_convert (orig_type, expr);
2113
2114 case MULT_EXPR:
2115 op1 = TREE_OPERAND (expr, 1);
2116 if (!cst_and_fits_in_hwi (op1))
2117 return orig_expr;
2118
2119 op0 = TREE_OPERAND (expr, 0);
2120 op0 = strip_offset_1 (op0, false, false, &off0);
2121 if (op0 == TREE_OPERAND (expr, 0))
2122 return orig_expr;
2123
2124 *offset = off0 * int_cst_value (op1);
2125 if (integer_zerop (op0))
2126 expr = op0;
2127 else
2128 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2129
2130 return fold_convert (orig_type, expr);
2131
2132 case ARRAY_REF:
2133 case ARRAY_RANGE_REF:
2134 if (!inside_addr)
2135 return orig_expr;
2136
2137 step = array_ref_element_size (expr);
2138 if (!cst_and_fits_in_hwi (step))
2139 break;
2140
2141 st = int_cst_value (step);
2142 op1 = TREE_OPERAND (expr, 1);
2143 op1 = strip_offset_1 (op1, false, false, &off1);
2144 *offset = off1 * st;
2145
2146 if (top_compref
2147 && integer_zerop (op1))
2148 {
2149 /* Strip the array reference completely. */
2150 op0 = TREE_OPERAND (expr, 0);
2151 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2152 *offset += off0;
2153 return op0;
2154 }
2155 break;
2156
2157 case COMPONENT_REF:
2158 {
2159 tree field;
2160
2161 if (!inside_addr)
2162 return orig_expr;
2163
2164 tmp = component_ref_field_offset (expr);
2165 field = TREE_OPERAND (expr, 1);
2166 if (top_compref
2167 && cst_and_fits_in_hwi (tmp)
2168 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2169 {
2170 HOST_WIDE_INT boffset, abs_off;
2171
2172 /* Strip the component reference completely. */
2173 op0 = TREE_OPERAND (expr, 0);
2174 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2175 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2176 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2177 if (boffset < 0)
2178 abs_off = -abs_off;
2179
2180 *offset = off0 + int_cst_value (tmp) + abs_off;
2181 return op0;
2182 }
2183 }
2184 break;
2185
2186 case ADDR_EXPR:
2187 op0 = TREE_OPERAND (expr, 0);
2188 op0 = strip_offset_1 (op0, true, true, &off0);
2189 *offset += off0;
2190
2191 if (op0 == TREE_OPERAND (expr, 0))
2192 return orig_expr;
2193
2194 expr = build_fold_addr_expr (op0);
2195 return fold_convert (orig_type, expr);
2196
2197 case MEM_REF:
2198 /* ??? Offset operand? */
2199 inside_addr = false;
2200 break;
2201
2202 default:
2203 return orig_expr;
2204 }
2205
2206 /* Default handling of expressions for which we want to recurse into
2207 the first operand. */
2208 op0 = TREE_OPERAND (expr, 0);
2209 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2210 *offset += off0;
2211
2212 if (op0 == TREE_OPERAND (expr, 0)
2213 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2214 return orig_expr;
2215
2216 expr = copy_node (expr);
2217 TREE_OPERAND (expr, 0) = op0;
2218 if (op1)
2219 TREE_OPERAND (expr, 1) = op1;
2220
2221 /* Inside an address, we might strip top-level component references, thus
2222 changing the type of the expression. The handling of ADDR_EXPR
2223 will fix that. */
2224 expr = fold_convert (orig_type, expr);
2225
2226 return expr;
2227 }
2228
2229 /* Strips constant offsets from EXPR and stores them in *OFFSET. */
2230
2231 static tree
2232 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2233 {
2234 HOST_WIDE_INT off;
2235 tree core = strip_offset_1 (expr, false, false, &off);
2236 *offset = off;
2237 return core;
2238 }
2239
2240 /* Returns a variant of TYPE that can be used as a base for different uses.
2241 We return an unsigned type with the same precision, which avoids problems
2242 with overflows. */
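/* E.g. for "int" this returns "unsigned int", and for a pointer type it
   returns an unsigned integer type of the same precision, so that the
   candidate's arithmetic wraps instead of invoking undefined behavior
   on overflow.  */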
2243
2244 static tree
2245 generic_type_for (tree type)
2246 {
2247 if (POINTER_TYPE_P (type))
2248 return unsigned_type_for (type);
2249
2250 if (TYPE_UNSIGNED (type))
2251 return type;
2252
2253 return unsigned_type_for (type);
2254 }
2255
2256 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA points to
2257 the bitmap in which we should store them. */
2258
2259 static struct ivopts_data *fd_ivopts_data;
2260 static tree
2261 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2262 {
2263 bitmap *depends_on = (bitmap *) data;
2264 struct version_info *info;
2265
2266 if (TREE_CODE (*expr_p) != SSA_NAME)
2267 return NULL_TREE;
2268 info = name_info (fd_ivopts_data, *expr_p);
2269
2270 if (!info->inv_id || info->has_nonlin_use)
2271 return NULL_TREE;
2272
2273 if (!*depends_on)
2274 *depends_on = BITMAP_ALLOC (NULL);
2275 bitmap_set_bit (*depends_on, info->inv_id);
2276
2277 return NULL_TREE;
2278 }
2279
2280 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2281 and the position to POS. If USE is not NULL, the candidate is set as related to
2282 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2283 replacement of the final value of the iv by a direct computation. */
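/* Note that candidates are deduplicated: if a candidate with the same
   position, increment statement, base, step and precision already exists,
   it is reused (and possibly promoted to important) rather than being
   added a second time.  */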
2284
2285 static struct iv_cand *
2286 add_candidate_1 (struct ivopts_data *data,
2287 tree base, tree step, bool important, enum iv_position pos,
2288 struct iv_use *use, gimple incremented_at)
2289 {
2290 unsigned i;
2291 struct iv_cand *cand = NULL;
2292 tree type, orig_type;
2293
2294 /* For non-original variables, make sure their values are computed in a type
2295 that does not invoke undefined behavior on overflows (since in general,
2296 we cannot prove that these induction variables are non-wrapping). */
2297 if (pos != IP_ORIGINAL)
2298 {
2299 orig_type = TREE_TYPE (base);
2300 type = generic_type_for (orig_type);
2301 if (type != orig_type)
2302 {
2303 base = fold_convert (type, base);
2304 step = fold_convert (type, step);
2305 }
2306 }
2307
2308 for (i = 0; i < n_iv_cands (data); i++)
2309 {
2310 cand = iv_cand (data, i);
2311
2312 if (cand->pos != pos)
2313 continue;
2314
2315 if (cand->incremented_at != incremented_at
2316 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2317 && cand->ainc_use != use))
2318 continue;
2319
2320 if (!cand->iv)
2321 {
2322 if (!base && !step)
2323 break;
2324
2325 continue;
2326 }
2327
2328 if (!base && !step)
2329 continue;
2330
2331 if (operand_equal_p (base, cand->iv->base, 0)
2332 && operand_equal_p (step, cand->iv->step, 0)
2333 && (TYPE_PRECISION (TREE_TYPE (base))
2334 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2335 break;
2336 }
2337
2338 if (i == n_iv_cands (data))
2339 {
2340 cand = XCNEW (struct iv_cand);
2341 cand->id = i;
2342
2343 if (!base && !step)
2344 cand->iv = NULL;
2345 else
2346 cand->iv = alloc_iv (base, step);
2347
2348 cand->pos = pos;
2349 if (pos != IP_ORIGINAL && cand->iv)
2350 {
2351 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2352 cand->var_after = cand->var_before;
2353 }
2354 cand->important = important;
2355 cand->incremented_at = incremented_at;
2356 data->iv_candidates.safe_push (cand);
2357
2358 if (step
2359 && TREE_CODE (step) != INTEGER_CST)
2360 {
2361 fd_ivopts_data = data;
2362 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2363 }
2364
2365 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2366 cand->ainc_use = use;
2367 else
2368 cand->ainc_use = NULL;
2369
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 dump_cand (dump_file, cand);
2372 }
2373
2374 if (important && !cand->important)
2375 {
2376 cand->important = true;
2377 if (dump_file && (dump_flags & TDF_DETAILS))
2378 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2379 }
2380
2381 if (use)
2382 {
2383 bitmap_set_bit (use->related_cands, i);
2384 if (dump_file && (dump_flags & TDF_DETAILS))
2385 fprintf (dump_file, "Candidate %d is related to use %d\n",
2386 cand->id, use->id);
2387 }
2388
2389 return cand;
2390 }
2391
2392 /* Returns true if incrementing the induction variable at the end of the LOOP
2393 is allowed.
2394
2395 The purpose is to avoid splitting the latch edge with a biv increment, thus
2396 creating a jump, possibly confusing other optimization passes and leaving
2397 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2398 is not available (so we do not have a better alternative), or if the latch
2399 edge is already nonempty. */
2400
2401 static bool
2402 allow_ip_end_pos_p (struct loop *loop)
2403 {
2404 if (!ip_normal_pos (loop))
2405 return true;
2406
2407 if (!empty_block_p (ip_end_pos (loop)))
2408 return true;
2409
2410 return false;
2411 }
2412
2413 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2414 The important field is set to IMPORTANT. */
2415
2416 static void
2417 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2418 bool important, struct iv_use *use)
2419 {
2420 basic_block use_bb = gimple_bb (use->stmt);
2421 enum machine_mode mem_mode;
2422 unsigned HOST_WIDE_INT cstepi;
2423
2424 /* If we insert the increment in any position other than the standard
2425 ones, we must ensure that it is incremented once per iteration.
2426 It must not be in an inner nested loop, or one side of an if
2427 statement. */
2428 if (use_bb->loop_father != data->current_loop
2429 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2430 || stmt_could_throw_p (use->stmt)
2431 || !cst_and_fits_in_hwi (step))
2432 return;
2433
2434 cstepi = int_cst_value (step);
2435
2436 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2437 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2438 || USE_STORE_PRE_INCREMENT (mem_mode))
2439 && GET_MODE_SIZE (mem_mode) == cstepi)
2440 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2441 || USE_STORE_PRE_DECREMENT (mem_mode))
2442 && GET_MODE_SIZE (mem_mode) == -cstepi))
2443 {
2444 enum tree_code code = MINUS_EXPR;
2445 tree new_base;
2446 tree new_step = step;
2447
2448 if (POINTER_TYPE_P (TREE_TYPE (base)))
2449 {
2450 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2451 code = POINTER_PLUS_EXPR;
2452 }
2453 else
2454 new_step = fold_convert (TREE_TYPE (base), new_step);
2455 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2456 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2457 use->stmt);
2458 }
2459 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2460 || USE_STORE_POST_INCREMENT (mem_mode))
2461 && GET_MODE_SIZE (mem_mode) == cstepi)
2462 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2463 || USE_STORE_POST_DECREMENT (mem_mode))
2464 && GET_MODE_SIZE (mem_mode) == -cstepi))
2465 {
2466 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2467 use->stmt);
2468 }
2469 }
2470
2471 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2472 and the position to POS. If USE is not NULL, the candidate is set as related to
2473 it. The candidate computation is scheduled on all available positions. */
2474
2475 static void
2476 add_candidate (struct ivopts_data *data,
2477 tree base, tree step, bool important, struct iv_use *use)
2478 {
2479 if (ip_normal_pos (data->current_loop))
2480 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2481 if (ip_end_pos (data->current_loop)
2482 && allow_ip_end_pos_p (data->current_loop))
2483 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2484
2485 if (use != NULL && use->type == USE_ADDRESS)
2486 add_autoinc_candidates (data, base, step, important, use);
2487 }
2488
2489 /* Adds standard iv candidates. */
2490
2491 static void
2492 add_standard_iv_candidates (struct ivopts_data *data)
2493 {
2494 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2495
2496 /* The same for the `long' type if it is still fast enough. */
2497 if (TYPE_PRECISION
2498 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2499 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2500 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2501 build_int_cst (long_integer_type_node, 1), true, NULL);
2502
2503 /* The same for the `long long' type if it is still fast enough. */
2504 if (TYPE_PRECISION
2505 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2506 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2507 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2508 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2509 }
2510
2511
2512 /* Adds candidates based on the old induction variable IV. */
2513
2514 static void
2515 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2516 {
2517 gimple phi;
2518 tree def;
2519 struct iv_cand *cand;
2520
2521 add_candidate (data, iv->base, iv->step, true, NULL);
2522
2523 /* The same, but with initial value zero. */
2524 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2525 add_candidate (data, size_int (0), iv->step, true, NULL);
2526 else
2527 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2528 iv->step, true, NULL);
2529
2530 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2531 if (gimple_code (phi) == GIMPLE_PHI)
2532 {
2533 /* Additionally record the possibility of leaving the original iv
2534 untouched. */
2535 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2536 /* Don't add a candidate if it's from another PHI node, because
2537 it's an affine iv appearing in the form of a PEELED_CHREC. */
2538 phi = SSA_NAME_DEF_STMT (def);
2539 if (gimple_code (phi) != GIMPLE_PHI)
2540 {
2541 cand = add_candidate_1 (data,
2542 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2543 SSA_NAME_DEF_STMT (def));
2544 cand->var_before = iv->ssa_name;
2545 cand->var_after = def;
2546 }
2547 else
2548 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2549 }
2550 }
2551
2552 /* Adds candidates based on the old induction variables. */
2553
2554 static void
2555 add_old_ivs_candidates (struct ivopts_data *data)
2556 {
2557 unsigned i;
2558 struct iv *iv;
2559 bitmap_iterator bi;
2560
2561 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2562 {
2563 iv = ver_info (data, i)->iv;
2564 if (iv && iv->biv_p && !integer_zerop (iv->step))
2565 add_old_iv_candidates (data, iv);
2566 }
2567 }
2568
2569 /* Adds candidates based on the value of the induction variable IV and USE. */
2570
2571 static void
2572 add_iv_value_candidates (struct ivopts_data *data,
2573 struct iv *iv, struct iv_use *use)
2574 {
2575 unsigned HOST_WIDE_INT offset;
2576 tree base;
2577 tree basetype;
2578
2579 add_candidate (data, iv->base, iv->step, false, use);
2580
2581 /* The same, but with initial value zero. Make such a variable important,
2582 since it is generic enough so that possibly many uses may be based
2583 on it. */
2584 basetype = TREE_TYPE (iv->base);
2585 if (POINTER_TYPE_P (basetype))
2586 basetype = sizetype;
2587 add_candidate (data, build_int_cst (basetype, 0),
2588 iv->step, true, use);
2589
2590 /* Third, try removing the constant offset. Make sure to even
2591 add a candidate for &a[0] vs. (T *)&a. */
2592 base = strip_offset (iv->base, &offset);
2593 if (offset
2594 || base != iv->base)
2595 add_candidate (data, base, iv->step, false, use);
2596 }
2597
2598 /* Adds candidates based on the uses. */
2599
2600 static void
2601 add_derived_ivs_candidates (struct ivopts_data *data)
2602 {
2603 unsigned i;
2604
2605 for (i = 0; i < n_iv_uses (data); i++)
2606 {
2607 struct iv_use *use = iv_use (data, i);
2608
2609 if (!use)
2610 continue;
2611
2612 switch (use->type)
2613 {
2614 case USE_NONLINEAR_EXPR:
2615 case USE_COMPARE:
2616 case USE_ADDRESS:
2617 /* Just add the ivs based on the value of the iv used here. */
2618 add_iv_value_candidates (data, use->iv, use);
2619 break;
2620
2621 default:
2622 gcc_unreachable ();
2623 }
2624 }
2625 }
2626
2627 /* Record important candidates and add them to related_cands bitmaps
2628 if needed. */
2629
2630 static void
2631 record_important_candidates (struct ivopts_data *data)
2632 {
2633 unsigned i;
2634 struct iv_use *use;
2635
2636 for (i = 0; i < n_iv_cands (data); i++)
2637 {
2638 struct iv_cand *cand = iv_cand (data, i);
2639
2640 if (cand->important)
2641 bitmap_set_bit (data->important_candidates, i);
2642 }
2643
2644 data->consider_all_candidates = (n_iv_cands (data)
2645 <= CONSIDER_ALL_CANDIDATES_BOUND);
2646
2647 if (data->consider_all_candidates)
2648 {
2649 /* We will not need "related_cands" bitmaps in this case,
2650 so release them to decrease peak memory consumption. */
2651 for (i = 0; i < n_iv_uses (data); i++)
2652 {
2653 use = iv_use (data, i);
2654 BITMAP_FREE (use->related_cands);
2655 }
2656 }
2657 else
2658 {
2659 /* Add important candidates to the related_cands bitmaps. */
2660 for (i = 0; i < n_iv_uses (data); i++)
2661 bitmap_ior_into (iv_use (data, i)->related_cands,
2662 data->important_candidates);
2663 }
2664 }
2665
2666 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2667 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2668 we allocate a simple list for every use. */
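/* For illustration: a use with five related candidates gets a cost table
   of 1 << ceil_log2 (5) == 8 entries; set_use_iv_cost and get_use_iv_cost
   below then index it by cand->id & 7, probing linearly on collisions.  */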
2669
2670 static void
2671 alloc_use_cost_map (struct ivopts_data *data)
2672 {
2673 unsigned i, size, s;
2674
2675 for (i = 0; i < n_iv_uses (data); i++)
2676 {
2677 struct iv_use *use = iv_use (data, i);
2678
2679 if (data->consider_all_candidates)
2680 size = n_iv_cands (data);
2681 else
2682 {
2683 s = bitmap_count_bits (use->related_cands);
2684
2685 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2686 size = s ? (1 << ceil_log2 (s)) : 1;
2687 }
2688
2689 use->n_map_members = size;
2690 use->cost_map = XCNEWVEC (struct cost_pair, size);
2691 }
2692 }
2693
2694 /* Returns a description of the computation cost of an expression whose
2695 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2696
2697 static comp_cost
2698 new_cost (unsigned runtime, unsigned complexity)
2699 {
2700 comp_cost cost;
2701
2702 cost.cost = runtime;
2703 cost.complexity = complexity;
2704
2705 return cost;
2706 }
2707
2708 /* Adds costs COST1 and COST2. */
2709
2710 static comp_cost
2711 add_costs (comp_cost cost1, comp_cost cost2)
2712 {
2713 cost1.cost += cost2.cost;
2714 cost1.complexity += cost2.complexity;
2715
2716 return cost1;
2717 }
2718 /* Subtracts cost COST2 from COST1. */
2719
2720 static comp_cost
2721 sub_costs (comp_cost cost1, comp_cost cost2)
2722 {
2723 cost1.cost -= cost2.cost;
2724 cost1.complexity -= cost2.complexity;
2725
2726 return cost1;
2727 }
2728
2729 /* Returns a negative number if COST1 < COST2, a positive number if
2730 COST1 > COST2, and 0 if COST1 = COST2. */
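/* E.g. {cost = 4, complexity = 2} compares greater than
   {cost = 4, complexity = 1}: the runtime cost is the primary key and
   the complexity only breaks ties.  */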
2731
2732 static int
2733 compare_costs (comp_cost cost1, comp_cost cost2)
2734 {
2735 if (cost1.cost == cost2.cost)
2736 return cost1.complexity - cost2.complexity;
2737
2738 return cost1.cost - cost2.cost;
2739 }
2740
2741 /* Returns true if COST is infinite. */
2742
2743 static bool
2744 infinite_cost_p (comp_cost cost)
2745 {
2746 return cost.cost == INFTY;
2747 }
2748
2749 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2750 on invariants DEPENDS_ON and that the value used in expressing it
2751 is VALUE, and in case of iv elimination the comparison operator is COMP. */
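/* For illustration: with n_map_members == 8 and cand->id == 11, the
   search below starts at slot 11 & 7 == 3 and probes towards the end of
   the table, wrapping around to slot 0; the table is sized so that a
   free slot is always found, hence the gcc_unreachable.  */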
2752
2753 static void
2754 set_use_iv_cost (struct ivopts_data *data,
2755 struct iv_use *use, struct iv_cand *cand,
2756 comp_cost cost, bitmap depends_on, tree value,
2757 enum tree_code comp, int inv_expr_id)
2758 {
2759 unsigned i, s;
2760
2761 if (infinite_cost_p (cost))
2762 {
2763 BITMAP_FREE (depends_on);
2764 return;
2765 }
2766
2767 if (data->consider_all_candidates)
2768 {
2769 use->cost_map[cand->id].cand = cand;
2770 use->cost_map[cand->id].cost = cost;
2771 use->cost_map[cand->id].depends_on = depends_on;
2772 use->cost_map[cand->id].value = value;
2773 use->cost_map[cand->id].comp = comp;
2774 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2775 return;
2776 }
2777
2778 /* n_map_members is a power of two, so this computes modulo. */
2779 s = cand->id & (use->n_map_members - 1);
2780 for (i = s; i < use->n_map_members; i++)
2781 if (!use->cost_map[i].cand)
2782 goto found;
2783 for (i = 0; i < s; i++)
2784 if (!use->cost_map[i].cand)
2785 goto found;
2786
2787 gcc_unreachable ();
2788
2789 found:
2790 use->cost_map[i].cand = cand;
2791 use->cost_map[i].cost = cost;
2792 use->cost_map[i].depends_on = depends_on;
2793 use->cost_map[i].value = value;
2794 use->cost_map[i].comp = comp;
2795 use->cost_map[i].inv_expr_id = inv_expr_id;
2796 }
2797
2798 /* Gets cost of (USE, CANDIDATE) pair. */
2799
2800 static struct cost_pair *
2801 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2802 struct iv_cand *cand)
2803 {
2804 unsigned i, s;
2805 struct cost_pair *ret;
2806
2807 if (!cand)
2808 return NULL;
2809
2810 if (data->consider_all_candidates)
2811 {
2812 ret = use->cost_map + cand->id;
2813 if (!ret->cand)
2814 return NULL;
2815
2816 return ret;
2817 }
2818
2819 /* n_map_members is a power of two, so this computes modulo. */
2820 s = cand->id & (use->n_map_members - 1);
2821 for (i = s; i < use->n_map_members; i++)
2822 if (use->cost_map[i].cand == cand)
2823 return use->cost_map + i;
2824 else if (use->cost_map[i].cand == NULL)
2825 return NULL;
2826 for (i = 0; i < s; i++)
2827 if (use->cost_map[i].cand == cand)
2828 return use->cost_map + i;
2829 else if (use->cost_map[i].cand == NULL)
2830 return NULL;
2831
2832 return NULL;
2833 }
2834
2835 /* Returns an estimate of the cost of computing the insn sequence SEQ. */
2836
2837 static unsigned
2838 seq_cost (rtx seq, bool speed)
2839 {
2840 unsigned cost = 0;
2841 rtx set;
2842
2843 for (; seq; seq = NEXT_INSN (seq))
2844 {
2845 set = single_set (seq);
2846 if (set)
2847 cost += set_src_cost (SET_SRC (set), speed);
2848 else
2849 cost++;
2850 }
2851
2852 return cost;
2853 }
2854
2855 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2856 static rtx
2857 produce_memory_decl_rtl (tree obj, int *regno)
2858 {
2859 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2860 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2861 rtx x;
2862
2863 gcc_assert (obj);
2864 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2865 {
2866 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2867 x = gen_rtx_SYMBOL_REF (address_mode, name);
2868 SET_SYMBOL_REF_DECL (x, obj);
2869 x = gen_rtx_MEM (DECL_MODE (obj), x);
2870 set_mem_addr_space (x, as);
2871 targetm.encode_section_info (obj, x, true);
2872 }
2873 else
2874 {
2875 x = gen_raw_REG (address_mode, (*regno)++);
2876 x = gen_rtx_MEM (DECL_MODE (obj), x);
2877 set_mem_addr_space (x, as);
2878 }
2879
2880 return x;
2881 }
2882
2883 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2884 walk_tree. DATA points to the fake register number to be used next. */
2885
2886 static tree
2887 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2888 {
2889 tree obj = NULL_TREE;
2890 rtx x = NULL_RTX;
2891 int *regno = (int *) data;
2892
2893 switch (TREE_CODE (*expr_p))
2894 {
2895 case ADDR_EXPR:
2896 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2897 handled_component_p (*expr_p);
2898 expr_p = &TREE_OPERAND (*expr_p, 0))
2899 continue;
2900 obj = *expr_p;
2901 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2902 x = produce_memory_decl_rtl (obj, regno);
2903 break;
2904
2905 case SSA_NAME:
2906 *ws = 0;
2907 obj = SSA_NAME_VAR (*expr_p);
2908 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2909 if (!obj)
2910 return NULL_TREE;
2911 if (!DECL_RTL_SET_P (obj))
2912 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2913 break;
2914
2915 case VAR_DECL:
2916 case PARM_DECL:
2917 case RESULT_DECL:
2918 *ws = 0;
2919 obj = *expr_p;
2920
2921 if (DECL_RTL_SET_P (obj))
2922 break;
2923
2924 if (DECL_MODE (obj) == BLKmode)
2925 x = produce_memory_decl_rtl (obj, regno);
2926 else
2927 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2928
2929 break;
2930
2931 default:
2932 break;
2933 }
2934
2935 if (x)
2936 {
2937 decl_rtl_to_reset.safe_push (obj);
2938 SET_DECL_RTL (obj, x);
2939 }
2940
2941 return NULL_TREE;
2942 }
2943
2944 /* Determines cost of the computation of EXPR. */
2945
2946 static unsigned
2947 computation_cost (tree expr, bool speed)
2948 {
2949 rtx seq, rslt;
2950 tree type = TREE_TYPE (expr);
2951 unsigned cost;
2952 /* Avoid using hard regs in ways which may be unsupported. */
2953 int regno = LAST_VIRTUAL_REGISTER + 1;
2954 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2955 enum node_frequency real_frequency = node->frequency;
2956
2957 node->frequency = NODE_FREQUENCY_NORMAL;
2958 crtl->maybe_hot_insn_p = speed;
2959 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2960 start_sequence ();
2961 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2962 seq = get_insns ();
2963 end_sequence ();
2964 default_rtl_profile ();
2965 node->frequency = real_frequency;
2966
2967 cost = seq_cost (seq, speed);
2968 if (MEM_P (rslt))
2969 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2970 TYPE_ADDR_SPACE (type), speed);
2971 else if (!REG_P (rslt))
2972 cost += set_src_cost (rslt, speed);
2973
2974 return cost;
2975 }
2976
2977 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2978
2979 static tree
2980 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2981 {
2982 if (stmt_after_increment (loop, cand, stmt))
2983 return cand->var_after;
2984 else
2985 return cand->var_before;
2986 }
2987
2988 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2989 same precision that is at least as wide as the precision of TYPE, stores
2990 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2991 type of A and B. */
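/* E.g. if *A is "(int) la" and *B is "(int) lb", with "la" and "lb" of
   type "long", this stores "la" to *A and "lb" to *B and returns "long";
   the difference can then be folded in the wider type.  */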
2992
2993 static tree
2994 determine_common_wider_type (tree *a, tree *b)
2995 {
2996 tree wider_type = NULL;
2997 tree suba, subb;
2998 tree atype = TREE_TYPE (*a);
2999
3000 if (CONVERT_EXPR_P (*a))
3001 {
3002 suba = TREE_OPERAND (*a, 0);
3003 wider_type = TREE_TYPE (suba);
3004 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3005 return atype;
3006 }
3007 else
3008 return atype;
3009
3010 if (CONVERT_EXPR_P (*b))
3011 {
3012 subb = TREE_OPERAND (*b, 0);
3013 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3014 return atype;
3015 }
3016 else
3017 return atype;
3018
3019 *a = suba;
3020 *b = subb;
3021 return wider_type;
3022 }
3023
3024 /* Determines the expression by which USE is expressed from induction variable
3025 CAND at statement AT in LOOP. The expression is stored in a decomposed
3026 form into AFF. Returns false if USE cannot be expressed using CAND. */
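/* For illustration: for a use with iv {base = B, step = 4} expressed from
   a candidate with iv {base = 0, step = 1}, constant_multiple_of yields
   RAT == 4 and the combination built below is
   B - 4 * 0 + 4 * var == B + 4 * var.  */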
3027
3028 static bool
3029 get_computation_aff (struct loop *loop,
3030 struct iv_use *use, struct iv_cand *cand, gimple at,
3031 struct aff_tree *aff)
3032 {
3033 tree ubase = use->iv->base;
3034 tree ustep = use->iv->step;
3035 tree cbase = cand->iv->base;
3036 tree cstep = cand->iv->step, cstep_common;
3037 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3038 tree common_type, var;
3039 tree uutype;
3040 aff_tree cbase_aff, var_aff;
3041 double_int rat;
3042
3043 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3044 {
3045 /* We do not have enough precision to express the values of USE. */
3046 return false;
3047 }
3048
3049 var = var_at_stmt (loop, cand, at);
3050 uutype = unsigned_type_for (utype);
3051
3052 /* If the conversion is not a no-op, perform it. */
3053 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3054 {
3055 cstep = fold_convert (uutype, cstep);
3056 cbase = fold_convert (uutype, cbase);
3057 var = fold_convert (uutype, var);
3058 }
3059
3060 if (!constant_multiple_of (ustep, cstep, &rat))
3061 return false;
3062
3063 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3064 type, we achieve better folding by computing their difference in this
3065 wider type, and cast the result to UUTYPE. We do not need to worry about
3066 overflows, as all the arithmetic will in the end be performed in UUTYPE
3067 anyway. */
3068 common_type = determine_common_wider_type (&ubase, &cbase);
3069
3070 /* use = ubase - ratio * cbase + ratio * var. */
3071 tree_to_aff_combination (ubase, common_type, aff);
3072 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3073 tree_to_aff_combination (var, uutype, &var_aff);
3074
3075 /* We need to shift the value if we are after the increment. */
3076 if (stmt_after_increment (loop, cand, at))
3077 {
3078 aff_tree cstep_aff;
3079
3080 if (common_type != uutype)
3081 cstep_common = fold_convert (common_type, cstep);
3082 else
3083 cstep_common = cstep;
3084
3085 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3086 aff_combination_add (&cbase_aff, &cstep_aff);
3087 }
3088
3089 aff_combination_scale (&cbase_aff, -rat);
3090 aff_combination_add (aff, &cbase_aff);
3091 if (common_type != uutype)
3092 aff_combination_convert (aff, uutype);
3093
3094 aff_combination_scale (&var_aff, rat);
3095 aff_combination_add (aff, &var_aff);
3096
3097 return true;
3098 }
3099
3100 /* Return the type of USE. */
3101
3102 static tree
3103 get_use_type (struct iv_use *use)
3104 {
3105 tree base_type = TREE_TYPE (use->iv->base);
3106 tree type;
3107
3108 if (use->type == USE_ADDRESS)
3109 {
3110 /* The base_type may be a void pointer. Create a pointer type based on
3111 the mem_ref instead. */
3112 type = build_pointer_type (TREE_TYPE (*use->op_p));
3113 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3114 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3115 }
3116 else
3117 type = base_type;
3118
3119 return type;
3120 }
3121
3122 /* Determines the expression by which USE is expressed from induction variable
3123 CAND at statement AT in LOOP. The computation is unshared. */
3124
3125 static tree
3126 get_computation_at (struct loop *loop,
3127 struct iv_use *use, struct iv_cand *cand, gimple at)
3128 {
3129 aff_tree aff;
3130 tree type = get_use_type (use);
3131
3132 if (!get_computation_aff (loop, use, cand, at, &aff))
3133 return NULL_TREE;
3134 unshare_aff_combination (&aff);
3135 return fold_convert (type, aff_combination_to_tree (&aff));
3136 }
3137
3138 /* Determines the expression by which USE is expressed from induction variable
3139 CAND in LOOP. The computation is unshared. */
3140
3141 static tree
3142 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3143 {
3144 return get_computation_at (loop, use, cand, use->stmt);
3145 }
3146
3147 /* Adjust the cost COST for being in loop setup rather than loop body.
3148 If we're optimizing for space, the loop setup overhead is constant;
3149 if we're optimizing for speed, amortize it over the per-iteration cost. */
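/* E.g. a setup cost of 20 in a loop that iterates 10 times on average is
   accounted as 20 / 10 == 2 per iteration when optimizing for speed.  */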
3150 static unsigned
3151 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3152 {
3153 if (cost == INFTY)
3154 return cost;
3155 else if (optimize_loop_for_speed_p (data->current_loop))
3156 return cost / avg_loop_niter (data->current_loop);
3157 else
3158 return cost;
3159 }
3160
3161 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3162 validity for a memory reference accessing memory of mode MODE in
3163 address space AS. */
3164
3165
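/* The set of valid multipliers is target-specific: on x86, for example,
   the addressing modes typically allow index scales of 1, 2, 4 and 8.
   The answers are computed lazily and cached per (address space, mode)
   pair in valid_mult_list.  */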
3166 bool
3167 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3168 addr_space_t as)
3169 {
3170 #define MAX_RATIO 128
3171 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3172 static vec<sbitmap> valid_mult_list;
3173 sbitmap valid_mult;
3174
3175 if (data_index >= valid_mult_list.length ())
3176 valid_mult_list.safe_grow_cleared (data_index + 1);
3177
3178 valid_mult = valid_mult_list[data_index];
3179 if (!valid_mult)
3180 {
3181 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3182 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3183 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3184 rtx addr, scaled;
3185 HOST_WIDE_INT i;
3186
3187 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3188 bitmap_clear (valid_mult);
3189 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3190 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3191 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3192 {
3193 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3194 if (memory_address_addr_space_p (mode, addr, as)
3195 || memory_address_addr_space_p (mode, scaled, as))
3196 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3197 }
3198
3199 if (dump_file && (dump_flags & TDF_DETAILS))
3200 {
3201 fprintf (dump_file, " allowed multipliers:");
3202 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3203 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3204 fprintf (dump_file, " %d", (int) i);
3205 fprintf (dump_file, "\n");
3206 fprintf (dump_file, "\n");
3207 }
3208
3209 valid_mult_list[data_index] = valid_mult;
3210 }
3211
3212 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3213 return false;
3214
3215 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3216 }
3217
3218 /* Returns the cost of an address of the shape symbol + var + OFFSET + RATIO * index.
3219 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3220 variable is omitted. Compute the cost for a memory reference that accesses
3221 a memory location of mode MEM_MODE in address space AS.
3222
3223 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3224 size of MEM_MODE / RATIO) is available. To make this determination, we
3225 look at the size of the increment to be made, which is given in CSTEP.
3226 CSTEP may be zero if the step is unknown.
3227 STMT_AFTER_INC is true iff the statement we're looking at is after the
3228 increment of the original biv.
3229
3230 TODO -- there must be some better way. This all is quite crude. */
3231
3232 enum ainc_type
3233 {
3234 AINC_PRE_INC, /* Pre increment. */
3235 AINC_PRE_DEC, /* Pre decrement. */
3236 AINC_POST_INC, /* Post increment. */
3237 AINC_POST_DEC, /* Post decrement. */
3238 AINC_NONE /* Also the number of auto increment types. */
3239 };
3240
3241 typedef struct address_cost_data_s
3242 {
3243 HOST_WIDE_INT min_offset, max_offset;
3244 unsigned costs[2][2][2][2];
3245 unsigned ainc_costs[AINC_NONE];
3246 } *address_cost_data;
3247
3248
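/* The costs array of address_cost_data is indexed by four bits encoding
   which address parts are present: e.g. in the initialization loop below,
   i == 13 (binary 1101) stands for the shape sym + cst + rat * index
   (symbol and offset present, no separate variable part, scaled index
   present).  */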
3249 static comp_cost
3250 get_address_cost (bool symbol_present, bool var_present,
3251 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3252 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3253 addr_space_t as, bool speed,
3254 bool stmt_after_inc, bool *may_autoinc)
3255 {
3256 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3257 static vec<address_cost_data> address_cost_data_list;
3258 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3259 address_cost_data data;
3260 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3261 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3262 unsigned cost, acost, complexity;
3263 enum ainc_type autoinc_type;
3264 bool offset_p, ratio_p, autoinc;
3265 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3266 unsigned HOST_WIDE_INT mask;
3267 unsigned bits;
3268
3269 if (data_index >= address_cost_data_list.length ())
3270 address_cost_data_list.safe_grow_cleared (data_index + 1);
3271
3272 data = address_cost_data_list[data_index];
3273 if (!data)
3274 {
3275 HOST_WIDE_INT i;
3276 HOST_WIDE_INT rat, off = 0;
3277 int old_cse_not_expected, width;
3278 unsigned sym_p, var_p, off_p, rat_p, add_c;
3279 rtx seq, addr, base;
3280 rtx reg0, reg1;
3281
3282 data = (address_cost_data) xcalloc (1, sizeof (*data));
3283
3284 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3285
3286 width = GET_MODE_BITSIZE (address_mode) - 1;
3287 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3288 width = HOST_BITS_PER_WIDE_INT - 1;
3289 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3290
3291 for (i = width; i >= 0; i--)
3292 {
3293 off = -((unsigned HOST_WIDE_INT) 1 << i);
3294 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3295 if (memory_address_addr_space_p (mem_mode, addr, as))
3296 break;
3297 }
3298 data->min_offset = (i == -1? 0 : off);
3299
3300 for (i = width; i >= 0; i--)
3301 {
3302 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3303 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3304 if (memory_address_addr_space_p (mem_mode, addr, as))
3305 break;
3306 }
3307 if (i == -1)
3308 off = 0;
3309 data->max_offset = off;
3310
3311 if (dump_file && (dump_flags & TDF_DETAILS))
3312 {
3313 fprintf (dump_file, "get_address_cost:\n");
3314 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3315 GET_MODE_NAME (mem_mode),
3316 data->min_offset);
3317 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3318 GET_MODE_NAME (mem_mode),
3319 data->max_offset);
3320 }
3321
3322 rat = 1;
3323 for (i = 2; i <= MAX_RATIO; i++)
3324 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3325 {
3326 rat = i;
3327 break;
3328 }
3329
3330 /* Compute the cost of various addressing modes. */
3331 acost = 0;
3332 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3333 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3334
3335 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3336 || USE_STORE_PRE_DECREMENT (mem_mode))
3337 {
3338 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3339 has_predec[mem_mode]
3340 = memory_address_addr_space_p (mem_mode, addr, as);
3341
3342 if (has_predec[mem_mode])
3343 data->ainc_costs[AINC_PRE_DEC]
3344 = address_cost (addr, mem_mode, as, speed);
3345 }
3346 if (USE_LOAD_POST_DECREMENT (mem_mode)
3347 || USE_STORE_POST_DECREMENT (mem_mode))
3348 {
3349 addr = gen_rtx_POST_DEC (address_mode, reg0);
3350 has_postdec[mem_mode]
3351 = memory_address_addr_space_p (mem_mode, addr, as);
3352
3353 if (has_postdec[mem_mode])
3354 data->ainc_costs[AINC_POST_DEC]
3355 = address_cost (addr, mem_mode, as, speed);
3356 }
3357 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3358 || USE_STORE_PRE_INCREMENT (mem_mode))
3359 {
3360 addr = gen_rtx_PRE_INC (address_mode, reg0);
3361 has_preinc[mem_mode]
3362 = memory_address_addr_space_p (mem_mode, addr, as);
3363
3364 if (has_preinc[mem_mode])
3365 data->ainc_costs[AINC_PRE_INC]
3366 = address_cost (addr, mem_mode, as, speed);
3367 }
3368 if (USE_LOAD_POST_INCREMENT (mem_mode)
3369 || USE_STORE_POST_INCREMENT (mem_mode))
3370 {
3371 addr = gen_rtx_POST_INC (address_mode, reg0);
3372 has_postinc[mem_mode]
3373 = memory_address_addr_space_p (mem_mode, addr, as);
3374
3375 if (has_postinc[mem_mode])
3376 data->ainc_costs[AINC_POST_INC]
3377 = address_cost (addr, mem_mode, as, speed);
3378 }
3379 for (i = 0; i < 16; i++)
3380 {
3381 sym_p = i & 1;
3382 var_p = (i >> 1) & 1;
3383 off_p = (i >> 2) & 1;
3384 rat_p = (i >> 3) & 1;
3385
3386 addr = reg0;
3387 if (rat_p)
3388 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3389 gen_int_mode (rat, address_mode));
3390
3391 if (var_p)
3392 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3393
3394 if (sym_p)
3395 {
3396 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3397 /* ??? We can run into trouble with some backends by presenting
3398 it with symbols which haven't been properly passed through
3399 targetm.encode_section_info. By setting the local bit, we
3400 enhance the probability of things working. */
3401 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3402
3403 if (off_p)
3404 base = gen_rtx_fmt_e (CONST, address_mode,
3405 gen_rtx_fmt_ee
3406 (PLUS, address_mode, base,
3407 gen_int_mode (off, address_mode)));
3408 }
3409 else if (off_p)
3410 base = gen_int_mode (off, address_mode);
3411 else
3412 base = NULL_RTX;
3413
3414 if (base)
3415 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3416
3417 start_sequence ();
3418 /* To avoid splitting addressing modes, pretend that no cse will
3419 follow. */
3420 old_cse_not_expected = cse_not_expected;
3421 cse_not_expected = true;
3422 addr = memory_address_addr_space (mem_mode, addr, as);
3423 cse_not_expected = old_cse_not_expected;
3424 seq = get_insns ();
3425 end_sequence ();
3426
3427 acost = seq_cost (seq, speed);
3428 acost += address_cost (addr, mem_mode, as, speed);
3429
3430 if (!acost)
3431 acost = 1;
3432 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3433 }
3434
3435 /* On some targets, it is quite expensive to load a symbol into a register,
3436 which makes addresses that contain symbols look much more expensive.
3437 However, the symbol will have to be loaded in any case before the
3438 loop (and quite likely we have it in register already), so it does not
3439 make much sense to penalize them too heavily. So make some final
3440 tweaks for the SYMBOL_PRESENT modes:
3441
3442 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3443 var is cheaper, use this mode with small penalty.
3444 If VAR_PRESENT is true, check whether the mode with
3445 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3446 if this is the case, use it. */
3447 add_c = add_cost (speed, address_mode);
3448 for (i = 0; i < 8; i++)
3449 {
3450 var_p = i & 1;
3451 off_p = (i >> 1) & 1;
3452 rat_p = (i >> 2) & 1;
3453
3454 acost = data->costs[0][1][off_p][rat_p] + 1;
3455 if (var_p)
3456 acost += add_c;
3457
3458 if (acost < data->costs[1][var_p][off_p][rat_p])
3459 data->costs[1][var_p][off_p][rat_p] = acost;
3460 }
3461
3462 if (dump_file && (dump_flags & TDF_DETAILS))
3463 {
3464 fprintf (dump_file, "Address costs:\n");
3465
3466 for (i = 0; i < 16; i++)
3467 {
3468 sym_p = i & 1;
3469 var_p = (i >> 1) & 1;
3470 off_p = (i >> 2) & 1;
3471 rat_p = (i >> 3) & 1;
3472
3473 fprintf (dump_file, " ");
3474 if (sym_p)
3475 fprintf (dump_file, "sym + ");
3476 if (var_p)
3477 fprintf (dump_file, "var + ");
3478 if (off_p)
3479 fprintf (dump_file, "cst + ");
3480 if (rat_p)
3481 fprintf (dump_file, "rat * ");
3482
3483 acost = data->costs[sym_p][var_p][off_p][rat_p];
3484 fprintf (dump_file, "index costs %d\n", acost);
3485 }
3486 if (has_predec[mem_mode] || has_postdec[mem_mode]
3487 || has_preinc[mem_mode] || has_postinc[mem_mode])
3488 fprintf (dump_file, " May include autoinc/dec\n");
3489 fprintf (dump_file, "\n");
3490 }
3491
3492 address_cost_data_list[data_index] = data;
3493 }
3494
3495 bits = GET_MODE_BITSIZE (address_mode);
3496 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3497 offset &= mask;
3498 if ((offset >> (bits - 1) & 1))
3499 offset |= ~mask;
3500 s_offset = offset;
3501
3502 autoinc = false;
3503 autoinc_type = AINC_NONE;
3504 msize = GET_MODE_SIZE (mem_mode);
3505 autoinc_offset = offset;
3506 if (stmt_after_inc)
3507 autoinc_offset += ratio * cstep;
3508 if (symbol_present || var_present || ratio != 1)
3509 autoinc = false;
3510 else
3511 {
3512 if (has_postinc[mem_mode] && autoinc_offset == 0
3513 && msize == cstep)
3514 autoinc_type = AINC_POST_INC;
3515 else if (has_postdec[mem_mode] && autoinc_offset == 0
3516 && msize == -cstep)
3517 autoinc_type = AINC_POST_DEC;
3518 else if (has_preinc[mem_mode] && autoinc_offset == msize
3519 && msize == cstep)
3520 autoinc_type = AINC_PRE_INC;
3521 else if (has_predec[mem_mode] && autoinc_offset == -msize
3522 && msize == -cstep)
3523 autoinc_type = AINC_PRE_DEC;
3524
3525 if (autoinc_type != AINC_NONE)
3526 autoinc = true;
3527 }
3528
3529 cost = 0;
3530 offset_p = (s_offset != 0
3531 && data->min_offset <= s_offset
3532 && s_offset <= data->max_offset);
3533 ratio_p = (ratio != 1
3534 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3535
3536 if (ratio != 1 && !ratio_p)
3537 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3538
3539 if (s_offset && !offset_p && !symbol_present)
3540 cost += add_cost (speed, address_mode);
3541
3542 if (may_autoinc)
3543 *may_autoinc = autoinc;
3544 if (autoinc)
3545 acost = data->ainc_costs[autoinc_type];
3546 else
3547 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3548 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3549 return new_cost (cost + acost, complexity);
3550 }
3551
3552 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3553 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3554 calculating the operands of EXPR. Returns true if successful, and returns
3555 the cost in COST. */
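/* E.g. for EXPR == a + b * 8 the multiplier is 2^3, so M == 3 and the
   target's shift-add cost for a shift by 3 is used instead of a full
   multiply and add; for a - b * 8 shiftsub1_cost applies, and for
   b * 8 - a shiftsub0_cost does.  */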
3556
3557 static bool
3558 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3559 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3560 {
3561 comp_cost res;
3562 tree op1 = TREE_OPERAND (expr, 1);
3563 tree cst = TREE_OPERAND (mult, 1);
3564 tree multop = TREE_OPERAND (mult, 0);
3565 int m = exact_log2 (int_cst_value (cst));
3566 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3567 int sa_cost;
3568 bool equal_p = false;
3569
3570 if (!(m >= 0 && m < maxm))
3571 return false;
3572
3573 if (operand_equal_p (op1, mult, 0))
3574 equal_p = true;
3575
3576 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3577 ? shiftadd_cost (speed, mode, m)
3578 : (equal_p
3579 ? shiftsub1_cost (speed, mode, m)
3580 : shiftsub0_cost (speed, mode, m)));
3581 res = new_cost (sa_cost, 0);
3582 res = add_costs (res, equal_p ? cost0 : cost1);
3583
3584 STRIP_NOPS (multop);
3585 if (!is_gimple_val (multop))
3586 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3587
3588 *cost = res;
3589 return true;
3590 }
3591
3592 /* Estimates cost of forcing expression EXPR into a variable. */
3593
3594 static comp_cost
3595 force_expr_to_var_cost (tree expr, bool speed)
3596 {
3597 static bool costs_initialized = false;
3598 static unsigned integer_cost [2];
3599 static unsigned symbol_cost [2];
3600 static unsigned address_cost [2];
3601 tree op0, op1;
3602 comp_cost cost0, cost1, cost;
3603 enum machine_mode mode;
3604
3605 if (!costs_initialized)
3606 {
3607 tree type = build_pointer_type (integer_type_node);
3608 tree var, addr;
3609 rtx x;
3610 int i;
3611
3612 var = create_tmp_var_raw (integer_type_node, "test_var");
3613 TREE_STATIC (var) = 1;
3614 x = produce_memory_decl_rtl (var, NULL);
3615 SET_DECL_RTL (var, x);
3616
3617 addr = build1 (ADDR_EXPR, type, var);
3618
3619
3620 for (i = 0; i < 2; i++)
3621 {
3622 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3623 2000), i);
3624
3625 symbol_cost[i] = computation_cost (addr, i) + 1;
3626
3627 address_cost[i]
3628 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3629 if (dump_file && (dump_flags & TDF_DETAILS))
3630 {
3631 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3632 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3633 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3634 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3635 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3636 fprintf (dump_file, "\n");
3637 }
3638 }
3639
3640 costs_initialized = true;
3641 }
3642
3643 STRIP_NOPS (expr);
3644
3645 if (SSA_VAR_P (expr))
3646 return no_cost;
3647
3648 if (is_gimple_min_invariant (expr))
3649 {
3650 if (TREE_CODE (expr) == INTEGER_CST)
3651 return new_cost (integer_cost [speed], 0);
3652
3653 if (TREE_CODE (expr) == ADDR_EXPR)
3654 {
3655 tree obj = TREE_OPERAND (expr, 0);
3656
3657 if (TREE_CODE (obj) == VAR_DECL
3658 || TREE_CODE (obj) == PARM_DECL
3659 || TREE_CODE (obj) == RESULT_DECL)
3660 return new_cost (symbol_cost [speed], 0);
3661 }
3662
3663 return new_cost (address_cost [speed], 0);
3664 }
3665
3666 switch (TREE_CODE (expr))
3667 {
3668 case POINTER_PLUS_EXPR:
3669 case PLUS_EXPR:
3670 case MINUS_EXPR:
3671 case MULT_EXPR:
3672 op0 = TREE_OPERAND (expr, 0);
3673 op1 = TREE_OPERAND (expr, 1);
3674 STRIP_NOPS (op0);
3675 STRIP_NOPS (op1);
3676 break;
3677
3678 CASE_CONVERT:
3679 case NEGATE_EXPR:
3680 op0 = TREE_OPERAND (expr, 0);
3681 STRIP_NOPS (op0);
3682 op1 = NULL_TREE;
3683 break;
3684
3685 default:
3686 /* Just an arbitrary value, FIXME. */
3687 return new_cost (target_spill_cost[speed], 0);
3688 }
3689
3690 if (op0 == NULL_TREE
3691 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3692 cost0 = no_cost;
3693 else
3694 cost0 = force_expr_to_var_cost (op0, speed);
3695
3696 if (op1 == NULL_TREE
3697 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3698 cost1 = no_cost;
3699 else
3700 cost1 = force_expr_to_var_cost (op1, speed);
3701
3702 mode = TYPE_MODE (TREE_TYPE (expr));
3703 switch (TREE_CODE (expr))
3704 {
3705 case POINTER_PLUS_EXPR:
3706 case PLUS_EXPR:
3707 case MINUS_EXPR:
3708 case NEGATE_EXPR:
3709 cost = new_cost (add_cost (speed, mode), 0);
3710 if (TREE_CODE (expr) != NEGATE_EXPR)
3711 {
3712 tree mult = NULL_TREE;
3713 comp_cost sa_cost;
3714 if (TREE_CODE (op1) == MULT_EXPR)
3715 mult = op1;
3716 else if (TREE_CODE (op0) == MULT_EXPR)
3717 mult = op0;
3718
3719 if (mult != NULL_TREE
3720 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3721 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3722 speed, &sa_cost))
3723 return sa_cost;
3724 }
3725 break;
3726
3727 CASE_CONVERT:
3728 {
3729 tree inner_mode, outer_mode;
3730 outer_mode = TREE_TYPE (expr);
3731 inner_mode = TREE_TYPE (op0);
3732 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3733 TYPE_MODE (inner_mode), speed), 0);
3734 }
3735 break;
3736
3737 case MULT_EXPR:
3738 if (cst_and_fits_in_hwi (op0))
3739 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3740 mode, speed), 0);
3741 else if (cst_and_fits_in_hwi (op1))
3742 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3743 mode, speed), 0);
3744 else
3745 return new_cost (target_spill_cost [speed], 0);
3746 break;
3747
3748 default:
3749 gcc_unreachable ();
3750 }
3751
3752 cost = add_costs (cost, cost0);
3753 cost = add_costs (cost, cost1);
3754
3755 /* Bound the cost by target_spill_cost. The parts of complicated
3756 computations often are either loop invariant or at least can
3757 be shared between several iv uses, so letting this grow without
3758 limits would not give reasonable results. */
3759 if (cost.cost > (int) target_spill_cost [speed])
3760 cost.cost = target_spill_cost [speed];
3761
3762 return cost;
3763 }
3764
3765 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3766 invariants the computation depends on. */
3767
3768 static comp_cost
3769 force_var_cost (struct ivopts_data *data,
3770 tree expr, bitmap *depends_on)
3771 {
3772 if (depends_on)
3773 {
3774 fd_ivopts_data = data;
3775 walk_tree (&expr, find_depends, depends_on, NULL);
3776 }
3777
3778 return force_expr_to_var_cost (expr, data->speed);
3779 }
3780
3781 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3782 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3783 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3784 invariants the computation depends on. */
3785
3786 static comp_cost
3787 split_address_cost (struct ivopts_data *data,
3788 tree addr, bool *symbol_present, bool *var_present,
3789 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3790 {
3791 tree core;
3792 HOST_WIDE_INT bitsize;
3793 HOST_WIDE_INT bitpos;
3794 tree toffset;
3795 enum machine_mode mode;
3796 int unsignedp, volatilep;
3797
3798 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3799 &unsignedp, &volatilep, false);
3800
3801 if (toffset != 0
3802 || bitpos % BITS_PER_UNIT != 0
3803 || TREE_CODE (core) != VAR_DECL)
3804 {
3805 *symbol_present = false;
3806 *var_present = true;
3807 fd_ivopts_data = data;
3808 walk_tree (&addr, find_depends, depends_on, NULL);
3809 return new_cost (target_spill_cost[data->speed], 0);
3810 }
3811
3812 *offset += bitpos / BITS_PER_UNIT;
3813 if (TREE_STATIC (core)
3814 || DECL_EXTERNAL (core))
3815 {
3816 *symbol_present = true;
3817 *var_present = false;
3818 return no_cost;
3819 }
3820
3821 *symbol_present = false;
3822 *var_present = true;
3823 return no_cost;
3824 }
3825
3826 /* Estimates cost of expressing difference of addresses E1 - E2 as
3827 var + symbol + offset. The value of offset is added to OFFSET,
3828 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3829 part is missing. DEPENDS_ON is a set of the invariants the computation
3830 depends on. */
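/* E.g. for E1 == &a[4] and E2 == &a[0], with 4-byte elements,
   ptr_difference_const folds the difference to the constant 16, which is
   simply added to *OFFSET at no extra cost.  */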
3831
3832 static comp_cost
3833 ptr_difference_cost (struct ivopts_data *data,
3834 tree e1, tree e2, bool *symbol_present, bool *var_present,
3835 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3836 {
3837 HOST_WIDE_INT diff = 0;
3838 aff_tree aff_e1, aff_e2;
3839 tree type;
3840
3841 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3842
3843 if (ptr_difference_const (e1, e2, &diff))
3844 {
3845 *offset += diff;
3846 *symbol_present = false;
3847 *var_present = false;
3848 return no_cost;
3849 }
3850
3851 if (integer_zerop (e2))
3852 return split_address_cost (data, TREE_OPERAND (e1, 0),
3853 symbol_present, var_present, offset, depends_on);
3854
3855 *symbol_present = false;
3856 *var_present = true;
3857
3858 type = signed_type_for (TREE_TYPE (e1));
3859 tree_to_aff_combination (e1, type, &aff_e1);
3860 tree_to_aff_combination (e2, type, &aff_e2);
3861 aff_combination_scale (&aff_e2, double_int_minus_one);
3862 aff_combination_add (&aff_e1, &aff_e2);
3863
3864 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3865 }
3866
3867 /* Estimates cost of expressing difference E1 - E2 as
3868 var + symbol + offset. The value of offset is added to OFFSET,
3869 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3870 part is missing. DEPENDS_ON is a set of the invariants the computation
3871 depends on. */
3872
3873 static comp_cost
3874 difference_cost (struct ivopts_data *data,
3875 tree e1, tree e2, bool *symbol_present, bool *var_present,
3876 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3877 {
3878 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3879 unsigned HOST_WIDE_INT off1, off2;
3880 aff_tree aff_e1, aff_e2;
3881 tree type;
3882
3883 e1 = strip_offset (e1, &off1);
3884 e2 = strip_offset (e2, &off2);
3885 *offset += off1 - off2;
3886
3887 STRIP_NOPS (e1);
3888 STRIP_NOPS (e2);
3889
3890 if (TREE_CODE (e1) == ADDR_EXPR)
3891 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3892 offset, depends_on);
3893 *symbol_present = false;
3894
3895 if (operand_equal_p (e1, e2, 0))
3896 {
3897 *var_present = false;
3898 return no_cost;
3899 }
3900
3901 *var_present = true;
3902
3903 if (integer_zerop (e2))
3904 return force_var_cost (data, e1, depends_on);
3905
3906 if (integer_zerop (e1))
3907 {
3908 comp_cost cost = force_var_cost (data, e2, depends_on);
3909 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3910 return cost;
3911 }
3912
3913 type = signed_type_for (TREE_TYPE (e1));
3914 tree_to_aff_combination (e1, type, &aff_e1);
3915 tree_to_aff_combination (e2, type, &aff_e2);
3916 aff_combination_scale (&aff_e2, double_int_minus_one);
3917 aff_combination_add (&aff_e1, &aff_e2);
3918
3919 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3920 }
3921
3922 /* Returns true if AFF1 and AFF2 are identical. */
3923
3924 static bool
3925 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3926 {
3927 unsigned i;
3928
3929 if (aff1->n != aff2->n)
3930 return false;
3931
3932 for (i = 0; i < aff1->n; i++)
3933 {
3934 if (aff1->elts[i].coef != aff2->elts[i].coef)
3935 return false;
3936
3937 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3938 return false;
3939 }
3940 return true;
3941 }
3942
3943 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3944
3945 static int
3946 get_expr_id (struct ivopts_data *data, tree expr)
3947 {
3948 struct iv_inv_expr_ent ent;
3949 struct iv_inv_expr_ent **slot;
3950
3951 ent.expr = expr;
3952 ent.hash = iterative_hash_expr (expr, 0);
3953 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3954 if (*slot)
3955 return (*slot)->id;
3956
3957 *slot = XNEW (struct iv_inv_expr_ent);
3958 (*slot)->expr = expr;
3959 (*slot)->hash = ent.hash;
3960 (*slot)->id = data->inv_expr_id++;
3961 return (*slot)->id;
3962 }
3963
3964 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3965 requires a new compiler generated temporary. Returns -1 otherwise.
3966 ADDRESS_P is a flag indicating if the expression is for address
3967 computation. */
3968
3969 static int
3970 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3971 tree cbase, HOST_WIDE_INT ratio,
3972 bool address_p)
3973 {
3974 aff_tree ubase_aff, cbase_aff;
3975 tree expr, ub, cb;
3976
3977 STRIP_NOPS (ubase);
3978 STRIP_NOPS (cbase);
3979 ub = ubase;
3980 cb = cbase;
3981
3982 if ((TREE_CODE (ubase) == INTEGER_CST)
3983 && (TREE_CODE (cbase) == INTEGER_CST))
3984 return -1;
3985
3986 /* Strips the constant part. */
3987 if (TREE_CODE (ubase) == PLUS_EXPR
3988 || TREE_CODE (ubase) == MINUS_EXPR
3989 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3990 {
3991 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3992 ubase = TREE_OPERAND (ubase, 0);
3993 }
3994
3995 /* Strips the constant part. */
3996 if (TREE_CODE (cbase) == PLUS_EXPR
3997 || TREE_CODE (cbase) == MINUS_EXPR
3998 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3999 {
4000 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4001 cbase = TREE_OPERAND (cbase, 0);
4002 }
4003
4004 if (address_p)
4005 {
4006 if (((TREE_CODE (ubase) == SSA_NAME)
4007 || (TREE_CODE (ubase) == ADDR_EXPR
4008 && is_gimple_min_invariant (ubase)))
4009 && (TREE_CODE (cbase) == INTEGER_CST))
4010 return -1;
4011
4012 if (((TREE_CODE (cbase) == SSA_NAME)
4013 || (TREE_CODE (cbase) == ADDR_EXPR
4014 && is_gimple_min_invariant (cbase)))
4015 && (TREE_CODE (ubase) == INTEGER_CST))
4016 return -1;
4017 }
4018
4019 if (ratio == 1)
4020 {
4021 if (operand_equal_p (ubase, cbase, 0))
4022 return -1;
4023
4024 if (TREE_CODE (ubase) == ADDR_EXPR
4025 && TREE_CODE (cbase) == ADDR_EXPR)
4026 {
4027 tree usym, csym;
4028
4029 usym = TREE_OPERAND (ubase, 0);
4030 csym = TREE_OPERAND (cbase, 0);
4031 if (TREE_CODE (usym) == ARRAY_REF)
4032 {
4033 tree ind = TREE_OPERAND (usym, 1);
4034 if (TREE_CODE (ind) == INTEGER_CST
4035 && tree_fits_shwi_p (ind)
4036 && tree_to_shwi (ind) == 0)
4037 usym = TREE_OPERAND (usym, 0);
4038 }
4039 if (TREE_CODE (csym) == ARRAY_REF)
4040 {
4041 tree ind = TREE_OPERAND (csym, 1);
4042 if (TREE_CODE (ind) == INTEGER_CST
4043 && tree_fits_shwi_p (ind)
4044 && tree_to_shwi (ind) == 0)
4045 csym = TREE_OPERAND (csym, 0);
4046 }
4047 if (operand_equal_p (usym, csym, 0))
4048 return -1;
4049 }
4050 /* Now do more complex comparison. */
4051 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4052 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4053 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4054 return -1;
4055 }
4056
4057 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4058 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4059
4060 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4061 aff_combination_add (&ubase_aff, &cbase_aff);
4062 expr = aff_combination_to_tree (&ubase_aff);
4063 return get_expr_id (data, expr);
4064 }
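/* For example (hypothetical SSA names): with UBASE == n_5 + 4, CBASE == n_5
   and RATIO == 1, stripping the constant parts leaves n_5 on both sides, so
   operand_equal_p matches, no compiler-generated temporary is needed and -1
   is returned; the difference is just the constant 4. */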
4065
4066
4067
4068 /* Determines the cost of the computation by which USE is expressed
4069 from induction variable CAND. If ADDRESS_P is true, we just need
4070 to create an address from it, otherwise we want to get it into a
4071 register. A set of invariants we depend on is stored in
4072 DEPENDS_ON. AT is the statement at that the value is computed.
4073 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4074 addressing is likely. */
4075
4076 static comp_cost
4077 get_computation_cost_at (struct ivopts_data *data,
4078 struct iv_use *use, struct iv_cand *cand,
4079 bool address_p, bitmap *depends_on, gimple at,
4080 bool *can_autoinc,
4081 int *inv_expr_id)
4082 {
4083 tree ubase = use->iv->base, ustep = use->iv->step;
4084 tree cbase, cstep;
4085 tree utype = TREE_TYPE (ubase), ctype;
4086 unsigned HOST_WIDE_INT cstepi, offset = 0;
4087 HOST_WIDE_INT ratio, aratio;
4088 bool var_present, symbol_present, stmt_is_after_inc;
4089 comp_cost cost;
4090 double_int rat;
4091 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4092 enum machine_mode mem_mode = (address_p
4093 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4094 : VOIDmode);
4095
4096 *depends_on = NULL;
4097
4098 /* Only consider real candidates. */
4099 if (!cand->iv)
4100 return infinite_cost;
4101
4102 cbase = cand->iv->base;
4103 cstep = cand->iv->step;
4104 ctype = TREE_TYPE (cbase);
4105
4106 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4107 {
4108 /* We do not have enough precision to express the values of the use. */
4109 return infinite_cost;
4110 }
4111
4112 if (address_p
4113 || (use->iv->base_object
4114 && cand->iv->base_object
4115 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4116 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4117 {
4118 /* Do not try to express address of an object with computation based
4119 on address of a different object. This may cause problems in rtl
4120 level alias analysis (which does not expect this to happen,
4121 as it is illegal in C), and would be unlikely to be useful
4122 anyway. */
4123 if (use->iv->base_object
4124 && cand->iv->base_object
4125 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4126 return infinite_cost;
4127 }
4128
4129 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4130 {
4131 /* TODO -- add direct handling of this case. */
4132 goto fallback;
4133 }
4134
4135 /* CSTEPI is removed from the offset in case the statement is after the
4136 increment. If the step is not constant, we use zero instead.
4137 This is a bit imprecise (there is the extra addition), but
4138 redundancy elimination is likely to transform the code so that
4139 it uses the value of the variable before the increment anyway,
4140 so this is not that unrealistic. */
4141 if (cst_and_fits_in_hwi (cstep))
4142 cstepi = int_cst_value (cstep);
4143 else
4144 cstepi = 0;
4145
4146 if (!constant_multiple_of (ustep, cstep, &rat))
4147 return infinite_cost;
4148
4149 if (rat.fits_shwi ())
4150 ratio = rat.to_shwi ();
4151 else
4152 return infinite_cost;
4153
4154 STRIP_NOPS (cbase);
4155 ctype = TREE_TYPE (cbase);
4156
4157 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4158
4159 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4160 or ratio == 1, it is better to handle this like
4161
4162 ubase - ratio * cbase + ratio * var
4163
4164 (this also holds in the case ratio == -1; TODO). */
4165
4166 if (cst_and_fits_in_hwi (cbase))
4167 {
4168 offset = - ratio * int_cst_value (cbase);
4169 cost = difference_cost (data,
4170 ubase, build_int_cst (utype, 0),
4171 &symbol_present, &var_present, &offset,
4172 depends_on);
4173 cost.cost /= avg_loop_niter (data->current_loop);
4174 }
4175 else if (ratio == 1)
4176 {
4177 tree real_cbase = cbase;
4178
4179 /* Check to see if any adjustment is needed. */
4180 if (cstepi == 0 && stmt_is_after_inc)
4181 {
4182 aff_tree real_cbase_aff;
4183 aff_tree cstep_aff;
4184
4185 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4186 &real_cbase_aff);
4187 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4188
4189 aff_combination_add (&real_cbase_aff, &cstep_aff);
4190 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4191 }
4192
4193 cost = difference_cost (data,
4194 ubase, real_cbase,
4195 &symbol_present, &var_present, &offset,
4196 depends_on);
4197 cost.cost /= avg_loop_niter (data->current_loop);
4198 }
4199 else if (address_p
4200 && !POINTER_TYPE_P (ctype)
4201 && multiplier_allowed_in_address_p
4202 (ratio, mem_mode,
4203 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4204 {
4205 cbase
4206 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4207 cost = difference_cost (data,
4208 ubase, cbase,
4209 &symbol_present, &var_present, &offset,
4210 depends_on);
4211 cost.cost /= avg_loop_niter (data->current_loop);
4212 }
4213 else
4214 {
4215 cost = force_var_cost (data, cbase, depends_on);
4216 cost = add_costs (cost,
4217 difference_cost (data,
4218 ubase, build_int_cst (utype, 0),
4219 &symbol_present, &var_present,
4220 &offset, depends_on));
4221 cost.cost /= avg_loop_niter (data->current_loop);
4222 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4223 }
4224
4225 if (inv_expr_id)
4226 {
4227 *inv_expr_id =
4228 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4229 /* Clear depends_on. */
4230 if (*inv_expr_id != -1 && depends_on && *depends_on)
4231 bitmap_clear (*depends_on);
4232 }
4233
4234 /* If we are after the increment, the value of the candidate is higher by
4235 one iteration. */
4236 if (stmt_is_after_inc)
4237 offset -= ratio * cstepi;
4238
4239 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4240 (symbol/var1/const parts may be omitted). If we are looking for an
4241 address, find the cost of addressing this. */
4242 if (address_p)
4243 return add_costs (cost,
4244 get_address_cost (symbol_present, var_present,
4245 offset, ratio, cstepi,
4246 mem_mode,
4247 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4248 speed, stmt_is_after_inc,
4249 can_autoinc));
4250
4251 /* Otherwise estimate the costs for computing the expression. */
4252 if (!symbol_present && !var_present && !offset)
4253 {
4254 if (ratio != 1)
4255 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4256 return cost;
4257 }
4258
4259 /* Symbol + offset should be compile-time computable, so consider that they
4260 are added once to the variable, if present. */
4261 if (var_present && (symbol_present || offset))
4262 cost.cost += adjust_setup_cost (data,
4263 add_cost (speed, TYPE_MODE (ctype)));
4264
4265 /* Having offset does not affect runtime cost in case it is added to
4266 symbol, but it increases complexity. */
4267 if (offset)
4268 cost.complexity++;
4269
4270 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4271
4272 aratio = ratio > 0 ? ratio : -ratio;
4273 if (aratio != 1)
4274 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4275 return cost;
4276
4277 fallback:
4278 if (can_autoinc)
4279 *can_autoinc = false;
4280
4281 {
4282 /* Just get the expression, expand it and measure the cost. */
4283 tree comp = get_computation_at (data->current_loop, use, cand, at);
4284
4285 if (!comp)
4286 return infinite_cost;
4287
4288 if (address_p)
4289 comp = build_simple_mem_ref (comp);
4290
4291 return new_cost (computation_cost (comp, speed), 0);
4292 }
4293 }
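/* To make the decomposition above concrete (a hypothetical use/candidate
   pair): expressing the address of a[i] (UBASE == &a[0], USTEP == 4) by a
   candidate with CBASE == 0 and CSTEP == 1 gives RATIO == 4 and the shape
   symbol (&a) + 4 * var, which get_address_cost then prices against the
   target's addressing modes. */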
4294
4295 /* Determines the cost of the computation by which USE is expressed
4296 from induction variable CAND. If ADDRESS_P is true, we just need
4297 to create an address from it, otherwise we want to get it into a
4298 register. A set of invariants we depend on is stored in
4299 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4300 autoinc addressing is likely. */
4301
4302 static comp_cost
4303 get_computation_cost (struct ivopts_data *data,
4304 struct iv_use *use, struct iv_cand *cand,
4305 bool address_p, bitmap *depends_on,
4306 bool *can_autoinc, int *inv_expr_id)
4307 {
4308 return get_computation_cost_at (data,
4309 use, cand, address_p, depends_on, use->stmt,
4310 can_autoinc, inv_expr_id);
4311 }
4312
4313 /* Determines cost of basing replacement of USE on CAND in a generic
4314 expression. */
4315
4316 static bool
4317 determine_use_iv_cost_generic (struct ivopts_data *data,
4318 struct iv_use *use, struct iv_cand *cand)
4319 {
4320 bitmap depends_on;
4321 comp_cost cost;
4322 int inv_expr_id = -1;
4323
4324 /* The simple case first -- if we need to express value of the preserved
4325 original biv, the cost is 0. This also prevents us from counting the
4326 cost of increment twice -- once at this use and once in the cost of
4327 the candidate. */
4328 if (cand->pos == IP_ORIGINAL
4329 && cand->incremented_at == use->stmt)
4330 {
4331 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4332 ERROR_MARK, -1);
4333 return true;
4334 }
4335
4336 cost = get_computation_cost (data, use, cand, false, &depends_on,
4337 NULL, &inv_expr_id);
4338
4339 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4340 inv_expr_id);
4341
4342 return !infinite_cost_p (cost);
4343 }
4344
4345 /* Determines cost of basing replacement of USE on CAND in an address. */
4346
4347 static bool
4348 determine_use_iv_cost_address (struct ivopts_data *data,
4349 struct iv_use *use, struct iv_cand *cand)
4350 {
4351 bitmap depends_on;
4352 bool can_autoinc;
4353 int inv_expr_id = -1;
4354 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4355 &can_autoinc, &inv_expr_id);
4356
4357 if (cand->ainc_use == use)
4358 {
4359 if (can_autoinc)
4360 cost.cost -= cand->cost_step;
4361 /* If we generated the candidate solely for exploiting autoincrement
4362 opportunities, and it turns out it can't be used, set the cost to
4363 infinity to make sure we ignore it. */
4364 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4365 cost = infinite_cost;
4366 }
4367 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4368 inv_expr_id);
4369
4370 return !infinite_cost_p (cost);
4371 }
4372
4373 /* Computes value of candidate CAND at position AT in iteration NITER, and
4374 stores it to VAL. */
4375
4376 static void
4377 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4378 aff_tree *val)
4379 {
4380 aff_tree step, delta, nit;
4381 struct iv *iv = cand->iv;
4382 tree type = TREE_TYPE (iv->base);
4383 tree steptype = type;
4384 if (POINTER_TYPE_P (type))
4385 steptype = sizetype;
4386
4387 tree_to_aff_combination (iv->step, steptype, &step);
4388 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4389 aff_combination_convert (&nit, steptype);
4390 aff_combination_mult (&nit, &step, &delta);
4391 if (stmt_after_increment (loop, cand, at))
4392 aff_combination_add (&delta, &step);
4393
4394 tree_to_aff_combination (iv->base, type, val);
4395 aff_combination_add (val, &delta);
4396 }
4397
4398 /* Returns period of induction variable iv. */
4399
4400 static tree
4401 iv_period (struct iv *iv)
4402 {
4403 tree step = iv->step, period, type;
4404 tree pow2div;
4405
4406 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4407
4408 type = unsigned_type_for (TREE_TYPE (step));
4409 /* Period of the iv is lcm (step, type_range)/step - 1,
4410 i.e., N*type_range/step - 1. Since the type range is a power
4411 of two, N == step >> num_of_ending_zeros_binary (step),
4412 so the final result is
4413
4414 (type_range >> num_of_ending_zeros_binary (step)) - 1
4415
4416 */
4417 pow2div = num_ending_zeros (step);
4418
4419 period = build_low_bits_mask (type,
4420 (TYPE_PRECISION (type)
4421 - tree_to_uhwi (pow2div)));
4422
4423 return period;
4424 }
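/* A worked example (hypothetical values): for a 32-bit iv with step == 12,
   num_ending_zeros (step) == 2, so the period is
   (0x100000000 >> 2) - 1 == 0x3fffffff -- the number of increments that
   can be taken before the iv may wrap around its type range. */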
4425
4426 /* Returns the comparison operator used when eliminating the iv USE. */
4427
4428 static enum tree_code
4429 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4430 {
4431 struct loop *loop = data->current_loop;
4432 basic_block ex_bb;
4433 edge exit;
4434
4435 ex_bb = gimple_bb (use->stmt);
4436 exit = EDGE_SUCC (ex_bb, 0);
4437 if (flow_bb_inside_loop_p (loop, exit->dest))
4438 exit = EDGE_SUCC (ex_bb, 1);
4439
4440 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4441 }
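/* E.g. if the exit test is "if (cond) goto out;" and the true edge leaves
   the loop, the eliminated condition becomes "cand == bound" (EQ_EXPR);
   if the loop instead exits on the false edge, "cand != bound" keeps
   iterating until the bound is reached. */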
4442
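/* Strips conversions from EXP that do not change whether the type
   wraps on overflow, returning the innermost such expression. */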
4443 static tree
4444 strip_wrap_conserving_type_conversions (tree exp)
4445 {
4446 while (tree_ssa_useless_type_conversion (exp)
4447 && (nowrap_type_p (TREE_TYPE (exp))
4448 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4449 exp = TREE_OPERAND (exp, 0);
4450 return exp;
4451 }
4452
4453 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4454 only check for an exact match. */
4455
4456 static bool
4457 expr_equal_p (tree e, tree what)
4458 {
4459 gimple stmt;
4460 enum tree_code code;
4461
4462 e = strip_wrap_conserving_type_conversions (e);
4463 what = strip_wrap_conserving_type_conversions (what);
4464
4465 code = TREE_CODE (what);
4466 if (TREE_TYPE (e) != TREE_TYPE (what))
4467 return false;
4468
4469 if (operand_equal_p (e, what, 0))
4470 return true;
4471
4472 if (TREE_CODE (e) != SSA_NAME)
4473 return false;
4474
4475 stmt = SSA_NAME_DEF_STMT (e);
4476 if (gimple_code (stmt) != GIMPLE_ASSIGN
4477 || gimple_assign_rhs_code (stmt) != code)
4478 return false;
4479
4480 switch (get_gimple_rhs_class (code))
4481 {
4482 case GIMPLE_BINARY_RHS:
4483 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4484 return false;
4485 /* Fallthru. */
4486
4487 case GIMPLE_UNARY_RHS:
4488 case GIMPLE_SINGLE_RHS:
4489 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4490 default:
4491 return false;
4492 }
4493 }
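/* E.g. (hypothetical SSA): if E is t_1 defined by "t_1 = a_2 + b_3" and
   WHAT is the tree a_2 + b_3, the GIMPLE_BINARY_RHS case recurses into
   both operands and reports equality. */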
4494
4495 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4496 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4497 calculation is performed in non-wrapping type.
4498
4499 TODO: More generally, we could test for the situation that
4500 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4501 This would require knowing the sign of OFFSET.
4502
4503 Also, we only look for the first addition in the computation of BASE.
4504 More complex analysis would be better, but introducing it just for
4505 this optimization seems like overkill. */
4506
4507 static bool
4508 difference_cannot_overflow_p (tree base, tree offset)
4509 {
4510 enum tree_code code;
4511 tree e1, e2;
4512
4513 if (!nowrap_type_p (TREE_TYPE (base)))
4514 return false;
4515
4516 base = expand_simple_operations (base);
4517
4518 if (TREE_CODE (base) == SSA_NAME)
4519 {
4520 gimple stmt = SSA_NAME_DEF_STMT (base);
4521
4522 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4523 return false;
4524
4525 code = gimple_assign_rhs_code (stmt);
4526 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4527 return false;
4528
4529 e1 = gimple_assign_rhs1 (stmt);
4530 e2 = gimple_assign_rhs2 (stmt);
4531 }
4532 else
4533 {
4534 code = TREE_CODE (base);
4535 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4536 return false;
4537 e1 = TREE_OPERAND (base, 0);
4538 e2 = TREE_OPERAND (base, 1);
4539 }
4540
4541 /* TODO: deeper inspection may be necessary to prove the equality. */
4542 switch (code)
4543 {
4544 case PLUS_EXPR:
4545 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4546 case POINTER_PLUS_EXPR:
4547 return expr_equal_p (e2, offset);
4548
4549 default:
4550 return false;
4551 }
4552 }
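/* For instance (hypothetical SSA): if BASE is base_1 defined by
   "base_1 = something_2 + off_3" in a non-wrapping type and OFFSET is
   off_3, the PLUS_EXPR case proves that base_1 - off_3 == something_2
   cannot overflow. */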
4553
4554 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4555 comparison with CAND. NITER describes the number of iterations of
4556 the loops. If successful, the comparison in COMP_P is altered accordingly.
4557
4558 We aim to handle the following situation:
4559
4560 sometype *base, *p;
4561 int a, b, i;
4562
4563 i = a;
4564 p = p_0 = base + a;
4565
4566 do
4567 {
4568 bla (*p);
4569 p++;
4570 i++;
4571 }
4572 while (i < b);
4573
4574 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4575 We aim to optimize this to
4576
4577 p = p_0 = base + a;
4578 do
4579 {
4580 bla (*p);
4581 p++;
4582 }
4583 while (p < p_0 - a + b);
4584
4585 This preserves correctness, since the pointer arithmetic does not
4586 overflow. More precisely:
4587
4588 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4589 overflow in computing it or the values of p.
4590 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4591 overflow. To prove this, we use the fact that p_0 = base + a. */
4592
4593 static bool
4594 iv_elimination_compare_lt (struct ivopts_data *data,
4595 struct iv_cand *cand, enum tree_code *comp_p,
4596 struct tree_niter_desc *niter)
4597 {
4598 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4599 struct aff_tree nit, tmpa, tmpb;
4600 enum tree_code comp;
4601 HOST_WIDE_INT step;
4602
4603 /* We need to know that the candidate induction variable does not overflow.
4604 While more complex analysis may be used to prove this, for now just
4605 check that the variable appears in the original program and that it
4606 is computed in a type that guarantees no overflows. */
4607 cand_type = TREE_TYPE (cand->iv->base);
4608 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4609 return false;
4610
4611 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4612 the calculation of the BOUND could overflow, making the comparison
4613 invalid. */
4614 if (!data->loop_single_exit_p)
4615 return false;
4616
4617 /* We need to be able to decide whether candidate is increasing or decreasing
4618 in order to choose the right comparison operator. */
4619 if (!cst_and_fits_in_hwi (cand->iv->step))
4620 return false;
4621 step = int_cst_value (cand->iv->step);
4622
4623 /* Check that the number of iterations matches the expected pattern:
4624 a + 1 > b ? 0 : b - a - 1. */
4625 mbz = niter->may_be_zero;
4626 if (TREE_CODE (mbz) == GT_EXPR)
4627 {
4628 /* Handle a + 1 > b. */
4629 tree op0 = TREE_OPERAND (mbz, 0);
4630 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4631 {
4632 a = TREE_OPERAND (op0, 0);
4633 b = TREE_OPERAND (mbz, 1);
4634 }
4635 else
4636 return false;
4637 }
4638 else if (TREE_CODE (mbz) == LT_EXPR)
4639 {
4640 tree op1 = TREE_OPERAND (mbz, 1);
4641
4642 /* Handle b < a + 1. */
4643 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4644 {
4645 a = TREE_OPERAND (op1, 0);
4646 b = TREE_OPERAND (mbz, 0);
4647 }
4648 else
4649 return false;
4650 }
4651 else
4652 return false;
4653
4654 /* Expected number of iterations is B - A - 1. Check that it matches
4655 the actual number, i.e., that B - A - NITER = 1. */
4656 tree_to_aff_combination (niter->niter, nit_type, &nit);
4657 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4658 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4659 aff_combination_scale (&nit, double_int_minus_one);
4660 aff_combination_scale (&tmpa, double_int_minus_one);
4661 aff_combination_add (&tmpb, &tmpa);
4662 aff_combination_add (&tmpb, &nit);
4663 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4664 return false;
4665
4666 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4667 overflow. */
4668 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4669 cand->iv->step,
4670 fold_convert (TREE_TYPE (cand->iv->step), a));
4671 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4672 return false;
4673
4674 /* Determine the new comparison operator. */
4675 comp = step < 0 ? GT_EXPR : LT_EXPR;
4676 if (*comp_p == NE_EXPR)
4677 *comp_p = comp;
4678 else if (*comp_p == EQ_EXPR)
4679 *comp_p = invert_tree_comparison (comp, false);
4680 else
4681 gcc_unreachable ();
4682
4683 return true;
4684 }
4685
4686 /* Check whether it is possible to express the condition in USE by comparison
4687 of candidate CAND. If so, store the value compared with to BOUND, and the
4688 comparison operator to COMP. */
4689
4690 static bool
4691 may_eliminate_iv (struct ivopts_data *data,
4692 struct iv_use *use, struct iv_cand *cand, tree *bound,
4693 enum tree_code *comp)
4694 {
4695 basic_block ex_bb;
4696 edge exit;
4697 tree period;
4698 struct loop *loop = data->current_loop;
4699 aff_tree bnd;
4700 struct tree_niter_desc *desc = NULL;
4701
4702 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4703 return false;
4704
4705 /* For now works only for exits that dominate the loop latch.
4706 TODO: extend to other conditions inside loop body. */
4707 ex_bb = gimple_bb (use->stmt);
4708 if (use->stmt != last_stmt (ex_bb)
4709 || gimple_code (use->stmt) != GIMPLE_COND
4710 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4711 return false;
4712
4713 exit = EDGE_SUCC (ex_bb, 0);
4714 if (flow_bb_inside_loop_p (loop, exit->dest))
4715 exit = EDGE_SUCC (ex_bb, 1);
4716 if (flow_bb_inside_loop_p (loop, exit->dest))
4717 return false;
4718
4719 desc = niter_for_exit (data, exit);
4720 if (!desc)
4721 return false;
4722
4723 /* Determine whether we can use the variable to test the exit condition.
4724 This is the case iff the period of the induction variable is greater
4725 than the number of iterations for which the exit condition is true. */
4726 period = iv_period (cand->iv);
4727
4728 /* If the number of iterations is constant, compare against it directly. */
4729 if (TREE_CODE (desc->niter) == INTEGER_CST)
4730 {
4731 /* See cand_value_at. */
4732 if (stmt_after_increment (loop, cand, use->stmt))
4733 {
4734 if (!tree_int_cst_lt (desc->niter, period))
4735 return false;
4736 }
4737 else
4738 {
4739 if (tree_int_cst_lt (period, desc->niter))
4740 return false;
4741 }
4742 }
4743
4744 /* If not, and if this is the only possible exit of the loop, see whether
4745 we can get a conservative estimate on the number of iterations of the
4746 entire loop and compare against that instead. */
4747 else
4748 {
4749 double_int period_value, max_niter;
4750
4751 max_niter = desc->max;
4752 if (stmt_after_increment (loop, cand, use->stmt))
4753 max_niter += double_int_one;
4754 period_value = tree_to_double_int (period);
4755 if (max_niter.ugt (period_value))
4756 {
4757 /* See if we can take advantage of inferred loop bound information. */
4758 if (data->loop_single_exit_p)
4759 {
4760 if (!max_loop_iterations (loop, &max_niter))
4761 return false;
4762 /* The loop bound is already adjusted by adding 1. */
4763 if (max_niter.ugt (period_value))
4764 return false;
4765 }
4766 else
4767 return false;
4768 }
4769 }
4770
4771 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4772
4773 *bound = aff_combination_to_tree (&bnd);
4774 *comp = iv_elimination_compare (data, use);
4775
4776 /* It is unlikely that computing the number of iterations using division
4777 would be more profitable than keeping the original induction variable. */
4778 if (expression_expensive_p (*bound))
4779 return false;
4780
4781 /* Sometimes it is possible to handle the situation that the number of
4782 iterations may be zero unless additional assumptions hold, by using <
4783 instead of != in the exit condition.
4784
4785 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4786 base the exit condition on it. However, that is often too
4787 expensive. */
4788 if (!integer_zerop (desc->may_be_zero))
4789 return iv_elimination_compare_lt (data, cand, comp, desc);
4790
4791 return true;
4792 }
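/* A small example (hypothetical loop): for "while (i != n) { ...; i++; }"
   and a candidate p == base + 4 * i, NITER is n - i_0, so cand_value_at
   yields BOUND == base + 4 * n and the exit test can become
   "p != base + 4 * n", provided NITER cannot exceed the period of p. */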
4793
4794 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4795 be copied if it is used in the loop body and DATA->body_includes_call. */
4796
4797 static int
4798 parm_decl_cost (struct ivopts_data *data, tree bound)
4799 {
4800 tree sbound = bound;
4801 STRIP_NOPS (sbound);
4802
4803 if (TREE_CODE (sbound) == SSA_NAME
4804 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4805 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4806 && data->body_includes_call)
4807 return COSTS_N_INSNS (1);
4808
4809 return 0;
4810 }
4811
4812 /* Determines cost of basing replacement of USE on CAND in a condition. */
4813
4814 static bool
4815 determine_use_iv_cost_condition (struct ivopts_data *data,
4816 struct iv_use *use, struct iv_cand *cand)
4817 {
4818 tree bound = NULL_TREE;
4819 struct iv *cmp_iv;
4820 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4821 comp_cost elim_cost, express_cost, cost, bound_cost;
4822 bool ok;
4823 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4824 tree *control_var, *bound_cst;
4825 enum tree_code comp = ERROR_MARK;
4826
4827 /* Only consider real candidates. */
4828 if (!cand->iv)
4829 {
4830 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4831 ERROR_MARK, -1);
4832 return false;
4833 }
4834
4835 /* Try iv elimination. */
4836 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4837 {
4838 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4839 if (elim_cost.cost == 0)
4840 elim_cost.cost = parm_decl_cost (data, bound);
4841 else if (TREE_CODE (bound) == INTEGER_CST)
4842 elim_cost.cost = 0;
4843 /* If we replace a loop condition 'i < n' with 'p < base + n',
4844 depends_on_elim will have 'base' and 'n' set, which implies
4845 that both 'base' and 'n' will be live during the loop. More likely,
4846 'base + n' will be loop invariant, resulting in only one live value
4847 during the loop. So in that case we clear depends_on_elim and set
4848 elim_inv_expr_id instead. */
4849 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4850 {
4851 elim_inv_expr_id = get_expr_id (data, bound);
4852 bitmap_clear (depends_on_elim);
4853 }
4854 /* The bound is a loop invariant, so it will be only computed
4855 once. */
4856 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4857 }
4858 else
4859 elim_cost = infinite_cost;
4860
4861 /* Try expressing the original giv. If it is compared with an invariant,
4862 note that we cannot get rid of it. */
4863 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4864 NULL, &cmp_iv);
4865 gcc_assert (ok);
4866
4867 /* When the condition is a comparison of the candidate IV against
4868 zero, prefer this IV.
4869
4870 TODO: The constant that we're subtracting from the cost should
4871 be target-dependent. This information should be added to the
4872 target costs for each backend. */
4873 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4874 && integer_zerop (*bound_cst)
4875 && (operand_equal_p (*control_var, cand->var_after, 0)
4876 || operand_equal_p (*control_var, cand->var_before, 0)))
4877 elim_cost.cost -= 1;
4878
4879 express_cost = get_computation_cost (data, use, cand, false,
4880 &depends_on_express, NULL,
4881 &express_inv_expr_id);
4882 fd_ivopts_data = data;
4883 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4884
4885 /* Count the cost of the original bound as well. */
4886 bound_cost = force_var_cost (data, *bound_cst, NULL);
4887 if (bound_cost.cost == 0)
4888 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4889 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4890 bound_cost.cost = 0;
4891 express_cost.cost += bound_cost.cost;
4892
4893 /* Choose the better approach, preferring the eliminated IV. */
4894 if (compare_costs (elim_cost, express_cost) <= 0)
4895 {
4896 cost = elim_cost;
4897 depends_on = depends_on_elim;
4898 depends_on_elim = NULL;
4899 inv_expr_id = elim_inv_expr_id;
4900 }
4901 else
4902 {
4903 cost = express_cost;
4904 depends_on = depends_on_express;
4905 depends_on_express = NULL;
4906 bound = NULL_TREE;
4907 comp = ERROR_MARK;
4908 inv_expr_id = express_inv_expr_id;
4909 }
4910
4911 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4912
4913 if (depends_on_elim)
4914 BITMAP_FREE (depends_on_elim);
4915 if (depends_on_express)
4916 BITMAP_FREE (depends_on_express);
4917
4918 return !infinite_cost_p (cost);
4919 }
4920
4921 /* Determines cost of basing replacement of USE on CAND. Returns false
4922 if USE cannot be based on CAND. */
4923
4924 static bool
4925 determine_use_iv_cost (struct ivopts_data *data,
4926 struct iv_use *use, struct iv_cand *cand)
4927 {
4928 switch (use->type)
4929 {
4930 case USE_NONLINEAR_EXPR:
4931 return determine_use_iv_cost_generic (data, use, cand);
4932
4933 case USE_ADDRESS:
4934 return determine_use_iv_cost_address (data, use, cand);
4935
4936 case USE_COMPARE:
4937 return determine_use_iv_cost_condition (data, use, cand);
4938
4939 default:
4940 gcc_unreachable ();
4941 }
4942 }
4943
4944 /* Return true if get_computation_cost indicates that autoincrement is
4945 a possibility for the pair of USE and CAND, false otherwise. */
4946
4947 static bool
4948 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4949 struct iv_cand *cand)
4950 {
4951 bitmap depends_on;
4952 bool can_autoinc;
4953 comp_cost cost;
4954
4955 if (use->type != USE_ADDRESS)
4956 return false;
4957
4958 cost = get_computation_cost (data, use, cand, true, &depends_on,
4959 &can_autoinc, NULL);
4960
4961 BITMAP_FREE (depends_on);
4962
4963 return !infinite_cost_p (cost) && can_autoinc;
4964 }
4965
4966 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4967 use that allows autoincrement, and set their AINC_USE if possible. */
4968
4969 static void
4970 set_autoinc_for_original_candidates (struct ivopts_data *data)
4971 {
4972 unsigned i, j;
4973
4974 for (i = 0; i < n_iv_cands (data); i++)
4975 {
4976 struct iv_cand *cand = iv_cand (data, i);
4977 struct iv_use *closest_before = NULL;
4978 struct iv_use *closest_after = NULL;
4979 if (cand->pos != IP_ORIGINAL)
4980 continue;
4981
4982 for (j = 0; j < n_iv_uses (data); j++)
4983 {
4984 struct iv_use *use = iv_use (data, j);
4985 unsigned uid = gimple_uid (use->stmt);
4986
4987 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4988 continue;
4989
4990 if (uid < gimple_uid (cand->incremented_at)
4991 && (closest_before == NULL
4992 || uid > gimple_uid (closest_before->stmt)))
4993 closest_before = use;
4994
4995 if (uid > gimple_uid (cand->incremented_at)
4996 && (closest_after == NULL
4997 || uid < gimple_uid (closest_after->stmt)))
4998 closest_after = use;
4999 }
5000
5001 if (closest_before != NULL
5002 && autoinc_possible_for_pair (data, closest_before, cand))
5003 cand->ainc_use = closest_before;
5004 else if (closest_after != NULL
5005 && autoinc_possible_for_pair (data, closest_after, cand))
5006 cand->ainc_use = closest_after;
5007 }
5008 }
5009
5010 /* Finds the candidates for the induction variables. */
5011
5012 static void
5013 find_iv_candidates (struct ivopts_data *data)
5014 {
5015 /* Add commonly used ivs. */
5016 add_standard_iv_candidates (data);
5017
5018 /* Add old induction variables. */
5019 add_old_ivs_candidates (data);
5020
5021 /* Add induction variables derived from uses. */
5022 add_derived_ivs_candidates (data);
5023
5024 set_autoinc_for_original_candidates (data);
5025
5026 /* Record the important candidates. */
5027 record_important_candidates (data);
5028 }
5029
5030 /* Determines costs of basing the use of the iv on an iv candidate. */
5031
5032 static void
5033 determine_use_iv_costs (struct ivopts_data *data)
5034 {
5035 unsigned i, j;
5036 struct iv_use *use;
5037 struct iv_cand *cand;
5038 bitmap to_clear = BITMAP_ALLOC (NULL);
5039
5040 alloc_use_cost_map (data);
5041
5042 for (i = 0; i < n_iv_uses (data); i++)
5043 {
5044 use = iv_use (data, i);
5045
5046 if (data->consider_all_candidates)
5047 {
5048 for (j = 0; j < n_iv_cands (data); j++)
5049 {
5050 cand = iv_cand (data, j);
5051 determine_use_iv_cost (data, use, cand);
5052 }
5053 }
5054 else
5055 {
5056 bitmap_iterator bi;
5057
5058 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5059 {
5060 cand = iv_cand (data, j);
5061 if (!determine_use_iv_cost (data, use, cand))
5062 bitmap_set_bit (to_clear, j);
5063 }
5064
5065 /* Remove the candidates for which the cost is infinite from
5066 the list of related candidates. */
5067 bitmap_and_compl_into (use->related_cands, to_clear);
5068 bitmap_clear (to_clear);
5069 }
5070 }
5071
5072 BITMAP_FREE (to_clear);
5073
5074 if (dump_file && (dump_flags & TDF_DETAILS))
5075 {
5076 fprintf (dump_file, "Use-candidate costs:\n");
5077
5078 for (i = 0; i < n_iv_uses (data); i++)
5079 {
5080 use = iv_use (data, i);
5081
5082 fprintf (dump_file, "Use %d:\n", i);
5083 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5084 for (j = 0; j < use->n_map_members; j++)
5085 {
5086 if (!use->cost_map[j].cand
5087 || infinite_cost_p (use->cost_map[j].cost))
5088 continue;
5089
5090 fprintf (dump_file, " %d\t%d\t%d\t",
5091 use->cost_map[j].cand->id,
5092 use->cost_map[j].cost.cost,
5093 use->cost_map[j].cost.complexity);
5094 if (use->cost_map[j].depends_on)
5095 bitmap_print (dump_file,
5096 use->cost_map[j].depends_on, "","");
5097 if (use->cost_map[j].inv_expr_id != -1)
5098 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5099 fprintf (dump_file, "\n");
5100 }
5101
5102 fprintf (dump_file, "\n");
5103 }
5104 fprintf (dump_file, "\n");
5105 }
5106 }
5107
5108 /* Determines cost of the candidate CAND. */
5109
5110 static void
5111 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5112 {
5113 comp_cost cost_base;
5114 unsigned cost, cost_step;
5115 tree base;
5116
5117 if (!cand->iv)
5118 {
5119 cand->cost = 0;
5120 return;
5121 }
5122
5123 /* There are two costs associated with the candidate -- its increment
5124 and its initialization. The second is almost negligible for any loop
5125 that rolls enough, so we give it only a small weight. */
5126
5127 base = cand->iv->base;
5128 cost_base = force_var_cost (data, base, NULL);
5129 /* It will be exceptional that the iv register happens to be initialized with
5130 the proper value at no cost. In general, there will at least be a regcopy
5131 or a const set. */
5132 if (cost_base.cost == 0)
5133 cost_base.cost = COSTS_N_INSNS (1);
5134 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5135
5136 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5137
5138 /* Prefer the original ivs unless we may gain something by replacing it.
5139 The reason is to make debugging simpler; so this is not relevant for
5140 artificial ivs created by other optimization passes. */
5141 if (cand->pos != IP_ORIGINAL
5142 || !SSA_NAME_VAR (cand->var_before)
5143 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5144 cost++;
5145
5146 /* Prefer not to insert statements into latch unless there are some
5147 already (so that we do not create unnecessary jumps). */
5148 if (cand->pos == IP_END
5149 && empty_block_p (ip_end_pos (data->current_loop)))
5150 cost++;
5151
5152 cand->cost = cost;
5153 cand->cost_step = cost_step;
5154 }
5155
5156 /* Determines costs of computation of the candidates. */
5157
5158 static void
5159 determine_iv_costs (struct ivopts_data *data)
5160 {
5161 unsigned i;
5162
5163 if (dump_file && (dump_flags & TDF_DETAILS))
5164 {
5165 fprintf (dump_file, "Candidate costs:\n");
5166 fprintf (dump_file, " cand\tcost\n");
5167 }
5168
5169 for (i = 0; i < n_iv_cands (data); i++)
5170 {
5171 struct iv_cand *cand = iv_cand (data, i);
5172
5173 determine_iv_cost (data, cand);
5174
5175 if (dump_file && (dump_flags & TDF_DETAILS))
5176 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5177 }
5178
5179 if (dump_file && (dump_flags & TDF_DETAILS))
5180 fprintf (dump_file, "\n");
5181 }
5182
5183 /* Calculates cost for having SIZE induction variables. */
5184
5185 static unsigned
5186 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5187 {
5188 /* We add size to the cost, so that we prefer eliminating ivs
5189 if possible. */
5190 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5191 data->body_includes_call);
5192 }
5193
5194 /* For each size of the induction variable set determine the penalty. */
5195
5196 static void
5197 determine_set_costs (struct ivopts_data *data)
5198 {
5199 unsigned j, n;
5200 gimple phi;
5201 gimple_stmt_iterator psi;
5202 tree op;
5203 struct loop *loop = data->current_loop;
5204 bitmap_iterator bi;
5205
5206 if (dump_file && (dump_flags & TDF_DETAILS))
5207 {
5208 fprintf (dump_file, "Global costs:\n");
5209 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5210 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5211 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5212 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5213 }
5214
5215 n = 0;
5216 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5217 {
5218 phi = gsi_stmt (psi);
5219 op = PHI_RESULT (phi);
5220
5221 if (virtual_operand_p (op))
5222 continue;
5223
5224 if (get_iv (data, op))
5225 continue;
5226
5227 n++;
5228 }
5229
5230 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5231 {
5232 struct version_info *info = ver_info (data, j);
5233
5234 if (info->inv_id && info->has_nonlin_use)
5235 n++;
5236 }
5237
5238 data->regs_used = n;
5239 if (dump_file && (dump_flags & TDF_DETAILS))
5240 fprintf (dump_file, " regs_used %d\n", n);
5241
5242 if (dump_file && (dump_flags & TDF_DETAILS))
5243 {
5244 fprintf (dump_file, " cost for size:\n");
5245 fprintf (dump_file, " ivs\tcost\n");
5246 for (j = 0; j <= 2 * target_avail_regs; j++)
5247 fprintf (dump_file, " %d\t%d\n", j,
5248 ivopts_global_cost_for_size (data, j));
5249 fprintf (dump_file, "\n");
5250 }
5251 }
5252
5253 /* Returns true if A is a cheaper cost pair than B. */
5254
5255 static bool
5256 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5257 {
5258 int cmp;
5259
5260 if (!a)
5261 return false;
5262
5263 if (!b)
5264 return true;
5265
5266 cmp = compare_costs (a->cost, b->cost);
5267 if (cmp < 0)
5268 return true;
5269
5270 if (cmp > 0)
5271 return false;
5272
5273 /* In case the costs are the same, prefer the cheaper candidate. */
5274 if (a->cand->cost < b->cand->cost)
5275 return true;
5276
5277 return false;
5278 }
5279
5280
5281 /* Returns the candidate by which USE is expressed in IVS. */
5282
5283 static struct cost_pair *
5284 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5285 {
5286 return ivs->cand_for_use[use->id];
5287 }
5288
5289 /* Computes the cost field of IVS structure. */
5290
5291 static void
5292 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5293 {
5294 comp_cost cost = ivs->cand_use_cost;
5295
5296 cost.cost += ivs->cand_cost;
5297
5298 cost.cost += ivopts_global_cost_for_size (data,
5299 ivs->n_regs + ivs->num_used_inv_expr);
5300
5301 ivs->cost = cost;
5302 }
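/* Numerically (made-up values): with cand_use_cost.cost == 12,
   cand_cost == 6, n_regs == 3 and num_used_inv_expr == 1, the total is
   12 + 6 + ivopts_global_cost_for_size (data, 4), the last term charging
   for keeping four values live across the loop. */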
5303
5304 /* Remove invariants in set INVS from set IVS. */
5305
5306 static void
5307 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5308 {
5309 bitmap_iterator bi;
5310 unsigned iid;
5311
5312 if (!invs)
5313 return;
5314
5315 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5316 {
5317 ivs->n_invariant_uses[iid]--;
5318 if (ivs->n_invariant_uses[iid] == 0)
5319 ivs->n_regs--;
5320 }
5321 }
5322
5323 /* Set USE not to be expressed by any candidate in IVS. */
5324
5325 static void
5326 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5327 struct iv_use *use)
5328 {
5329 unsigned uid = use->id, cid;
5330 struct cost_pair *cp;
5331
5332 cp = ivs->cand_for_use[uid];
5333 if (!cp)
5334 return;
5335 cid = cp->cand->id;
5336
5337 ivs->bad_uses++;
5338 ivs->cand_for_use[uid] = NULL;
5339 ivs->n_cand_uses[cid]--;
5340
5341 if (ivs->n_cand_uses[cid] == 0)
5342 {
5343 bitmap_clear_bit (ivs->cands, cid);
5344 /* Do not count the pseudocandidates. */
5345 if (cp->cand->iv)
5346 ivs->n_regs--;
5347 ivs->n_cands--;
5348 ivs->cand_cost -= cp->cand->cost;
5349
5350 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5351 }
5352
5353 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5354
5355 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5356
5357 if (cp->inv_expr_id != -1)
5358 {
5359 ivs->used_inv_expr[cp->inv_expr_id]--;
5360 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5361 ivs->num_used_inv_expr--;
5362 }
5363 iv_ca_recount_cost (data, ivs);
5364 }
5365
5366 /* Add invariants in set INVS to set IVS. */
5367
5368 static void
5369 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5370 {
5371 bitmap_iterator bi;
5372 unsigned iid;
5373
5374 if (!invs)
5375 return;
5376
5377 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5378 {
5379 ivs->n_invariant_uses[iid]++;
5380 if (ivs->n_invariant_uses[iid] == 1)
5381 ivs->n_regs++;
5382 }
5383 }
5384
5385 /* Set cost pair for USE in set IVS to CP. */
5386
5387 static void
5388 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5389 struct iv_use *use, struct cost_pair *cp)
5390 {
5391 unsigned uid = use->id, cid;
5392
5393 if (ivs->cand_for_use[uid] == cp)
5394 return;
5395
5396 if (ivs->cand_for_use[uid])
5397 iv_ca_set_no_cp (data, ivs, use);
5398
5399 if (cp)
5400 {
5401 cid = cp->cand->id;
5402
5403 ivs->bad_uses--;
5404 ivs->cand_for_use[uid] = cp;
5405 ivs->n_cand_uses[cid]++;
5406 if (ivs->n_cand_uses[cid] == 1)
5407 {
5408 bitmap_set_bit (ivs->cands, cid);
5409 /* Do not count the pseudocandidates. */
5410 if (cp->cand->iv)
5411 ivs->n_regs++;
5412 ivs->n_cands++;
5413 ivs->cand_cost += cp->cand->cost;
5414
5415 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5416 }
5417
5418 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5419 iv_ca_set_add_invariants (ivs, cp->depends_on);
5420
5421 if (cp->inv_expr_id != -1)
5422 {
5423 ivs->used_inv_expr[cp->inv_expr_id]++;
5424 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5425 ivs->num_used_inv_expr++;
5426 }
5427 iv_ca_recount_cost (data, ivs);
5428 }
5429 }
5430
5431 /* Extend set IVS by expressing USE by some of the candidates in it
5432 if possible. All important candidates will be considered
5433 if IMPORTANT_CANDIDATES is true. */
5434
5435 static void
5436 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5437 struct iv_use *use, bool important_candidates)
5438 {
5439 struct cost_pair *best_cp = NULL, *cp;
5440 bitmap_iterator bi;
5441 bitmap cands;
5442 unsigned i;
5443
5444 gcc_assert (ivs->upto >= use->id);
5445
5446 if (ivs->upto == use->id)
5447 {
5448 ivs->upto++;
5449 ivs->bad_uses++;
5450 }
5451
5452 cands = (important_candidates ? data->important_candidates : ivs->cands);
5453 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5454 {
5455 struct iv_cand *cand = iv_cand (data, i);
5456
5457 cp = get_use_iv_cost (data, use, cand);
5458
5459 if (cheaper_cost_pair (cp, best_cp))
5460 best_cp = cp;
5461 }
5462
5463 iv_ca_set_cp (data, ivs, use, best_cp);
5464 }
5465
5466 /* Get cost for assignment IVS. */
5467
5468 static comp_cost
5469 iv_ca_cost (struct iv_ca *ivs)
5470 {
5471 /* This was a conditional expression but it triggered a bug in
5472 Sun C 5.5. */
5473 if (ivs->bad_uses)
5474 return infinite_cost;
5475 else
5476 return ivs->cost;
5477 }
5478
5479 /* Returns true if all dependences of CP are among invariants in IVS. */
5480
5481 static bool
5482 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5483 {
5484 unsigned i;
5485 bitmap_iterator bi;
5486
5487 if (!cp->depends_on)
5488 return true;
5489
5490 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5491 {
5492 if (ivs->n_invariant_uses[i] == 0)
5493 return false;
5494 }
5495
5496 return true;
5497 }
5498
5499 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5500 it before NEXT_CHANGE. */
5501
5502 static struct iv_ca_delta *
5503 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5504 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5505 {
5506 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5507
5508 change->use = use;
5509 change->old_cp = old_cp;
5510 change->new_cp = new_cp;
5511 change->next_change = next_change;
5512
5513 return change;
5514 }
5515
5516 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5517 are rewritten. */
5518
5519 static struct iv_ca_delta *
5520 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5521 {
5522 struct iv_ca_delta *last;
5523
5524 if (!l2)
5525 return l1;
5526
5527 if (!l1)
5528 return l2;
5529
5530 for (last = l1; last->next_change; last = last->next_change)
5531 continue;
5532 last->next_change = l2;
5533
5534 return l1;
5535 }
5536
5537 /* Reverse the list of changes DELTA, forming the inverse to it. */
5538
5539 static struct iv_ca_delta *
5540 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5541 {
5542 struct iv_ca_delta *act, *next, *prev = NULL;
5543 struct cost_pair *tmp;
5544
5545 for (act = delta; act; act = next)
5546 {
5547 next = act->next_change;
5548 act->next_change = prev;
5549 prev = act;
5550
5551 tmp = act->old_cp;
5552 act->old_cp = act->new_cp;
5553 act->new_cp = tmp;
5554 }
5555
5556 return prev;
5557 }
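/* Reversal both flips the list order and swaps each OLD_CP/NEW_CP pair,
   so committing the reversed delta undoes the original changes in the
   opposite order; e.g. (use1: a -> b), (use2: c -> d) becomes
   (use2: d -> c), (use1: b -> a). */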
5558
5559 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5560 reverted instead. */
5561
5562 static void
5563 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5564 struct iv_ca_delta *delta, bool forward)
5565 {
5566 struct cost_pair *from, *to;
5567 struct iv_ca_delta *act;
5568
5569 if (!forward)
5570 delta = iv_ca_delta_reverse (delta);
5571
5572 for (act = delta; act; act = act->next_change)
5573 {
5574 from = act->old_cp;
5575 to = act->new_cp;
5576 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5577 iv_ca_set_cp (data, ivs, act->use, to);
5578 }
5579
5580 if (!forward)
5581 iv_ca_delta_reverse (delta);
5582 }
5583
5584 /* Returns true if CAND is used in IVS. */
5585
5586 static bool
5587 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5588 {
5589 return ivs->n_cand_uses[cand->id] > 0;
5590 }
5591
5592 /* Returns number of induction variable candidates in the set IVS. */
5593
5594 static unsigned
5595 iv_ca_n_cands (struct iv_ca *ivs)
5596 {
5597 return ivs->n_cands;
5598 }
5599
5600 /* Free the list of changes DELTA. */
5601
5602 static void
5603 iv_ca_delta_free (struct iv_ca_delta **delta)
5604 {
5605 struct iv_ca_delta *act, *next;
5606
5607 for (act = *delta; act; act = next)
5608 {
5609 next = act->next_change;
5610 free (act);
5611 }
5612
5613 *delta = NULL;
5614 }
5615
5616 /* Allocates a new iv candidate assignment. */
5617
5618 static struct iv_ca *
5619 iv_ca_new (struct ivopts_data *data)
5620 {
5621 struct iv_ca *nw = XNEW (struct iv_ca);
5622
5623 nw->upto = 0;
5624 nw->bad_uses = 0;
5625 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5626 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5627 nw->cands = BITMAP_ALLOC (NULL);
5628 nw->n_cands = 0;
5629 nw->n_regs = 0;
5630 nw->cand_use_cost = no_cost;
5631 nw->cand_cost = 0;
5632 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5633 nw->cost = no_cost;
5634 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5635 nw->num_used_inv_expr = 0;
5636
5637 return nw;
5638 }
5639
5640 /* Free memory occupied by the set IVS. */
5641
5642 static void
5643 iv_ca_free (struct iv_ca **ivs)
5644 {
5645 free ((*ivs)->cand_for_use);
5646 free ((*ivs)->n_cand_uses);
5647 BITMAP_FREE ((*ivs)->cands);
5648 free ((*ivs)->n_invariant_uses);
5649 free ((*ivs)->used_inv_expr);
5650 free (*ivs);
5651 *ivs = NULL;
5652 }
5653
5654 /* Dumps IVS to FILE. */
5655
5656 static void
5657 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5658 {
5659 const char *pref = " invariants ";
5660 unsigned i;
5661 comp_cost cost = iv_ca_cost (ivs);
5662
5663 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5664 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5665 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5666 bitmap_print (file, ivs->cands, " candidates: ","\n");
5667
5668 for (i = 0; i < ivs->upto; i++)
5669 {
5670 struct iv_use *use = iv_use (data, i);
5671 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5672 if (cp)
5673 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5674 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5675 else
5676 fprintf (file, " use:%d --> ??\n", use->id);
5677 }
5678
5679 for (i = 1; i <= data->max_inv_id; i++)
5680 if (ivs->n_invariant_uses[i])
5681 {
5682 fprintf (file, "%s%d", pref, i);
5683 pref = ", ";
5684 }
5685 fprintf (file, "\n\n");
5686 }
5687
5688 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5689 new set, and store differences in DELTA. Number of induction variables
5690 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5691 the function tries to find a solution with a minimal number of iv candidates. */
5692
5693 static comp_cost
5694 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5695 struct iv_cand *cand, struct iv_ca_delta **delta,
5696 unsigned *n_ivs, bool min_ncand)
5697 {
5698 unsigned i;
5699 comp_cost cost;
5700 struct iv_use *use;
5701 struct cost_pair *old_cp, *new_cp;
5702
5703 *delta = NULL;
5704 for (i = 0; i < ivs->upto; i++)
5705 {
5706 use = iv_use (data, i);
5707 old_cp = iv_ca_cand_for_use (ivs, use);
5708
5709 if (old_cp
5710 && old_cp->cand == cand)
5711 continue;
5712
5713 new_cp = get_use_iv_cost (data, use, cand);
5714 if (!new_cp)
5715 continue;
5716
5717 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5718 continue;
5719
5720 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5721 continue;
5722
5723 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5724 }
5725
5726 iv_ca_delta_commit (data, ivs, *delta, true);
5727 cost = iv_ca_cost (ivs);
5728 if (n_ivs)
5729 *n_ivs = iv_ca_n_cands (ivs);
5730 iv_ca_delta_commit (data, ivs, *delta, false);
5731
5732 return cost;
5733 }
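/* A typical probing pattern (a sketch of how callers below use this);
   since iv_ca_extend commits DELTA and then reverts it, IVS itself is
   left unchanged:

     acost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, false);
     if (compare_costs (acost, best_cost) < 0)
       {
         best_cost = acost;
         iv_ca_delta_free (&best_delta);
         best_delta = act_delta;
       }
     else
       iv_ca_delta_free (&act_delta);
*/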
5734
5735 /* Try narrowing set IVS by removing CAND. Return the cost of
5736 the new set and store the differences in DELTA. */
5737
5738 static comp_cost
5739 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5740 struct iv_cand *cand, struct iv_ca_delta **delta)
5741 {
5742 unsigned i, ci;
5743 struct iv_use *use;
5744 struct cost_pair *old_cp, *new_cp, *cp;
5745 bitmap_iterator bi;
5746 struct iv_cand *cnd;
5747 comp_cost cost;
5748
5749 *delta = NULL;
5750 for (i = 0; i < n_iv_uses (data); i++)
5751 {
5752 use = iv_use (data, i);
5753
5754 old_cp = iv_ca_cand_for_use (ivs, use);
5755 if (old_cp->cand != cand)
5756 continue;
5757
5758 new_cp = NULL;
5759
5760 if (data->consider_all_candidates)
5761 {
5762 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5763 {
5764 if (ci == cand->id)
5765 continue;
5766
5767 cnd = iv_cand (data, ci);
5768
5769 cp = get_use_iv_cost (data, use, cnd);
5770 if (!cp)
5771 continue;
5772
5773 if (!iv_ca_has_deps (ivs, cp))
5774 continue;
5775
5776 if (!cheaper_cost_pair (cp, new_cp))
5777 continue;
5778
5779 new_cp = cp;
5780 }
5781 }
5782 else
5783 {
5784 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5785 {
5786 if (ci == cand->id)
5787 continue;
5788
5789 cnd = iv_cand (data, ci);
5790
5791 cp = get_use_iv_cost (data, use, cnd);
5792 if (!cp)
5793 continue;
5794 if (!iv_ca_has_deps (ivs, cp))
5795 continue;
5796
5797 if (!cheaper_cost_pair (cp, new_cp))
5798 continue;
5799
5800 new_cp = cp;
5801 }
5802 }
5803
5804 if (!new_cp)
5805 {
5806 iv_ca_delta_free (delta);
5807 return infinite_cost;
5808 }
5809
5810 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5811 }
5812
5813 iv_ca_delta_commit (data, ivs, *delta, true);
5814 cost = iv_ca_cost (ivs);
5815 iv_ca_delta_commit (data, ivs, *delta, false);
5816
5817 return cost;
5818 }
5819
5820 /* Try optimizing the set of candidates IVS by removing candidates other
5821 than EXCEPT_CAND from it. Return cost of the new set, and store
5822 differences in DELTA. */
5823
5824 static comp_cost
5825 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5826 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5827 {
5828 bitmap_iterator bi;
5829 struct iv_ca_delta *act_delta, *best_delta;
5830 unsigned i;
5831 comp_cost best_cost, acost;
5832 struct iv_cand *cand;
5833
5834 best_delta = NULL;
5835 best_cost = iv_ca_cost (ivs);
5836
5837 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5838 {
5839 cand = iv_cand (data, i);
5840
5841 if (cand == except_cand)
5842 continue;
5843
5844 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5845
5846 if (compare_costs (acost, best_cost) < 0)
5847 {
5848 best_cost = acost;
5849 iv_ca_delta_free (&best_delta);
5850 best_delta = act_delta;
5851 }
5852 else
5853 iv_ca_delta_free (&act_delta);
5854 }
5855
5856 if (!best_delta)
5857 {
5858 *delta = NULL;
5859 return best_cost;
5860 }
5861
5862 /* Recurse to possibly remove other unnecessary ivs. */
5863 iv_ca_delta_commit (data, ivs, best_delta, true);
5864 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5865 iv_ca_delta_commit (data, ivs, best_delta, false);
5866 *delta = iv_ca_delta_join (best_delta, *delta);
5867 return best_cost;
5868 }
5869
5870 /* Tries to extend the set IVS in the best possible way in order
5871 to express the USE. If ORIGINALP is true, prefer candidates from
5872 the original set of IVs, otherwise favor important candidates not
5873 based on any memory object. */
5874
5875 static bool
5876 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5877 struct iv_use *use, bool originalp)
5878 {
5879 comp_cost best_cost, act_cost;
5880 unsigned i;
5881 bitmap_iterator bi;
5882 struct iv_cand *cand;
5883 struct iv_ca_delta *best_delta = NULL, *act_delta;
5884 struct cost_pair *cp;
5885
5886 iv_ca_add_use (data, ivs, use, false);
5887 best_cost = iv_ca_cost (ivs);
5888
5889 cp = iv_ca_cand_for_use (ivs, use);
5890 if (!cp)
5891 {
5892 ivs->upto--;
5893 ivs->bad_uses--;
5894 iv_ca_add_use (data, ivs, use, true);
5895 best_cost = iv_ca_cost (ivs);
5896 cp = iv_ca_cand_for_use (ivs, use);
5897 }
5898 if (cp)
5899 {
5900 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5901 iv_ca_set_no_cp (data, ivs, use);
5902 }
5903
5904 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5905 first try important candidates not based on any memory object. Only if
5906 this fails do we try the use-specific ones. Rationale -- in loops with
5907 many variables the best choice often is to use just one generic biv. If
5908 we added many use-specific ivs here, the optimization algorithm would
5909 later be likely to get stuck in a local minimum, causing us to create
5910 too many ivs. Going from few ivs to more seems more likely to be
5911 successful -- starting from few ivs, replacing an expensive use by a
5912 specific iv should always be a win. */
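/* As a purely hypothetical illustration: in a loop such as

     for (i = 0; i < n; i++)
       {
         a[i] = ...;
         b[i] = ...;
       }

   expressing both addresses from the single generic biv i (as
   &a + 4 * i and &b + 4 * i for 4-byte elements) tends to beat
   starting with one pointer iv per use; a use only gets its own
   iv later if that demonstrably lowers the cost. */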
5913 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5914 {
5915 cand = iv_cand (data, i);
5916
5917 if (originalp && cand->pos != IP_ORIGINAL)
5918 continue;
5919
5920 if (!originalp && cand->iv->base_object != NULL_TREE)
5921 continue;
5922
5923 if (iv_ca_cand_used_p (ivs, cand))
5924 continue;
5925
5926 cp = get_use_iv_cost (data, use, cand);
5927 if (!cp)
5928 continue;
5929
5930 iv_ca_set_cp (data, ivs, use, cp);
5931 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5932 true);
5933 iv_ca_set_no_cp (data, ivs, use);
5934 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5935
5936 if (compare_costs (act_cost, best_cost) < 0)
5937 {
5938 best_cost = act_cost;
5939
5940 iv_ca_delta_free (&best_delta);
5941 best_delta = act_delta;
5942 }
5943 else
5944 iv_ca_delta_free (&act_delta);
5945 }
5946
5947 if (infinite_cost_p (best_cost))
5948 {
5949 for (i = 0; i < use->n_map_members; i++)
5950 {
5951 cp = use->cost_map + i;
5952 cand = cp->cand;
5953 if (!cand)
5954 continue;
5955
5956 /* Already tried this. */
5957 if (cand->important)
5958 {
5959 if (originalp && cand->pos == IP_ORIGINAL)
5960 continue;
5961 if (!originalp && cand->iv->base_object == NULL_TREE)
5962 continue;
5963 }
5964
5965 if (iv_ca_cand_used_p (ivs, cand))
5966 continue;
5967
5968 act_delta = NULL;
5969 iv_ca_set_cp (data, ivs, use, cp);
5970 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5971 iv_ca_set_no_cp (data, ivs, use);
5972 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5973 cp, act_delta);
5974
5975 if (compare_costs (act_cost, best_cost) < 0)
5976 {
5977 best_cost = act_cost;
5978
5979 if (best_delta)
5980 iv_ca_delta_free (&best_delta);
5981 best_delta = act_delta;
5982 }
5983 else
5984 iv_ca_delta_free (&act_delta);
5985 }
5986 }
5987
5988 iv_ca_delta_commit (data, ivs, best_delta, true);
5989 iv_ca_delta_free (&best_delta);
5990
5991 return !infinite_cost_p (best_cost);
5992 }
5993
5994 /* Finds an initial assignment of candidates to uses. */
5995
5996 static struct iv_ca *
5997 get_initial_solution (struct ivopts_data *data, bool originalp)
5998 {
5999 struct iv_ca *ivs = iv_ca_new (data);
6000 unsigned i;
6001
6002 for (i = 0; i < n_iv_uses (data); i++)
6003 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6004 {
6005 iv_ca_free (&ivs);
6006 return NULL;
6007 }
6008
6009 return ivs;
6010 }
6011
6012 /* Tries to improve the set of induction variables IVS. */
6013
6014 static bool
6015 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6016 {
6017 unsigned i, n_ivs;
6018 comp_cost acost, best_cost = iv_ca_cost (ivs);
6019 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6020 struct iv_cand *cand;
6021
6022 /* Try extending the set of induction variables by one. */
6023 for (i = 0; i < n_iv_cands (data); i++)
6024 {
6025 cand = iv_cand (data, i);
6026
6027 if (iv_ca_cand_used_p (ivs, cand))
6028 continue;
6029
6030 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6031 if (!act_delta)
6032 continue;
6033
6034 /* If we successfully added the candidate and the set is small enough,
6035 try optimizing it by removing other candidates. */
6036 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6037 {
6038 iv_ca_delta_commit (data, ivs, act_delta, true);
6039 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6040 iv_ca_delta_commit (data, ivs, act_delta, false);
6041 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6042 }
6043
6044 if (compare_costs (acost, best_cost) < 0)
6045 {
6046 best_cost = acost;
6047 iv_ca_delta_free (&best_delta);
6048 best_delta = act_delta;
6049 }
6050 else
6051 iv_ca_delta_free (&act_delta);
6052 }
6053
6054 if (!best_delta)
6055 {
6056 /* Try removing the candidates from the set instead. */
6057 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6058
6059 /* Nothing more we can do. */
6060 if (!best_delta)
6061 return false;
6062 }
6063
6064 iv_ca_delta_commit (data, ivs, best_delta, true);
6065 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6066 iv_ca_delta_free (&best_delta);
6067 return true;
6068 }
6069
6070 /* Attempts to find the optimal set of induction variables. We use a simple
6071 greedy heuristic: we try to replace at most one candidate in the selected
6072 solution and remove the unused ivs while this improves the cost. */
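/* An illustrative sketch (not taken from any particular testcase):
   starting from the initial set {i}, one improvement step may extend it
   to {i, p} and then prune it back to {p} if that lowers the cost; the
   iteration stops at the first local minimum of the cost function. */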
6073
6074 static struct iv_ca *
6075 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6076 {
6077 struct iv_ca *set;
6078
6079 /* Get the initial solution. */
6080 set = get_initial_solution (data, originalp);
6081 if (!set)
6082 {
6083 if (dump_file && (dump_flags & TDF_DETAILS))
6084 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6085 return NULL;
6086 }
6087
6088 if (dump_file && (dump_flags & TDF_DETAILS))
6089 {
6090 fprintf (dump_file, "Initial set of candidates:\n");
6091 iv_ca_dump (data, dump_file, set);
6092 }
6093
6094 while (try_improve_iv_set (data, set))
6095 {
6096 if (dump_file && (dump_flags & TDF_DETAILS))
6097 {
6098 fprintf (dump_file, "Improved to:\n");
6099 iv_ca_dump (data, dump_file, set);
6100 }
6101 }
6102
6103 return set;
6104 }
6105
6106 static struct iv_ca *
6107 find_optimal_iv_set (struct ivopts_data *data)
6108 {
6109 unsigned i;
6110 struct iv_ca *set, *origset;
6111 struct iv_use *use;
6112 comp_cost cost, origcost;
6113
6114 /* Determine the cost using a strategy that prefers the original IVs,
6115 then try again using a strategy that prefers important candidates not
6116 based on any memory object. */
6117 origset = find_optimal_iv_set_1 (data, true);
6118 set = find_optimal_iv_set_1 (data, false);
6119
6120 if (!origset && !set)
6121 return NULL;
6122
6123 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6124 cost = set ? iv_ca_cost (set) : infinite_cost;
6125
6126 if (dump_file && (dump_flags & TDF_DETAILS))
6127 {
6128 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6129 origcost.cost, origcost.complexity);
6130 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6131 cost.cost, cost.complexity);
6132 }
6133
6134 /* Choose the one with the best cost. */
6135 if (compare_costs (origcost, cost) <= 0)
6136 {
6137 if (set)
6138 iv_ca_free (&set);
6139 set = origset;
6140 }
6141 else if (origset)
6142 iv_ca_free (&origset);
6143
6144 for (i = 0; i < n_iv_uses (data); i++)
6145 {
6146 use = iv_use (data, i);
6147 use->selected = iv_ca_cand_for_use (set, use)->cand;
6148 }
6149
6150 return set;
6151 }
6152
6153 /* Creates a new induction variable corresponding to CAND. */
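/* Roughly (illustrative only): for IP_NORMAL the increment is emitted
   just before the exit test,

       i_1 = PHI <base, i_2>
       ...
       i_2 = i_1 + step;
       if (cond) goto exit;

   for IP_END it is emitted after the exit test (in the latch), and for
   IP_BEFORE_USE/IP_AFTER_USE around CAND->incremented_at. */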
6154
6155 static void
6156 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6157 {
6158 gimple_stmt_iterator incr_pos;
6159 tree base;
6160 bool after = false;
6161
6162 if (!cand->iv)
6163 return;
6164
6165 switch (cand->pos)
6166 {
6167 case IP_NORMAL:
6168 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6169 break;
6170
6171 case IP_END:
6172 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6173 after = true;
6174 break;
6175
6176 case IP_AFTER_USE:
6177 after = true;
6178 /* fall through */
6179 case IP_BEFORE_USE:
6180 incr_pos = gsi_for_stmt (cand->incremented_at);
6181 break;
6182
6183 case IP_ORIGINAL:
6184 /* Mark that the iv is preserved. */
6185 name_info (data, cand->var_before)->preserve_biv = true;
6186 name_info (data, cand->var_after)->preserve_biv = true;
6187
6188 /* Rewrite the increment so that it uses var_before directly. */
6189 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6190 return;
6191 }
6192
6193 gimple_add_tmp_var (cand->var_before);
6194
6195 base = unshare_expr (cand->iv->base);
6196
6197 create_iv (base, unshare_expr (cand->iv->step),
6198 cand->var_before, data->current_loop,
6199 &incr_pos, after, &cand->var_before, &cand->var_after);
6200 }
6201
6202 /* Creates new induction variables described in SET. */
6203
6204 static void
6205 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6206 {
6207 unsigned i;
6208 struct iv_cand *cand;
6209 bitmap_iterator bi;
6210
6211 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6212 {
6213 cand = iv_cand (data, i);
6214 create_new_iv (data, cand);
6215 }
6216
6217 if (dump_file && (dump_flags & TDF_DETAILS))
6218 {
6219 fprintf (dump_file, "\nSelected IV set:\n");
6220 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6221 {
6222 cand = iv_cand (data, i);
6223 dump_cand (dump_file, cand);
6224 }
6225 fprintf (dump_file, "\n");
6226 }
6227 }
6228
6229 /* Rewrites USE (definition of iv used in a nonlinear expression)
6230 using candidate CAND. */
6231
6232 static void
6233 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6234 struct iv_use *use, struct iv_cand *cand)
6235 {
6236 tree comp;
6237 tree op, tgt;
6238 gimple ass;
6239 gimple_stmt_iterator bsi;
6240
6241 /* An important special case -- if we are asked to express the value of
6242 the original iv by itself, just exit; there is no need to
6243 introduce a new computation (that might also need casting the
6244 variable to unsigned and back). */
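/* E.g. (hypothetical SSA names): for the original biv increment

     i_5 = i_1 + 1;

   expressed by the candidate with var_before == i_1 and
   var_after == i_5, the statement is simply left in place. */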
6245 if (cand->pos == IP_ORIGINAL
6246 && cand->incremented_at == use->stmt)
6247 {
6248 enum tree_code stmt_code;
6249
6250 gcc_assert (is_gimple_assign (use->stmt));
6251 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6252
6253 /* Check whether we may leave the computation unchanged.
6254 This is the case only if it does not rely on other
6255 computations in the loop -- otherwise, the computation
6256 we rely upon may be removed in remove_unused_ivs,
6257 thus leading to ICE. */
6258 stmt_code = gimple_assign_rhs_code (use->stmt);
6259 if (stmt_code == PLUS_EXPR
6260 || stmt_code == MINUS_EXPR
6261 || stmt_code == POINTER_PLUS_EXPR)
6262 {
6263 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6264 op = gimple_assign_rhs2 (use->stmt);
6265 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6266 op = gimple_assign_rhs1 (use->stmt);
6267 else
6268 op = NULL_TREE;
6269 }
6270 else
6271 op = NULL_TREE;
6272
6273 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6274 return;
6275 }
6276
6277 comp = get_computation (data->current_loop, use, cand);
6278 gcc_assert (comp != NULL_TREE);
6279
6280 switch (gimple_code (use->stmt))
6281 {
6282 case GIMPLE_PHI:
6283 tgt = PHI_RESULT (use->stmt);
6284
6285 /* If we should keep the biv, do not replace it. */
6286 if (name_info (data, tgt)->preserve_biv)
6287 return;
6288
6289 bsi = gsi_after_labels (gimple_bb (use->stmt));
6290 break;
6291
6292 case GIMPLE_ASSIGN:
6293 tgt = gimple_assign_lhs (use->stmt);
6294 bsi = gsi_for_stmt (use->stmt);
6295 break;
6296
6297 default:
6298 gcc_unreachable ();
6299 }
6300
6301 if (!valid_gimple_rhs_p (comp)
6302 || (gimple_code (use->stmt) != GIMPLE_PHI
6303 /* We can't allow re-allocating the stmt as it might still be
6304 pointed to. */
6305 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6306 >= gimple_num_ops (gsi_stmt (bsi)))))
6307 {
6308 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6309 true, GSI_SAME_STMT);
6310 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6311 {
6312 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6313 /* As this isn't a plain copy we have to reset alignment
6314 information. */
6315 if (SSA_NAME_PTR_INFO (comp))
6316 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6317 }
6318 }
6319
6320 if (gimple_code (use->stmt) == GIMPLE_PHI)
6321 {
6322 ass = gimple_build_assign (tgt, comp);
6323 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6324
6325 bsi = gsi_for_stmt (use->stmt);
6326 remove_phi_node (&bsi, false);
6327 }
6328 else
6329 {
6330 gimple_assign_set_rhs_from_tree (&bsi, comp);
6331 use->stmt = gsi_stmt (bsi);
6332 }
6333 }
6334
6335 /* Performs a peephole optimization to reorder the iv update statement with
6336 a mem ref to enable instruction combining in later phases. The mem ref uses
6337 the iv value before the update, so the reordering transformation requires
6338 adjustment of the offset. CAND is the selected IV_CAND.
6339
6340 Example:
6341
6342 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6343 iv2 = iv1 + 1;
6344
6345 if (t < val) (1)
6346 goto L;
6347 goto Head;
6348
6349
6350 directly propagating t over to (1) would introduce an overlapping live
6351 range and thus increase register pressure. This peephole transforms it into:
6352
6353
6354 iv2 = iv1 + 1;
6355 t = MEM_REF (base, iv2, 8, 8);
6356 if (t < val)
6357 goto L;
6358 goto Head;
6359 */
6360
6361 static void
6362 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6363 {
6364 tree var_after;
6365 gimple iv_update, stmt;
6366 basic_block bb;
6367 gimple_stmt_iterator gsi, gsi_iv;
6368
6369 if (cand->pos != IP_NORMAL)
6370 return;
6371
6372 var_after = cand->var_after;
6373 iv_update = SSA_NAME_DEF_STMT (var_after);
6374
6375 bb = gimple_bb (iv_update);
6376 gsi = gsi_last_nondebug_bb (bb);
6377 stmt = gsi_stmt (gsi);
6378
6379 /* Only handle conditional statements for now. */
6380 if (gimple_code (stmt) != GIMPLE_COND)
6381 return;
6382
6383 gsi_prev_nondebug (&gsi);
6384 stmt = gsi_stmt (gsi);
6385 if (stmt != iv_update)
6386 return;
6387
6388 gsi_prev_nondebug (&gsi);
6389 if (gsi_end_p (gsi))
6390 return;
6391
6392 stmt = gsi_stmt (gsi);
6393 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6394 return;
6395
6396 if (stmt != use->stmt)
6397 return;
6398
6399 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6400 return;
6401
6402 if (dump_file && (dump_flags & TDF_DETAILS))
6403 {
6404 fprintf (dump_file, "Reordering \n");
6405 print_gimple_stmt (dump_file, iv_update, 0, 0);
6406 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6407 fprintf (dump_file, "\n");
6408 }
6409
6410 gsi = gsi_for_stmt (use->stmt);
6411 gsi_iv = gsi_for_stmt (iv_update);
6412 gsi_move_before (&gsi_iv, &gsi);
6413
6414 cand->pos = IP_BEFORE_USE;
6415 cand->incremented_at = use->stmt;
6416 }
6417
6418 /* Rewrites USE (address that is an iv) using candidate CAND. */
6419
6420 static void
6421 rewrite_use_address (struct ivopts_data *data,
6422 struct iv_use *use, struct iv_cand *cand)
6423 {
6424 aff_tree aff;
6425 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6426 tree base_hint = NULL_TREE;
6427 tree ref, iv;
6428 bool ok;
6429
6430 adjust_iv_update_pos (cand, use);
6431 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6432 gcc_assert (ok);
6433 unshare_aff_combination (&aff);
6434
6435 /* To avoid undefined overflow problems, all IV candidates use unsigned
6436 integer types. The drawback is that this makes it impossible for
6437 create_mem_ref to distinguish an IV that is based on a memory object
6438 from one that represents simply an offset.
6439
6440 To work around this problem, we pass a hint to create_mem_ref that
6441 indicates which variable (if any) in aff is an IV based on a memory
6442 object. Note that we only consider the candidate. If this is not
6443 based on an object, the base of the reference is in some subexpression
6444 of the use -- but these will use pointer types, so they are recognized
6445 by the create_mem_ref heuristics anyway. */
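/* E.g. (hypothetical): for a use a[i_1] expressed by a candidate whose
   base_object is &a, the candidate's variable is passed as BASE_HINT so
   that create_mem_ref can still recognize &a as the base of the memory
   reference even though the iv itself has an unsigned integer type. */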
6446 if (cand->iv->base_object)
6447 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6448
6449 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6450 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6451 reference_alias_ptr_type (*use->op_p),
6452 iv, base_hint, data->speed);
6453 copy_ref_info (ref, *use->op_p);
6454 *use->op_p = ref;
6455 }
6456
6457 /* Rewrites USE (a condition one of whose arguments is an iv) using
6458 candidate CAND. */
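/* E.g. (a hypothetical sketch): when the candidate pointer iv p can
   eliminate the original counter, an exit test

     if (i_1 < n_2)

   is rewritten against a bound computed in the loop preheader,
   something like

     _b = base_3 + n_2 * 4;   (inserted on the preheader edge)
     if (p_4 != _b)

   otherwise the iv operand of the condition is merely re-expressed in
   terms of the candidate. */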
6459
6460 static void
6461 rewrite_use_compare (struct ivopts_data *data,
6462 struct iv_use *use, struct iv_cand *cand)
6463 {
6464 tree comp, *var_p, op, bound;
6465 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6466 enum tree_code compare;
6467 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6468 bool ok;
6469
6470 bound = cp->value;
6471 if (bound)
6472 {
6473 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6474 tree var_type = TREE_TYPE (var);
6475 gimple_seq stmts;
6476
6477 if (dump_file && (dump_flags & TDF_DETAILS))
6478 {
6479 fprintf (dump_file, "Replacing exit test: ");
6480 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6481 }
6482 compare = cp->comp;
6483 bound = unshare_expr (fold_convert (var_type, bound));
6484 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6485 if (stmts)
6486 gsi_insert_seq_on_edge_immediate (
6487 loop_preheader_edge (data->current_loop),
6488 stmts);
6489
6490 gimple_cond_set_lhs (use->stmt, var);
6491 gimple_cond_set_code (use->stmt, compare);
6492 gimple_cond_set_rhs (use->stmt, op);
6493 return;
6494 }
6495
6496 /* The induction variable elimination failed; just express the original
6497 giv. */
6498 comp = get_computation (data->current_loop, use, cand);
6499 gcc_assert (comp != NULL_TREE);
6500
6501 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6502 gcc_assert (ok);
6503
6504 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6505 true, GSI_SAME_STMT);
6506 }
6507
6508 /* Rewrites USE using candidate CAND. */
6509
6510 static void
6511 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6512 {
6513 switch (use->type)
6514 {
6515 case USE_NONLINEAR_EXPR:
6516 rewrite_use_nonlinear_expr (data, use, cand);
6517 break;
6518
6519 case USE_ADDRESS:
6520 rewrite_use_address (data, use, cand);
6521 break;
6522
6523 case USE_COMPARE:
6524 rewrite_use_compare (data, use, cand);
6525 break;
6526
6527 default:
6528 gcc_unreachable ();
6529 }
6530
6531 update_stmt (use->stmt);
6532 }
6533
6534 /* Rewrite the uses using the selected induction variables. */
6535
6536 static void
6537 rewrite_uses (struct ivopts_data *data)
6538 {
6539 unsigned i;
6540 struct iv_cand *cand;
6541 struct iv_use *use;
6542
6543 for (i = 0; i < n_iv_uses (data); i++)
6544 {
6545 use = iv_use (data, i);
6546 cand = use->selected;
6547 gcc_assert (cand);
6548
6549 rewrite_use (data, use, cand);
6550 }
6551 }
6552
6553 /* Removes the ivs that are not used after rewriting. */
6554
6555 static void
6556 remove_unused_ivs (struct ivopts_data *data)
6557 {
6558 unsigned j;
6559 bitmap_iterator bi;
6560 bitmap toremove = BITMAP_ALLOC (NULL);
6561
6562 /* Figure out an order in which to release SSA DEFs so that we don't
6563 release something that we'd have to propagate into a debug stmt
6564 afterwards. */
6565 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6566 {
6567 struct version_info *info;
6568
6569 info = ver_info (data, j);
6570 if (info->iv
6571 && !integer_zerop (info->iv->step)
6572 && !info->inv_id
6573 && !info->iv->have_use_for
6574 && !info->preserve_biv)
6575 {
6576 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6577
6578 tree def = info->iv->ssa_name;
6579
6580 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6581 {
6582 imm_use_iterator imm_iter;
6583 use_operand_p use_p;
6584 gimple stmt;
6585 int count = 0;
6586
6587 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6588 {
6589 if (!gimple_debug_bind_p (stmt))
6590 continue;
6591
6592 /* We just want to determine whether to do nothing
6593 (count == 0), to substitute the computed
6594 expression into a single use of the SSA DEF by
6595 itself (count == 1), or to use a debug temp
6596 because the SSA DEF is used multiple times or as
6597 part of a larger expression (count > 1). */
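/* E.g. a lone bind "# DEBUG x => def_1" yields count == 1 and def is
   substituted directly, while "# DEBUG x => def_1 + 4" yields
   count == 2 and forces a debug temp below. */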
6598 count++;
6599 if (gimple_debug_bind_get_value (stmt) != def)
6600 count++;
6601
6602 if (count > 1)
6603 BREAK_FROM_IMM_USE_STMT (imm_iter);
6604 }
6605
6606 if (!count)
6607 continue;
6608
6609 struct iv_use dummy_use;
6610 struct iv_cand *best_cand = NULL, *cand;
6611 unsigned i, best_pref = 0, cand_pref;
6612
6613 memset (&dummy_use, 0, sizeof (dummy_use));
6614 dummy_use.iv = info->iv;
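/* Score each selected candidate: a matching step scores 4, a matching
   mode 2 and a constant base 1, so a step match outweighs the other
   two criteria combined. */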
6615 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6616 {
6617 cand = iv_use (data, i)->selected;
6618 if (cand == best_cand)
6619 continue;
6620 cand_pref = operand_equal_p (cand->iv->step,
6621 info->iv->step, 0)
6622 ? 4 : 0;
6623 cand_pref
6624 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6625 == TYPE_MODE (TREE_TYPE (info->iv->base))
6626 ? 2 : 0;
6627 cand_pref
6628 += TREE_CODE (cand->iv->base) == INTEGER_CST
6629 ? 1 : 0;
6630 if (best_cand == NULL || best_pref < cand_pref)
6631 {
6632 best_cand = cand;
6633 best_pref = cand_pref;
6634 }
6635 }
6636
6637 if (!best_cand)
6638 continue;
6639
6640 tree comp = get_computation_at (data->current_loop,
6641 &dummy_use, best_cand,
6642 SSA_NAME_DEF_STMT (def));
6643 if (!comp)
6644 continue;
6645
6646 if (count > 1)
6647 {
6648 tree vexpr = make_node (DEBUG_EXPR_DECL);
6649 DECL_ARTIFICIAL (vexpr) = 1;
6650 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6651 if (SSA_NAME_VAR (def))
6652 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6653 else
6654 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6655 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6656 gimple_stmt_iterator gsi;
6657
6658 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6659 gsi = gsi_after_labels (gimple_bb
6660 (SSA_NAME_DEF_STMT (def)));
6661 else
6662 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6663
6664 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6665 comp = vexpr;
6666 }
6667
6668 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6669 {
6670 if (!gimple_debug_bind_p (stmt))
6671 continue;
6672
6673 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6674 SET_USE (use_p, comp);
6675
6676 update_stmt (stmt);
6677 }
6678 }
6679 }
6680 }
6681
6682 release_defs_bitset (toremove);
6683
6684 BITMAP_FREE (toremove);
6685 }
6686
6687 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6688 for pointer_map_traverse. */
6689
6690 static bool
6691 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6692 void *data ATTRIBUTE_UNUSED)
6693 {
6694 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6695
6696 free (niter);
6697 return true;
6698 }
6699
6700 /* Frees data allocated by the optimization of a single loop. */
6701
6702 static void
6703 free_loop_data (struct ivopts_data *data)
6704 {
6705 unsigned i, j;
6706 bitmap_iterator bi;
6707 tree obj;
6708
6709 if (data->niters)
6710 {
6711 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6712 pointer_map_destroy (data->niters);
6713 data->niters = NULL;
6714 }
6715
6716 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6717 {
6718 struct version_info *info;
6719
6720 info = ver_info (data, i);
6721 free (info->iv);
6722 info->iv = NULL;
6723 info->has_nonlin_use = false;
6724 info->preserve_biv = false;
6725 info->inv_id = 0;
6726 }
6727 bitmap_clear (data->relevant);
6728 bitmap_clear (data->important_candidates);
6729
6730 for (i = 0; i < n_iv_uses (data); i++)
6731 {
6732 struct iv_use *use = iv_use (data, i);
6733
6734 free (use->iv);
6735 BITMAP_FREE (use->related_cands);
6736 for (j = 0; j < use->n_map_members; j++)
6737 if (use->cost_map[j].depends_on)
6738 BITMAP_FREE (use->cost_map[j].depends_on);
6739 free (use->cost_map);
6740 free (use);
6741 }
6742 data->iv_uses.truncate (0);
6743
6744 for (i = 0; i < n_iv_cands (data); i++)
6745 {
6746 struct iv_cand *cand = iv_cand (data, i);
6747
6748 free (cand->iv);
6749 if (cand->depends_on)
6750 BITMAP_FREE (cand->depends_on);
6751 free (cand);
6752 }
6753 data->iv_candidates.truncate (0);
6754
6755 if (data->version_info_size < num_ssa_names)
6756 {
6757 data->version_info_size = 2 * num_ssa_names;
6758 free (data->version_info);
6759 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6760 }
6761
6762 data->max_inv_id = 0;
6763
6764 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6765 SET_DECL_RTL (obj, NULL_RTX);
6766
6767 decl_rtl_to_reset.truncate (0);
6768
6769 data->inv_expr_tab.empty ();
6770 data->inv_expr_id = 0;
6771 }
6772
6773 /* Finalizes data structures used by the iv optimization pass. */
6775
6776 static void
6777 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6778 {
6779 free_loop_data (data);
6780 free (data->version_info);
6781 BITMAP_FREE (data->relevant);
6782 BITMAP_FREE (data->important_candidates);
6783
6784 decl_rtl_to_reset.release ();
6785 data->iv_uses.release ();
6786 data->iv_candidates.release ();
6787 data->inv_expr_tab.dispose ();
6788 }
6789
6790 /* Returns true if the loop body BODY includes any function calls. */
6791
6792 static bool
6793 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6794 {
6795 gimple_stmt_iterator gsi;
6796 unsigned i;
6797
6798 for (i = 0; i < num_nodes; i++)
6799 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6800 {
6801 gimple stmt = gsi_stmt (gsi);
6802 if (is_gimple_call (stmt)
6803 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6804 return true;
6805 }
6806 return false;
6807 }
6808
6809 /* Optimizes the LOOP. Returns true if anything changed. */
6810
6811 static bool
6812 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6813 {
6814 bool changed = false;
6815 struct iv_ca *iv_ca;
6816 edge exit = single_dom_exit (loop);
6817 basic_block *body;
6818
6819 gcc_assert (!data->niters);
6820 data->current_loop = loop;
6821 data->speed = optimize_loop_for_speed_p (loop);
6822
6823 if (dump_file && (dump_flags & TDF_DETAILS))
6824 {
6825 fprintf (dump_file, "Processing loop %d\n", loop->num);
6826
6827 if (exit)
6828 {
6829 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6830 exit->src->index, exit->dest->index);
6831 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6832 fprintf (dump_file, "\n");
6833 }
6834
6835 fprintf (dump_file, "\n");
6836 }
6837
6838 body = get_loop_body (loop);
6839 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6840 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6841 free (body);
6842
6843 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6844
6845 /* For each ssa name, determine whether it behaves as an induction variable
6846 in some loop. */
6847 if (!find_induction_variables (data))
6848 goto finish;
6849
6850 /* Finds interesting uses (item 1). */
6851 find_interesting_uses (data);
6852 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6853 goto finish;
6854
6855 /* Finds candidates for the induction variables (item 2). */
6856 find_iv_candidates (data);
6857
6858 /* Calculates the costs (item 3, part 1). */
6859 determine_iv_costs (data);
6860 determine_use_iv_costs (data);
6861 determine_set_costs (data);
6862
6863 /* Find the optimal set of induction variables (item 3, part 2). */
6864 iv_ca = find_optimal_iv_set (data);
6865 if (!iv_ca)
6866 goto finish;
6867 changed = true;
6868
6869 /* Create the new induction variables (item 4, part 1). */
6870 create_new_ivs (data, iv_ca);
6871 iv_ca_free (&iv_ca);
6872
6873 /* Rewrite the uses (item 4, part 2). */
6874 rewrite_uses (data);
6875
6876 /* Remove the ivs that are unused after rewriting. */
6877 remove_unused_ivs (data);
6878
6879 /* We have changed the structure of induction variables; it might happen
6880 that definitions in the scev database refer to some of them that were
6881 eliminated. */
6882 scev_reset ();
6883
6884 finish:
6885 free_loop_data (data);
6886
6887 return changed;
6888 }
6889
6890 /* Main entry point. Optimizes induction variables in loops. */
6891
6892 void
6893 tree_ssa_iv_optimize (void)
6894 {
6895 struct loop *loop;
6896 struct ivopts_data data;
6897
6898 tree_ssa_iv_optimize_init (&data);
6899
6900 /* Optimize the loops starting with the innermost ones. */
6901 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6902 {
6903 if (dump_file && (dump_flags & TDF_DETAILS))
6904 flow_loop_dump (loop, dump_file, NULL, 1);
6905
6906 tree_ssa_iv_optimize_loop (&data, loop);
6907 }
6908
6909 tree_ssa_iv_optimize_finalize (&data);
6910 }