/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
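
/* An illustrative sketch (not taken from the pass itself) of the kind of
   rewrite this pass performs.  Assuming a simple counted loop over an
   array A of N elements:

     before ivopts:                 after ivopts (conceptually):

       for (i = 0; i < n; i++)        p = &a[0];
         sum += a[i];                 end = &a[n];
                                      for (; p != end; p++)
                                        sum += *p;

   Here the index iv I is strength-reduced into the pointer iv P, and the
   exit test on I is rewritten into a test on P, so that I can be
   eliminated.  */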

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "vec.h"
#include "hashtab.h"
#include "hash-set.h"
#include "machmode.h"
#include "hard-reg-set.h"
#include "input.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
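
/* An illustrative note (a sketch, not itself part of the cost machinery):
   a comp_cost such as {4, 1} describes a computation with runtime cost 4
   and one unit of addressing complexity.  Costs of subexpressions are
   combined componentwise, so {4, 1} and {2, 0} would combine to {6, 1},
   while infinite_cost marks computations that a candidate cannot usefully
   express.  */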

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at; /* For original biv, the statement where it is
                            incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopts.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing
     a use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopts.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
         with ssa names that appear in phi nodes on abnormal edges, so as
         not to create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
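
/* A few illustrative cases (a sketch, not exhaustive): for &a[i], the
   ADDR_EXPR case finds the underlying declaration A and returns &a; for
   p + 4 (a POINTER_PLUS_EXPR), the recursion reduces to the pointer P,
   which the default case returns as the base object; and for an integer
   constant such as (char *) 0, NULL_TREE is returned, since it points to
   no object.  */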

/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in base, except for ones with DECL_P as
     their operand.  By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
        forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
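
/* For example (a sketch): for a loop header phi such as
   i_1 = PHI <0(preheader), i_2(latch)> with i_2 = i_1 + 4 in the body,
   simple_iv recognizes base 0 and step 4, so 4 is returned; a phi whose
   value does not advance each iteration has step 0 and yields
   NULL_TREE.  */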

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv that was peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, in case the caller has not done so already, to
     avoid showing it in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

 end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
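
/* For instance (an illustrative sketch): for the exit test i_1 < n_2,
   where i_1 is an iv with nonzero step and n_2 is loop invariant,
   CONTROL_VAR ends up pointing at i_1 and BOUND at n_2, and true is
   returned; for a compare of two invariants, false is returned and
   CONTROL_VAR/BOUND are simply set to the condition's operands.  */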

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
   obviously invariant, i.e. such that all of EXPR's operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not even
   obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
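
/* A few worked cases (a sketch): constant_multiple_of (b * 6, b, &m)
   recurses on B, finds the operands equal, and sets M to 6; for two
   constants, constant_multiple_of (12, 4, &m) sets M to 3, while
   constant_multiple_of (14, 4, &m) fails because the truncating
   division leaves a nonzero remainder.  */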

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

 fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2079
2080 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2081 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2082 we are at the top level of the processed address. */
2083
2084 static tree
2085 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2086 HOST_WIDE_INT *offset)
2087 {
2088 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2089 enum tree_code code;
2090 tree type, orig_type = TREE_TYPE (expr);
2091 HOST_WIDE_INT off0, off1, st;
2092 tree orig_expr = expr;
2093
2094 STRIP_NOPS (expr);
2095
2096 type = TREE_TYPE (expr);
2097 code = TREE_CODE (expr);
2098 *offset = 0;
2099
2100 switch (code)
2101 {
2102 case INTEGER_CST:
2103 if (!cst_and_fits_in_hwi (expr)
2104 || integer_zerop (expr))
2105 return orig_expr;
2106
2107 *offset = int_cst_value (expr);
2108 return build_int_cst (orig_type, 0);
2109
2110 case POINTER_PLUS_EXPR:
2111 case PLUS_EXPR:
2112 case MINUS_EXPR:
2113 op0 = TREE_OPERAND (expr, 0);
2114 op1 = TREE_OPERAND (expr, 1);
2115
2116 op0 = strip_offset_1 (op0, false, false, &off0);
2117 op1 = strip_offset_1 (op1, false, false, &off1);
2118
2119 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2120 if (op0 == TREE_OPERAND (expr, 0)
2121 && op1 == TREE_OPERAND (expr, 1))
2122 return orig_expr;
2123
2124 if (integer_zerop (op1))
2125 expr = op0;
2126 else if (integer_zerop (op0))
2127 {
2128 if (code == MINUS_EXPR)
2129 expr = fold_build1 (NEGATE_EXPR, type, op1);
2130 else
2131 expr = op1;
2132 }
2133 else
2134 expr = fold_build2 (code, type, op0, op1);
2135
2136 return fold_convert (orig_type, expr);
2137
2138 case MULT_EXPR:
2139 op1 = TREE_OPERAND (expr, 1);
2140 if (!cst_and_fits_in_hwi (op1))
2141 return orig_expr;
2142
2143 op0 = TREE_OPERAND (expr, 0);
2144 op0 = strip_offset_1 (op0, false, false, &off0);
2145 if (op0 == TREE_OPERAND (expr, 0))
2146 return orig_expr;
2147
2148 *offset = off0 * int_cst_value (op1);
2149 if (integer_zerop (op0))
2150 expr = op0;
2151 else
2152 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2153
2154 return fold_convert (orig_type, expr);
2155
2156 case ARRAY_REF:
2157 case ARRAY_RANGE_REF:
2158 if (!inside_addr)
2159 return orig_expr;
2160
2161 step = array_ref_element_size (expr);
2162 if (!cst_and_fits_in_hwi (step))
2163 break;
2164
2165 st = int_cst_value (step);
2166 op1 = TREE_OPERAND (expr, 1);
2167 op1 = strip_offset_1 (op1, false, false, &off1);
2168 *offset = off1 * st;
2169
2170 if (top_compref
2171 && integer_zerop (op1))
2172 {
2173 /* Strip the component reference completely. */
2174 op0 = TREE_OPERAND (expr, 0);
2175 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2176 *offset += off0;
2177 return op0;
2178 }
2179 break;
2180
2181 case COMPONENT_REF:
2182 {
2183 tree field;
2184
2185 if (!inside_addr)
2186 return orig_expr;
2187
2188 tmp = component_ref_field_offset (expr);
2189 field = TREE_OPERAND (expr, 1);
2190 if (top_compref
2191 && cst_and_fits_in_hwi (tmp)
2192 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2193 {
2194 HOST_WIDE_INT boffset, abs_off;
2195
2196 /* Strip the component reference completely. */
2197 op0 = TREE_OPERAND (expr, 0);
2198 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2199 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2200 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2201 if (boffset < 0)
2202 abs_off = -abs_off;
2203
2204 *offset = off0 + int_cst_value (tmp) + abs_off;
2205 return op0;
2206 }
2207 }
2208 break;
2209
2210 case ADDR_EXPR:
2211 op0 = TREE_OPERAND (expr, 0);
2212 op0 = strip_offset_1 (op0, true, true, &off0);
2213 *offset += off0;
2214
2215 if (op0 == TREE_OPERAND (expr, 0))
2216 return orig_expr;
2217
2218 expr = build_fold_addr_expr (op0);
2219 return fold_convert (orig_type, expr);
2220
2221 case MEM_REF:
2222 /* ??? Offset operand? */
2223 inside_addr = false;
2224 break;
2225
2226 default:
2227 return orig_expr;
2228 }
2229
2230 /* Default handling of expressions for which we want to recurse into
2231 the first operand. */
2232 op0 = TREE_OPERAND (expr, 0);
2233 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2234 *offset += off0;
2235
2236 if (op0 == TREE_OPERAND (expr, 0)
2237 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2238 return orig_expr;
2239
2240 expr = copy_node (expr);
2241 TREE_OPERAND (expr, 0) = op0;
2242 if (op1)
2243 TREE_OPERAND (expr, 1) = op1;
2244
2245 /* Inside an address, we might strip the top-level component references,
2246 thus changing the type of the expression. The handling of ADDR_EXPR
2247 will fix that. */
2248 expr = fold_convert (orig_type, expr);
2249
2250 return expr;
2251 }
2252
2253 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2254
2255 static tree
2256 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2257 {
2258 HOST_WIDE_INT off;
2259 tree core = strip_offset_1 (expr, false, false, &off);
2260 *offset = off;
2261 return core;
2262 }
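
/* Illustrative sketch (not part of the pass, and assuming an array A of
   4-byte elements): strip_offset decomposes

     expr = &a[i + 4]

   into the core &a[i] with *offset == 16, since strip_offset_1 folds the
   constant part of the index through the element size. */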
2263
2264 /* Returns a variant of TYPE that can be used as the base for different uses.
2265 We return an unsigned type with the same precision, which avoids problems
2266 with overflows. */
2267
2268 static tree
2269 generic_type_for (tree type)
2270 {
2271 if (POINTER_TYPE_P (type))
2272 return unsigned_type_for (type);
2273
2274 if (TYPE_UNSIGNED (type))
2275 return type;
2276
2277 return unsigned_type_for (type);
2278 }
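
/* For example, generic_type_for (int) is unsigned int, and a pointer type
   such as char * is mapped to an unsigned integer type of the same
   precision, so that arithmetic on the candidates cannot invoke undefined
   overflow. */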
2279
2280 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2281 the bitmap in which we should record them. */
2282
2283 static struct ivopts_data *fd_ivopts_data;
2284 static tree
2285 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2286 {
2287 bitmap *depends_on = (bitmap *) data;
2288 struct version_info *info;
2289
2290 if (TREE_CODE (*expr_p) != SSA_NAME)
2291 return NULL_TREE;
2292 info = name_info (fd_ivopts_data, *expr_p);
2293
2294 if (!info->inv_id || info->has_nonlin_use)
2295 return NULL_TREE;
2296
2297 if (!*depends_on)
2298 *depends_on = BITMAP_ALLOC (NULL);
2299 bitmap_set_bit (*depends_on, info->inv_id);
2300
2301 return NULL_TREE;
2302 }
2303
2304 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2305 position to POS. If USE is not NULL, the candidate is set as related to
2306 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2307 replacement of the final value of the iv by a direct computation. */
2308
2309 static struct iv_cand *
2310 add_candidate_1 (struct ivopts_data *data,
2311 tree base, tree step, bool important, enum iv_position pos,
2312 struct iv_use *use, gimple incremented_at)
2313 {
2314 unsigned i;
2315 struct iv_cand *cand = NULL;
2316 tree type, orig_type;
2317
2318 /* For non-original variables, make sure their values are computed in a type
2319 that does not invoke undefined behavior on overflows (since in general,
2320 we cannot prove that these induction variables are non-wrapping). */
2321 if (pos != IP_ORIGINAL)
2322 {
2323 orig_type = TREE_TYPE (base);
2324 type = generic_type_for (orig_type);
2325 if (type != orig_type)
2326 {
2327 base = fold_convert (type, base);
2328 step = fold_convert (type, step);
2329 }
2330 }
2331
2332 for (i = 0; i < n_iv_cands (data); i++)
2333 {
2334 cand = iv_cand (data, i);
2335
2336 if (cand->pos != pos)
2337 continue;
2338
2339 if (cand->incremented_at != incremented_at
2340 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2341 && cand->ainc_use != use))
2342 continue;
2343
2344 if (!cand->iv)
2345 {
2346 if (!base && !step)
2347 break;
2348
2349 continue;
2350 }
2351
2352 if (!base && !step)
2353 continue;
2354
2355 if (operand_equal_p (base, cand->iv->base, 0)
2356 && operand_equal_p (step, cand->iv->step, 0)
2357 && (TYPE_PRECISION (TREE_TYPE (base))
2358 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2359 break;
2360 }
2361
2362 if (i == n_iv_cands (data))
2363 {
2364 cand = XCNEW (struct iv_cand);
2365 cand->id = i;
2366
2367 if (!base && !step)
2368 cand->iv = NULL;
2369 else
2370 cand->iv = alloc_iv (base, step);
2371
2372 cand->pos = pos;
2373 if (pos != IP_ORIGINAL && cand->iv)
2374 {
2375 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2376 cand->var_after = cand->var_before;
2377 }
2378 cand->important = important;
2379 cand->incremented_at = incremented_at;
2380 data->iv_candidates.safe_push (cand);
2381
2382 if (step
2383 && TREE_CODE (step) != INTEGER_CST)
2384 {
2385 fd_ivopts_data = data;
2386 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2387 }
2388
2389 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2390 cand->ainc_use = use;
2391 else
2392 cand->ainc_use = NULL;
2393
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2395 dump_cand (dump_file, cand);
2396 }
2397
2398 if (important && !cand->important)
2399 {
2400 cand->important = true;
2401 if (dump_file && (dump_flags & TDF_DETAILS))
2402 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2403 }
2404
2405 if (use)
2406 {
2407 bitmap_set_bit (use->related_cands, i);
2408 if (dump_file && (dump_flags & TDF_DETAILS))
2409 fprintf (dump_file, "Candidate %d is related to use %d\n",
2410 cand->id, use->id);
2411 }
2412
2413 return cand;
2414 }
2415
2416 /* Returns true if incrementing the induction variable at the end of the LOOP
2417 is allowed.
2418
2419 The purpose is to avoid splitting the latch edge with a biv increment,
2420 thus creating a jump, possibly confusing other optimization passes and
2421 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2422 IP_NORMAL_POS is not available (so we do not have a better alternative),
2423 or if the latch edge is already nonempty. */
2424
2425 static bool
2426 allow_ip_end_pos_p (struct loop *loop)
2427 {
2428 if (!ip_normal_pos (loop))
2429 return true;
2430
2431 if (!empty_block_p (ip_end_pos (loop)))
2432 return true;
2433
2434 return false;
2435 }
2436
2437 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2438 Important field is set to IMPORTANT. */
2439
2440 static void
2441 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2442 bool important, struct iv_use *use)
2443 {
2444 basic_block use_bb = gimple_bb (use->stmt);
2445 machine_mode mem_mode;
2446 unsigned HOST_WIDE_INT cstepi;
2447
2448 /* If we insert the increment in any position other than the standard
2449 ones, we must ensure that it is incremented once per iteration.
2450 It must not be in an inner nested loop, or one side of an if
2451 statement. */
2452 if (use_bb->loop_father != data->current_loop
2453 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2454 || stmt_could_throw_p (use->stmt)
2455 || !cst_and_fits_in_hwi (step))
2456 return;
2457
2458 cstepi = int_cst_value (step);
2459
2460 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2461 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2462 || USE_STORE_PRE_INCREMENT (mem_mode))
2463 && GET_MODE_SIZE (mem_mode) == cstepi)
2464 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2465 || USE_STORE_PRE_DECREMENT (mem_mode))
2466 && GET_MODE_SIZE (mem_mode) == -cstepi))
2467 {
2468 enum tree_code code = MINUS_EXPR;
2469 tree new_base;
2470 tree new_step = step;
2471
2472 if (POINTER_TYPE_P (TREE_TYPE (base)))
2473 {
2474 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2475 code = POINTER_PLUS_EXPR;
2476 }
2477 else
2478 new_step = fold_convert (TREE_TYPE (base), new_step);
2479 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2480 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2481 use->stmt);
2482 }
2483 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2484 || USE_STORE_POST_INCREMENT (mem_mode))
2485 && GET_MODE_SIZE (mem_mode) == cstepi)
2486 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2487 || USE_STORE_POST_DECREMENT (mem_mode))
2488 && GET_MODE_SIZE (mem_mode) == -cstepi))
2489 {
2490 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2491 use->stmt);
2492 }
2493 }
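
/* Illustration (assuming a target with post-increment addressing): for a
   4-byte memory access whose iv has step 4, GET_MODE_SIZE (mem_mode)
   equals cstepi, so a candidate incremented just after the use
   (IP_AFTER_USE) is added, and the access can later be expressed in the
   style of *p++. */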
2494
2495 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2496 position to POS. If USE is not NULL, the candidate is set as related to
2497 it. The candidate computation is scheduled at all available positions. */
2498
2499 static void
2500 add_candidate (struct ivopts_data *data,
2501 tree base, tree step, bool important, struct iv_use *use)
2502 {
2503 if (ip_normal_pos (data->current_loop))
2504 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2505 if (ip_end_pos (data->current_loop)
2506 && allow_ip_end_pos_p (data->current_loop))
2507 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2508
2509 if (use != NULL && use->type == USE_ADDRESS)
2510 add_autoinc_candidates (data, base, step, important, use);
2511 }
2512
2513 /* Adds standard iv candidates. */
2514
2515 static void
2516 add_standard_iv_candidates (struct ivopts_data *data)
2517 {
2518 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2519
2520 /* The same for the long type, if it is wider and still fast enough. */
2521 if (TYPE_PRECISION
2522 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2523 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2524 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2525 build_int_cst (long_integer_type_node, 1), true, NULL);
2526
2527 /* The same for the long long type, if it is wider and still fast enough. */
2528 if (TYPE_PRECISION
2529 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2530 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2531 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2532 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2533 }
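
/* For example, on a typical LP64 target the calls above add the candidate
   0 + 1 * i in int and, since long is wider than int and no wider than a
   word, also in long; the long long variant is skipped there because it
   is no wider than long. */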
2534
2535
2536 /* Adds candidates based on the old induction variable IV. */
2537
2538 static void
2539 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2540 {
2541 gimple phi;
2542 tree def;
2543 struct iv_cand *cand;
2544
2545 add_candidate (data, iv->base, iv->step, true, NULL);
2546
2547 /* The same, but with initial value zero. */
2548 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2549 add_candidate (data, size_int (0), iv->step, true, NULL);
2550 else
2551 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2552 iv->step, true, NULL);
2553
2554 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2555 if (gimple_code (phi) == GIMPLE_PHI)
2556 {
2557 /* Additionally record the possibility of leaving the original iv
2558 untouched. */
2559 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2560 /* Don't add the candidate if it's defined by another PHI node, because
2561 then it's an affine iv appearing in the form of PEELED_CHREC. */
2562 phi = SSA_NAME_DEF_STMT (def);
2563 if (gimple_code (phi) != GIMPLE_PHI)
2564 {
2565 cand = add_candidate_1 (data,
2566 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2567 SSA_NAME_DEF_STMT (def));
2568 cand->var_before = iv->ssa_name;
2569 cand->var_after = def;
2570 }
2571 else
2572 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2573 }
2574 }
2575
2576 /* Adds candidates based on the old induction variables. */
2577
2578 static void
2579 add_old_ivs_candidates (struct ivopts_data *data)
2580 {
2581 unsigned i;
2582 struct iv *iv;
2583 bitmap_iterator bi;
2584
2585 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2586 {
2587 iv = ver_info (data, i)->iv;
2588 if (iv && iv->biv_p && !integer_zerop (iv->step))
2589 add_old_iv_candidates (data, iv);
2590 }
2591 }
2592
2593 /* Adds candidates based on the value of the induction variable IV and USE. */
2594
2595 static void
2596 add_iv_value_candidates (struct ivopts_data *data,
2597 struct iv *iv, struct iv_use *use)
2598 {
2599 unsigned HOST_WIDE_INT offset;
2600 tree base;
2601 tree basetype;
2602
2603 add_candidate (data, iv->base, iv->step, false, use);
2604
2605 /* The same, but with the initial value zero. Make such a variable
2606 important, since it is generic enough that possibly many uses may be
2607 based on it. */
2608 basetype = TREE_TYPE (iv->base);
2609 if (POINTER_TYPE_P (basetype))
2610 basetype = sizetype;
2611 add_candidate (data, build_int_cst (basetype, 0),
2612 iv->step, true, use);
2613
2614 /* Third, try removing the constant offset. Make sure to even
2615 add a candidate for &a[0] vs. (T *)&a. */
2616 base = strip_offset (iv->base, &offset);
2617 if (offset
2618 || base != iv->base)
2619 add_candidate (data, base, iv->step, false, use);
2620 }
2621
2622 /* Adds candidates based on the uses. */
2623
2624 static void
2625 add_derived_ivs_candidates (struct ivopts_data *data)
2626 {
2627 unsigned i;
2628
2629 for (i = 0; i < n_iv_uses (data); i++)
2630 {
2631 struct iv_use *use = iv_use (data, i);
2632
2633 if (!use)
2634 continue;
2635
2636 switch (use->type)
2637 {
2638 case USE_NONLINEAR_EXPR:
2639 case USE_COMPARE:
2640 case USE_ADDRESS:
2641 /* Just add the ivs based on the value of the iv used here. */
2642 add_iv_value_candidates (data, use->iv, use);
2643 break;
2644
2645 default:
2646 gcc_unreachable ();
2647 }
2648 }
2649 }
2650
2651 /* Record important candidates and add them to related_cands bitmaps
2652 if needed. */
2653
2654 static void
2655 record_important_candidates (struct ivopts_data *data)
2656 {
2657 unsigned i;
2658 struct iv_use *use;
2659
2660 for (i = 0; i < n_iv_cands (data); i++)
2661 {
2662 struct iv_cand *cand = iv_cand (data, i);
2663
2664 if (cand->important)
2665 bitmap_set_bit (data->important_candidates, i);
2666 }
2667
2668 data->consider_all_candidates = (n_iv_cands (data)
2669 <= CONSIDER_ALL_CANDIDATES_BOUND);
2670
2671 if (data->consider_all_candidates)
2672 {
2673 /* We will not need "related_cands" bitmaps in this case,
2674 so release them to decrease peak memory consumption. */
2675 for (i = 0; i < n_iv_uses (data); i++)
2676 {
2677 use = iv_use (data, i);
2678 BITMAP_FREE (use->related_cands);
2679 }
2680 }
2681 else
2682 {
2683 /* Add important candidates to the related_cands bitmaps. */
2684 for (i = 0; i < n_iv_uses (data); i++)
2685 bitmap_ior_into (iv_use (data, i)->related_cands,
2686 data->important_candidates);
2687 }
2688 }
2689
2690 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2691 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2692 we allocate a simple list for every use. */
2693
2694 static void
2695 alloc_use_cost_map (struct ivopts_data *data)
2696 {
2697 unsigned i, size, s;
2698
2699 for (i = 0; i < n_iv_uses (data); i++)
2700 {
2701 struct iv_use *use = iv_use (data, i);
2702
2703 if (data->consider_all_candidates)
2704 size = n_iv_cands (data);
2705 else
2706 {
2707 s = bitmap_count_bits (use->related_cands);
2708
2709 /* Round up to the nearest power of two, so that computing the modulo by it is fast. */
2710 size = s ? (1 << ceil_log2 (s)) : 1;
2711 }
2712
2713 use->n_map_members = size;
2714 use->cost_map = XCNEWVEC (struct cost_pair, size);
2715 }
2716 }
2717
2718 /* Returns a description of the computation cost of an expression whose
2719 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2720
2721 static comp_cost
2722 new_cost (unsigned runtime, unsigned complexity)
2723 {
2724 comp_cost cost;
2725
2726 cost.cost = runtime;
2727 cost.complexity = complexity;
2728
2729 return cost;
2730 }
2731
2732 /* Adds costs COST1 and COST2. */
2733
2734 static comp_cost
2735 add_costs (comp_cost cost1, comp_cost cost2)
2736 {
2737 cost1.cost += cost2.cost;
2738 cost1.complexity += cost2.complexity;
2739
2740 return cost1;
2741 }

2742 /* Subtracts costs COST1 and COST2. */
2743
2744 static comp_cost
2745 sub_costs (comp_cost cost1, comp_cost cost2)
2746 {
2747 cost1.cost -= cost2.cost;
2748 cost1.complexity -= cost2.complexity;
2749
2750 return cost1;
2751 }
2752
2753 /* Returns a negative number if COST1 < COST2, a positive number if
2754 COST1 > COST2, and 0 if COST1 = COST2. */
2755
2756 static int
2757 compare_costs (comp_cost cost1, comp_cost cost2)
2758 {
2759 if (cost1.cost == cost2.cost)
2760 return cost1.complexity - cost2.complexity;
2761
2762 return cost1.cost - cost2.cost;
2763 }
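
/* Worked example (illustration only): comparing costs (4, 2) and (4, 5)
   returns 2 - 5 == -3, so equal runtime costs are ordered by complexity,
   while (3, 9) still beats (4, 0) because the runtime part is compared
   first. */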
2764
2765 /* Returns true if COST is infinite. */
2766
2767 static bool
2768 infinite_cost_p (comp_cost cost)
2769 {
2770 return cost.cost == INFTY;
2771 }
2772
2773 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2774 depends on the invariants DEPENDS_ON, that the value used in expressing it
2775 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2776
2777 static void
2778 set_use_iv_cost (struct ivopts_data *data,
2779 struct iv_use *use, struct iv_cand *cand,
2780 comp_cost cost, bitmap depends_on, tree value,
2781 enum tree_code comp, int inv_expr_id)
2782 {
2783 unsigned i, s;
2784
2785 if (infinite_cost_p (cost))
2786 {
2787 BITMAP_FREE (depends_on);
2788 return;
2789 }
2790
2791 if (data->consider_all_candidates)
2792 {
2793 use->cost_map[cand->id].cand = cand;
2794 use->cost_map[cand->id].cost = cost;
2795 use->cost_map[cand->id].depends_on = depends_on;
2796 use->cost_map[cand->id].value = value;
2797 use->cost_map[cand->id].comp = comp;
2798 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2799 return;
2800 }
2801
2802 /* n_map_members is a power of two, so this computes modulo. */
2803 s = cand->id & (use->n_map_members - 1);
2804 for (i = s; i < use->n_map_members; i++)
2805 if (!use->cost_map[i].cand)
2806 goto found;
2807 for (i = 0; i < s; i++)
2808 if (!use->cost_map[i].cand)
2809 goto found;
2810
2811 gcc_unreachable ();
2812
2813 found:
2814 use->cost_map[i].cand = cand;
2815 use->cost_map[i].cost = cost;
2816 use->cost_map[i].depends_on = depends_on;
2817 use->cost_map[i].value = value;
2818 use->cost_map[i].comp = comp;
2819 use->cost_map[i].inv_expr_id = inv_expr_id;
2820 }
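
/* Illustrative note (a sketch, assuming use->n_map_members == 8): a
   candidate with id 11 starts probing at slot 11 & 7 == 3; if that slot
   is occupied, the search continues through slots 4..7 and then wraps
   around to 0..2, matching the lookup order in get_use_iv_cost below. */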
2821
2822 /* Gets cost of (USE, CANDIDATE) pair. */
2823
2824 static struct cost_pair *
2825 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2826 struct iv_cand *cand)
2827 {
2828 unsigned i, s;
2829 struct cost_pair *ret;
2830
2831 if (!cand)
2832 return NULL;
2833
2834 if (data->consider_all_candidates)
2835 {
2836 ret = use->cost_map + cand->id;
2837 if (!ret->cand)
2838 return NULL;
2839
2840 return ret;
2841 }
2842
2843 /* n_map_members is a power of two, so this computes modulo. */
2844 s = cand->id & (use->n_map_members - 1);
2845 for (i = s; i < use->n_map_members; i++)
2846 if (use->cost_map[i].cand == cand)
2847 return use->cost_map + i;
2848 else if (use->cost_map[i].cand == NULL)
2849 return NULL;
2850 for (i = 0; i < s; i++)
2851 if (use->cost_map[i].cand == cand)
2852 return use->cost_map + i;
2853 else if (use->cost_map[i].cand == NULL)
2854 return NULL;
2855
2856 return NULL;
2857 }
2858
2859 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2860 static rtx
2861 produce_memory_decl_rtl (tree obj, int *regno)
2862 {
2863 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2864 machine_mode address_mode = targetm.addr_space.address_mode (as);
2865 rtx x;
2866
2867 gcc_assert (obj);
2868 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2869 {
2870 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2871 x = gen_rtx_SYMBOL_REF (address_mode, name);
2872 SET_SYMBOL_REF_DECL (x, obj);
2873 x = gen_rtx_MEM (DECL_MODE (obj), x);
2874 set_mem_addr_space (x, as);
2875 targetm.encode_section_info (obj, x, true);
2876 }
2877 else
2878 {
2879 x = gen_raw_REG (address_mode, (*regno)++);
2880 x = gen_rtx_MEM (DECL_MODE (obj), x);
2881 set_mem_addr_space (x, as);
2882 }
2883
2884 return x;
2885 }
2886
2887 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2888 walk_tree. DATA contains the current fake register number. */
2889
2890 static tree
2891 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2892 {
2893 tree obj = NULL_TREE;
2894 rtx x = NULL_RTX;
2895 int *regno = (int *) data;
2896
2897 switch (TREE_CODE (*expr_p))
2898 {
2899 case ADDR_EXPR:
2900 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2901 handled_component_p (*expr_p);
2902 expr_p = &TREE_OPERAND (*expr_p, 0))
2903 continue;
2904 obj = *expr_p;
2905 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2906 x = produce_memory_decl_rtl (obj, regno);
2907 break;
2908
2909 case SSA_NAME:
2910 *ws = 0;
2911 obj = SSA_NAME_VAR (*expr_p);
2912 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2913 if (!obj)
2914 return NULL_TREE;
2915 if (!DECL_RTL_SET_P (obj))
2916 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2917 break;
2918
2919 case VAR_DECL:
2920 case PARM_DECL:
2921 case RESULT_DECL:
2922 *ws = 0;
2923 obj = *expr_p;
2924
2925 if (DECL_RTL_SET_P (obj))
2926 break;
2927
2928 if (DECL_MODE (obj) == BLKmode)
2929 x = produce_memory_decl_rtl (obj, regno);
2930 else
2931 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2932
2933 break;
2934
2935 default:
2936 break;
2937 }
2938
2939 if (x)
2940 {
2941 decl_rtl_to_reset.safe_push (obj);
2942 SET_DECL_RTL (obj, x);
2943 }
2944
2945 return NULL_TREE;
2946 }
2947
2948 /* Determines cost of the computation of EXPR. */
2949
2950 static unsigned
2951 computation_cost (tree expr, bool speed)
2952 {
2953 rtx_insn *seq;
2954 rtx rslt;
2955 tree type = TREE_TYPE (expr);
2956 unsigned cost;
2957 /* Avoid using hard regs in ways which may be unsupported. */
2958 int regno = LAST_VIRTUAL_REGISTER + 1;
2959 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2960 enum node_frequency real_frequency = node->frequency;
2961
2962 node->frequency = NODE_FREQUENCY_NORMAL;
2963 crtl->maybe_hot_insn_p = speed;
2964 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2965 start_sequence ();
2966 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2967 seq = get_insns ();
2968 end_sequence ();
2969 default_rtl_profile ();
2970 node->frequency = real_frequency;
2971
2972 cost = seq_cost (seq, speed);
2973 if (MEM_P (rslt))
2974 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2975 TYPE_ADDR_SPACE (type), speed);
2976 else if (!REG_P (rslt))
2977 cost += set_src_cost (rslt, speed);
2978
2979 return cost;
2980 }
2981
2982 /* Returns variable containing the value of candidate CAND at statement AT. */
2983
2984 static tree
2985 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2986 {
2987 if (stmt_after_increment (loop, cand, stmt))
2988 return cand->var_after;
2989 else
2990 return cand->var_before;
2991 }
2992
2993 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2994 same precision that is at least as wide as the precision of TYPE, stores
2995 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2996 type of A and B. */
2997
2998 static tree
2999 determine_common_wider_type (tree *a, tree *b)
3000 {
3001 tree wider_type = NULL;
3002 tree suba, subb;
3003 tree atype = TREE_TYPE (*a);
3004
3005 if (CONVERT_EXPR_P (*a))
3006 {
3007 suba = TREE_OPERAND (*a, 0);
3008 wider_type = TREE_TYPE (suba);
3009 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3010 return atype;
3011 }
3012 else
3013 return atype;
3014
3015 if (CONVERT_EXPR_P (*b))
3016 {
3017 subb = TREE_OPERAND (*b, 0);
3018 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3019 return atype;
3020 }
3021 else
3022 return atype;
3023
3024 *a = suba;
3025 *b = subb;
3026 return wider_type;
3027 }
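
/* Example (illustration only): if *a is (unsigned int) al and *b is
   (unsigned int) bl, with al and bl both of the wider type long, the
   function stores al to *a and bl to *b and returns long, so the
   subtraction can be folded in the wider type. */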
3028
3029 /* Determines the expression by which USE is expressed from induction variable
3030 CAND at statement AT in LOOP. The expression is stored in decomposed
3031 form in AFF. Returns false if USE cannot be expressed using CAND. */
3032
3033 static bool
3034 get_computation_aff (struct loop *loop,
3035 struct iv_use *use, struct iv_cand *cand, gimple at,
3036 struct aff_tree *aff)
3037 {
3038 tree ubase = use->iv->base;
3039 tree ustep = use->iv->step;
3040 tree cbase = cand->iv->base;
3041 tree cstep = cand->iv->step, cstep_common;
3042 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3043 tree common_type, var;
3044 tree uutype;
3045 aff_tree cbase_aff, var_aff;
3046 widest_int rat;
3047
3048 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3049 {
3050 /* We do not have enough precision to express the values of the use. */
3051 return false;
3052 }
3053
3054 var = var_at_stmt (loop, cand, at);
3055 uutype = unsigned_type_for (utype);
3056
3057 /* If the conversion is not a noop, perform it. */
3058 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3059 {
3060 cstep = fold_convert (uutype, cstep);
3061 cbase = fold_convert (uutype, cbase);
3062 var = fold_convert (uutype, var);
3063 }
3064
3065 if (!constant_multiple_of (ustep, cstep, &rat))
3066 return false;
3067
3068 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3069 wider type, we achieve better folding by computing their difference in that
3070 wider type and casting the result to UUTYPE. We do not need to worry about
3071 overflows, as all the arithmetic will in the end be performed in UUTYPE
3072 anyway. */
3073 common_type = determine_common_wider_type (&ubase, &cbase);
3074
3075 /* use = ubase - ratio * cbase + ratio * var. */
3076 tree_to_aff_combination (ubase, common_type, aff);
3077 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3078 tree_to_aff_combination (var, uutype, &var_aff);
3079
3080 /* We need to shift the value if we are after the increment. */
3081 if (stmt_after_increment (loop, cand, at))
3082 {
3083 aff_tree cstep_aff;
3084
3085 if (common_type != uutype)
3086 cstep_common = fold_convert (common_type, cstep);
3087 else
3088 cstep_common = cstep;
3089
3090 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3091 aff_combination_add (&cbase_aff, &cstep_aff);
3092 }
3093
3094 aff_combination_scale (&cbase_aff, -rat);
3095 aff_combination_add (aff, &cbase_aff);
3096 if (common_type != uutype)
3097 aff_combination_convert (aff, uutype);
3098
3099 aff_combination_scale (&var_aff, rat);
3100 aff_combination_add (aff, &var_aff);
3101
3102 return true;
3103 }
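
/* Worked example of "use = ubase - ratio * cbase + ratio * var"
   (illustration only): expressing a use with base 0 and step 4 from a
   candidate with base 0 and step 1 gives rat == 4, so AFF ends up as
   0 - 4 * 0 + 4 * var, i.e. simply 4 * var (before the increment). */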
3104
3105 /* Return the type of USE. */
3106
3107 static tree
3108 get_use_type (struct iv_use *use)
3109 {
3110 tree base_type = TREE_TYPE (use->iv->base);
3111 tree type;
3112
3113 if (use->type == USE_ADDRESS)
3114 {
3115 /* The base_type may be a void pointer. Create a pointer type based on
3116 the mem_ref instead. */
3117 type = build_pointer_type (TREE_TYPE (*use->op_p));
3118 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3119 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3120 }
3121 else
3122 type = base_type;
3123
3124 return type;
3125 }
3126
3127 /* Determines the expression by which USE is expressed from induction variable
3128 CAND at statement AT in LOOP. The computation is unshared. */
3129
3130 static tree
3131 get_computation_at (struct loop *loop,
3132 struct iv_use *use, struct iv_cand *cand, gimple at)
3133 {
3134 aff_tree aff;
3135 tree type = get_use_type (use);
3136
3137 if (!get_computation_aff (loop, use, cand, at, &aff))
3138 return NULL_TREE;
3139 unshare_aff_combination (&aff);
3140 return fold_convert (type, aff_combination_to_tree (&aff));
3141 }
3142
3143 /* Determines the expression by which USE is expressed from induction variable
3144 CAND in LOOP. The computation is unshared. */
3145
3146 static tree
3147 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3148 {
3149 return get_computation_at (loop, use, cand, use->stmt);
3150 }
3151
3152 /* Adjust the cost COST for being in loop setup rather than loop body.
3153 When optimizing for space the setup overhead is counted in full; when
3154 optimizing for speed it is amortized over the expected iteration count. */
3155 static unsigned
3156 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3157 {
3158 if (cost == INFTY)
3159 return cost;
3160 else if (optimize_loop_for_speed_p (data->current_loop))
3161 return cost / avg_loop_niter (data->current_loop);
3162 else
3163 return cost;
3164 }
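
/* Example: with an average of 10 iterations, a setup cost of 20 is
   accounted as 20 / 10 == 2 when optimizing for speed, but remains 20
   when optimizing for size. */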
3165
3166 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3167 validity for a memory reference accessing memory of mode MODE in
3168 address space AS. */
3169
3170
3171 bool
3172 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3173 addr_space_t as)
3174 {
3175 #define MAX_RATIO 128
3176 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3177 static vec<sbitmap> valid_mult_list;
3178 sbitmap valid_mult;
3179
3180 if (data_index >= valid_mult_list.length ())
3181 valid_mult_list.safe_grow_cleared (data_index + 1);
3182
3183 valid_mult = valid_mult_list[data_index];
3184 if (!valid_mult)
3185 {
3186 machine_mode address_mode = targetm.addr_space.address_mode (as);
3187 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3188 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3189 rtx addr, scaled;
3190 HOST_WIDE_INT i;
3191
3192 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3193 bitmap_clear (valid_mult);
3194 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3195 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3196 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3197 {
3198 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3199 if (memory_address_addr_space_p (mode, addr, as)
3200 || memory_address_addr_space_p (mode, scaled, as))
3201 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3202 }
3203
3204 if (dump_file && (dump_flags & TDF_DETAILS))
3205 {
3206 fprintf (dump_file, " allowed multipliers:");
3207 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3208 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3209 fprintf (dump_file, " %d", (int) i);
3210 fprintf (dump_file, "\n");
3211 fprintf (dump_file, "\n");
3212 }
3213
3214 valid_mult_list[data_index] = valid_mult;
3215 }
3216
3217 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3218 return false;
3219
3220 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3221 }
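
/* Usage sketch (target-dependent, e.g. on x86 where base + 4 * index is
   a valid address): multiplier_allowed_in_address_p (4, SImode,
   ADDR_SPACE_GENERIC) returns true, while a ratio of 3 is rejected
   because no addressing mode accepts it. */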
3222
3223 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3224 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3225 variable is omitted. Compute the cost for a memory reference that accesses
3226 a memory location of mode MEM_MODE in address space AS.
3227
3228 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3229 size of MEM_MODE / RATIO) is available. To make this determination, we
3230 look at the size of the increment to be made, which is given in CSTEP.
3231 CSTEP may be zero if the step is unknown.
3232 STMT_AFTER_INC is true iff the statement we're looking at is after the
3233 increment of the original biv.
3234
3235 TODO -- there must be some better way. All of this is quite crude. */
3236
3237 enum ainc_type
3238 {
3239 AINC_PRE_INC, /* Pre increment. */
3240 AINC_PRE_DEC, /* Pre decrement. */
3241 AINC_POST_INC, /* Post increment. */
3242 AINC_POST_DEC, /* Post decrement. */
3243 AINC_NONE /* Also the number of auto increment types. */
3244 };
3245
3246 typedef struct address_cost_data_s
3247 {
3248 HOST_WIDE_INT min_offset, max_offset;
3249 unsigned costs[2][2][2][2];
3250 unsigned ainc_costs[AINC_NONE];
3251 } *address_cost_data;
3252
3253
3254 static comp_cost
3255 get_address_cost (bool symbol_present, bool var_present,
3256 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3257 HOST_WIDE_INT cstep, machine_mode mem_mode,
3258 addr_space_t as, bool speed,
3259 bool stmt_after_inc, bool *may_autoinc)
3260 {
3261 machine_mode address_mode = targetm.addr_space.address_mode (as);
3262 static vec<address_cost_data> address_cost_data_list;
3263 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3264 address_cost_data data;
3265 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3266 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3267 unsigned cost, acost, complexity;
3268 enum ainc_type autoinc_type;
3269 bool offset_p, ratio_p, autoinc;
3270 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3271 unsigned HOST_WIDE_INT mask;
3272 unsigned bits;
3273
3274 if (data_index >= address_cost_data_list.length ())
3275 address_cost_data_list.safe_grow_cleared (data_index + 1);
3276
3277 data = address_cost_data_list[data_index];
3278 if (!data)
3279 {
3280 HOST_WIDE_INT i;
3281 HOST_WIDE_INT rat, off = 0;
3282 int old_cse_not_expected, width;
3283 unsigned sym_p, var_p, off_p, rat_p, add_c;
3284 rtx_insn *seq;
3285 rtx addr, base;
3286 rtx reg0, reg1;
3287
3288 data = (address_cost_data) xcalloc (1, sizeof (*data));
3289
3290 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3291
3292 width = GET_MODE_BITSIZE (address_mode) - 1;
3293 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3294 width = HOST_BITS_PER_WIDE_INT - 1;
3295 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3296
3297 for (i = width; i >= 0; i--)
3298 {
3299 off = -((unsigned HOST_WIDE_INT) 1 << i);
3300 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3301 if (memory_address_addr_space_p (mem_mode, addr, as))
3302 break;
3303 }
3304 data->min_offset = (i == -1? 0 : off);
3305
3306 for (i = width; i >= 0; i--)
3307 {
3308 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3309 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3310 if (memory_address_addr_space_p (mem_mode, addr, as))
3311 break;
3312 /* For some targets, like ARM Thumb-1, the offset should be naturally
3313 aligned. Try an aligned offset if address_mode is not QImode. */
3314 off = (address_mode == QImode)
3315 ? 0
3316 : ((unsigned HOST_WIDE_INT) 1 << i)
3317 - GET_MODE_SIZE (address_mode);
3318 if (off > 0)
3319 {
3320 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3321 if (memory_address_addr_space_p (mem_mode, addr, as))
3322 break;
3323 }
3324 }
3325 if (i == -1)
3326 off = 0;
3327 data->max_offset = off;
3328
3329 if (dump_file && (dump_flags & TDF_DETAILS))
3330 {
3331 fprintf (dump_file, "get_address_cost:\n");
3332 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3333 GET_MODE_NAME (mem_mode),
3334 data->min_offset);
3335 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3336 GET_MODE_NAME (mem_mode),
3337 data->max_offset);
3338 }
3339
3340 rat = 1;
3341 for (i = 2; i <= MAX_RATIO; i++)
3342 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3343 {
3344 rat = i;
3345 break;
3346 }
3347
3348 /* Compute the cost of various addressing modes. */
3349 acost = 0;
3350 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3351 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3352
3353 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3354 || USE_STORE_PRE_DECREMENT (mem_mode))
3355 {
3356 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3357 has_predec[mem_mode]
3358 = memory_address_addr_space_p (mem_mode, addr, as);
3359
3360 if (has_predec[mem_mode])
3361 data->ainc_costs[AINC_PRE_DEC]
3362 = address_cost (addr, mem_mode, as, speed);
3363 }
3364 if (USE_LOAD_POST_DECREMENT (mem_mode)
3365 || USE_STORE_POST_DECREMENT (mem_mode))
3366 {
3367 addr = gen_rtx_POST_DEC (address_mode, reg0);
3368 has_postdec[mem_mode]
3369 = memory_address_addr_space_p (mem_mode, addr, as);
3370
3371 if (has_postdec[mem_mode])
3372 data->ainc_costs[AINC_POST_DEC]
3373 = address_cost (addr, mem_mode, as, speed);
3374 }
3375 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3376 || USE_STORE_PRE_INCREMENT (mem_mode))
3377 {
3378 addr = gen_rtx_PRE_INC (address_mode, reg0);
3379 has_preinc[mem_mode]
3380 = memory_address_addr_space_p (mem_mode, addr, as);
3381
3382 if (has_preinc[mem_mode])
3383 data->ainc_costs[AINC_PRE_INC]
3384 = address_cost (addr, mem_mode, as, speed);
3385 }
3386 if (USE_LOAD_POST_INCREMENT (mem_mode)
3387 || USE_STORE_POST_INCREMENT (mem_mode))
3388 {
3389 addr = gen_rtx_POST_INC (address_mode, reg0);
3390 has_postinc[mem_mode]
3391 = memory_address_addr_space_p (mem_mode, addr, as);
3392
3393 if (has_postinc[mem_mode])
3394 data->ainc_costs[AINC_POST_INC]
3395 = address_cost (addr, mem_mode, as, speed);
3396 }
3397 for (i = 0; i < 16; i++)
3398 {
3399 sym_p = i & 1;
3400 var_p = (i >> 1) & 1;
3401 off_p = (i >> 2) & 1;
3402 rat_p = (i >> 3) & 1;
3403
3404 addr = reg0;
3405 if (rat_p)
3406 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3407 gen_int_mode (rat, address_mode));
3408
3409 if (var_p)
3410 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3411
3412 if (sym_p)
3413 {
3414 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3415 /* ??? We can run into trouble with some backends by presenting
3416 it with symbols which haven't been properly passed through
3417 targetm.encode_section_info. By setting the local bit, we
3418 enhance the probability of things working. */
3419 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3420
3421 if (off_p)
3422 base = gen_rtx_fmt_e (CONST, address_mode,
3423 gen_rtx_fmt_ee
3424 (PLUS, address_mode, base,
3425 gen_int_mode (off, address_mode)));
3426 }
3427 else if (off_p)
3428 base = gen_int_mode (off, address_mode);
3429 else
3430 base = NULL_RTX;
3431
3432 if (base)
3433 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3434
3435 start_sequence ();
3436 /* To avoid splitting addressing modes, pretend that no cse will
3437 follow. */
3438 old_cse_not_expected = cse_not_expected;
3439 cse_not_expected = true;
3440 addr = memory_address_addr_space (mem_mode, addr, as);
3441 cse_not_expected = old_cse_not_expected;
3442 seq = get_insns ();
3443 end_sequence ();
3444
3445 acost = seq_cost (seq, speed);
3446 acost += address_cost (addr, mem_mode, as, speed);
3447
3448 if (!acost)
3449 acost = 1;
3450 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3451 }
3452
3453 /* On some targets, it is quite expensive to load a symbol into a register,
3454 which makes addresses that contain symbols look much more expensive.
3455 However, the symbol will have to be loaded in any case before the
3456 loop (and quite likely we have it in a register already), so it does not
3457 make much sense to penalize them too heavily. So make some final
3458 tweaks for the SYMBOL_PRESENT modes:
3459
3460 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3461 a var is cheaper, use this mode with a small penalty.
3462 If VAR_PRESENT is true, test whether the mode with
3463 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3464 and if this is the case, use it. */
3465 add_c = add_cost (speed, address_mode);
3466 for (i = 0; i < 8; i++)
3467 {
3468 var_p = i & 1;
3469 off_p = (i >> 1) & 1;
3470 rat_p = (i >> 2) & 1;
3471
3472 acost = data->costs[0][1][off_p][rat_p] + 1;
3473 if (var_p)
3474 acost += add_c;
3475
3476 if (acost < data->costs[1][var_p][off_p][rat_p])
3477 data->costs[1][var_p][off_p][rat_p] = acost;
3478 }
3479
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3481 {
3482 fprintf (dump_file, "Address costs:\n");
3483
3484 for (i = 0; i < 16; i++)
3485 {
3486 sym_p = i & 1;
3487 var_p = (i >> 1) & 1;
3488 off_p = (i >> 2) & 1;
3489 rat_p = (i >> 3) & 1;
3490
3491 fprintf (dump_file, " ");
3492 if (sym_p)
3493 fprintf (dump_file, "sym + ");
3494 if (var_p)
3495 fprintf (dump_file, "var + ");
3496 if (off_p)
3497 fprintf (dump_file, "cst + ");
3498 if (rat_p)
3499 fprintf (dump_file, "rat * ");
3500
3501 acost = data->costs[sym_p][var_p][off_p][rat_p];
3502 fprintf (dump_file, "index costs %d\n", acost);
3503 }
3504 if (has_predec[mem_mode] || has_postdec[mem_mode]
3505 || has_preinc[mem_mode] || has_postinc[mem_mode])
3506 fprintf (dump_file, " May include autoinc/dec\n");
3507 fprintf (dump_file, "\n");
3508 }
3509
3510 address_cost_data_list[data_index] = data;
3511 }
3512
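/* Sign-extend OFFSET from the width of the address mode; e.g. with
   bits == 8 the mask is 0xff, so an offset of 0xf0 becomes -16, and the
   range checks against min_offset/max_offset below operate on the signed
   value. */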
3513 bits = GET_MODE_BITSIZE (address_mode);
3514 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3515 offset &= mask;
3516 if ((offset >> (bits - 1) & 1))
3517 offset |= ~mask;
3518 s_offset = offset;
3519
3520 autoinc = false;
3521 autoinc_type = AINC_NONE;
3522 msize = GET_MODE_SIZE (mem_mode);
3523 autoinc_offset = offset;
3524 if (stmt_after_inc)
3525 autoinc_offset += ratio * cstep;
3526 if (symbol_present || var_present || ratio != 1)
3527 autoinc = false;
3528 else
3529 {
3530 if (has_postinc[mem_mode] && autoinc_offset == 0
3531 && msize == cstep)
3532 autoinc_type = AINC_POST_INC;
3533 else if (has_postdec[mem_mode] && autoinc_offset == 0
3534 && msize == -cstep)
3535 autoinc_type = AINC_POST_DEC;
3536 else if (has_preinc[mem_mode] && autoinc_offset == msize
3537 && msize == cstep)
3538 autoinc_type = AINC_PRE_INC;
3539 else if (has_predec[mem_mode] && autoinc_offset == -msize
3540 && msize == -cstep)
3541 autoinc_type = AINC_PRE_DEC;
3542
3543 if (autoinc_type != AINC_NONE)
3544 autoinc = true;
3545 }
3546
3547 cost = 0;
3548 offset_p = (s_offset != 0
3549 && data->min_offset <= s_offset
3550 && s_offset <= data->max_offset);
3551 ratio_p = (ratio != 1
3552 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3553
3554 if (ratio != 1 && !ratio_p)
3555 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3556
3557 if (s_offset && !offset_p && !symbol_present)
3558 cost += add_cost (speed, address_mode);
3559
3560 if (may_autoinc)
3561 *may_autoinc = autoinc;
3562 if (autoinc)
3563 acost = data->ainc_costs[autoinc_type];
3564 else
3565 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3566 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3567 return new_cost (cost + acost, complexity);
3568 }
3569
3570 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3571 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3572 calculating the operands of EXPR. Returns true if successful, and returns
3573 the cost in COST. */
3574
3575 static bool
3576 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3577 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3578 {
3579 comp_cost res;
3580 tree op1 = TREE_OPERAND (expr, 1);
3581 tree cst = TREE_OPERAND (mult, 1);
3582 tree multop = TREE_OPERAND (mult, 0);
3583 int m = exact_log2 (int_cst_value (cst));
3584 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3585 int sa_cost;
3586 bool equal_p = false;
3587
3588 if (!(m >= 0 && m < maxm))
3589 return false;
3590
3591 if (operand_equal_p (op1, mult, 0))
3592 equal_p = true;
3593
3594 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3595 ? shiftadd_cost (speed, mode, m)
3596 : (equal_p
3597 ? shiftsub1_cost (speed, mode, m)
3598 : shiftsub0_cost (speed, mode, m)));
3599 res = new_cost (sa_cost, 0);
3600 res = add_costs (res, equal_p ? cost0 : cost1);
3601
3602 STRIP_NOPS (multop);
3603 if (!is_gimple_val (multop))
3604 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3605
3606 *cost = res;
3607 return true;
3608 }
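
/* Illustration (with hypothetical operands a and b): for expr = a + b * 8,
   MULT is b * 8, so m == exact_log2 (8) == 3 and the returned cost is
   shiftadd_cost (speed, mode, 3) plus the cost of the other operand;
   a - b * 8 is costed with the shiftsub variants instead. */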
3609
3610 /* Estimates cost of forcing expression EXPR into a variable. */
3611
3612 static comp_cost
3613 force_expr_to_var_cost (tree expr, bool speed)
3614 {
3615 static bool costs_initialized = false;
3616 static unsigned integer_cost [2];
3617 static unsigned symbol_cost [2];
3618 static unsigned address_cost [2];
3619 tree op0, op1;
3620 comp_cost cost0, cost1, cost;
3621 machine_mode mode;
3622
3623 if (!costs_initialized)
3624 {
3625 tree type = build_pointer_type (integer_type_node);
3626 tree var, addr;
3627 rtx x;
3628 int i;
3629
3630 var = create_tmp_var_raw (integer_type_node, "test_var");
3631 TREE_STATIC (var) = 1;
3632 x = produce_memory_decl_rtl (var, NULL);
3633 SET_DECL_RTL (var, x);
3634
3635 addr = build1 (ADDR_EXPR, type, var);
3636
3637
3638 for (i = 0; i < 2; i++)
3639 {
3640 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3641 2000), i);
3642
3643 symbol_cost[i] = computation_cost (addr, i) + 1;
3644
3645 address_cost[i]
3646 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3647 if (dump_file && (dump_flags & TDF_DETAILS))
3648 {
3649 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3650 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3651 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3652 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3653 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3654 fprintf (dump_file, "\n");
3655 }
3656 }
3657
3658 costs_initialized = true;
3659 }
3660
3661 STRIP_NOPS (expr);
3662
3663 if (SSA_VAR_P (expr))
3664 return no_cost;
3665
3666 if (is_gimple_min_invariant (expr))
3667 {
3668 if (TREE_CODE (expr) == INTEGER_CST)
3669 return new_cost (integer_cost [speed], 0);
3670
3671 if (TREE_CODE (expr) == ADDR_EXPR)
3672 {
3673 tree obj = TREE_OPERAND (expr, 0);
3674
3675 if (TREE_CODE (obj) == VAR_DECL
3676 || TREE_CODE (obj) == PARM_DECL
3677 || TREE_CODE (obj) == RESULT_DECL)
3678 return new_cost (symbol_cost [speed], 0);
3679 }
3680
3681 return new_cost (address_cost [speed], 0);
3682 }
3683
3684 switch (TREE_CODE (expr))
3685 {
3686 case POINTER_PLUS_EXPR:
3687 case PLUS_EXPR:
3688 case MINUS_EXPR:
3689 case MULT_EXPR:
3690 op0 = TREE_OPERAND (expr, 0);
3691 op1 = TREE_OPERAND (expr, 1);
3692 STRIP_NOPS (op0);
3693 STRIP_NOPS (op1);
3694 break;
3695
3696 CASE_CONVERT:
3697 case NEGATE_EXPR:
3698 op0 = TREE_OPERAND (expr, 0);
3699 STRIP_NOPS (op0);
3700 op1 = NULL_TREE;
3701 break;
3702
3703 default:
3704 /* Just an arbitrary value, FIXME. */
3705 return new_cost (target_spill_cost[speed], 0);
3706 }
3707
3708 if (op0 == NULL_TREE
3709 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3710 cost0 = no_cost;
3711 else
3712 cost0 = force_expr_to_var_cost (op0, speed);
3713
3714 if (op1 == NULL_TREE
3715 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3716 cost1 = no_cost;
3717 else
3718 cost1 = force_expr_to_var_cost (op1, speed);
3719
3720 mode = TYPE_MODE (TREE_TYPE (expr));
3721 switch (TREE_CODE (expr))
3722 {
3723 case POINTER_PLUS_EXPR:
3724 case PLUS_EXPR:
3725 case MINUS_EXPR:
3726 case NEGATE_EXPR:
3727 cost = new_cost (add_cost (speed, mode), 0);
3728 if (TREE_CODE (expr) != NEGATE_EXPR)
3729 {
3730 tree mult = NULL_TREE;
3731 comp_cost sa_cost;
3732 if (TREE_CODE (op1) == MULT_EXPR)
3733 mult = op1;
3734 else if (TREE_CODE (op0) == MULT_EXPR)
3735 mult = op0;
3736
3737 if (mult != NULL_TREE
3738 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3739 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3740 speed, &sa_cost))
3741 return sa_cost;
3742 }
3743 break;
3744
3745 CASE_CONVERT:
3746 {
3747 tree inner_mode, outer_mode;
3748 outer_mode = TREE_TYPE (expr);
3749 inner_mode = TREE_TYPE (op0);
3750 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3751 TYPE_MODE (inner_mode), speed), 0);
3752 }
3753 break;
3754
3755 case MULT_EXPR:
3756 if (cst_and_fits_in_hwi (op0))
3757 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3758 mode, speed), 0);
3759 else if (cst_and_fits_in_hwi (op1))
3760 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3761 mode, speed), 0);
3762 else
3763 return new_cost (target_spill_cost [speed], 0);
3764 break;
3765
3766 default:
3767 gcc_unreachable ();
3768 }
3769
3770 cost = add_costs (cost, cost0);
3771 cost = add_costs (cost, cost1);
3772
3773 /* Bound the cost by target_spill_cost. The parts of complicated
3774 computations often are either loop invariant or at least can
3775 be shared between several iv uses, so letting this grow without
3776 limits would not give reasonable results. */
3777 if (cost.cost > (int) target_spill_cost [speed])
3778 cost.cost = target_spill_cost [speed];
3779
3780 return cost;
3781 }
3782
3783 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3784 invariants the computation depends on. */
3785
3786 static comp_cost
3787 force_var_cost (struct ivopts_data *data,
3788 tree expr, bitmap *depends_on)
3789 {
3790 if (depends_on)
3791 {
3792 fd_ivopts_data = data;
3793 walk_tree (&expr, find_depends, depends_on, NULL);
3794 }
3795
3796 return force_expr_to_var_cost (expr, data->speed);
3797 }
3798
3799 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3800 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
3801 set to false if the corresponding part is missing. DEPENDS_ON is a set of
3802 the invariants the computation depends on. */
3803
3804 static comp_cost
3805 split_address_cost (struct ivopts_data *data,
3806 tree addr, bool *symbol_present, bool *var_present,
3807 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3808 {
3809 tree core;
3810 HOST_WIDE_INT bitsize;
3811 HOST_WIDE_INT bitpos;
3812 tree toffset;
3813 machine_mode mode;
3814 int unsignedp, volatilep;
3815
3816 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3817 &unsignedp, &volatilep, false);
3818
3819 if (toffset != 0
3820 || bitpos % BITS_PER_UNIT != 0
3821 || TREE_CODE (core) != VAR_DECL)
3822 {
3823 *symbol_present = false;
3824 *var_present = true;
3825 fd_ivopts_data = data;
3826 walk_tree (&addr, find_depends, depends_on, NULL);
3827 return new_cost (target_spill_cost[data->speed], 0);
3828 }
3829
3830 *offset += bitpos / BITS_PER_UNIT;
3831 if (TREE_STATIC (core)
3832 || DECL_EXTERNAL (core))
3833 {
3834 *symbol_present = true;
3835 *var_present = false;
3836 return no_cost;
3837 }
3838
3839 *symbol_present = false;
3840 *var_present = true;
3841 return no_cost;
3842 }
3843
3844 /* Estimates cost of expressing the difference of addresses E1 - E2 as
3845 var + symbol + offset. The value of the offset is added to OFFSET;
3846 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3847 part is missing. DEPENDS_ON is a set of the invariants the computation
3848 depends on. */
3849
3850 static comp_cost
3851 ptr_difference_cost (struct ivopts_data *data,
3852 tree e1, tree e2, bool *symbol_present, bool *var_present,
3853 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3854 {
3855 HOST_WIDE_INT diff = 0;
3856 aff_tree aff_e1, aff_e2;
3857 tree type;
3858
3859 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3860
3861 if (ptr_difference_const (e1, e2, &diff))
3862 {
3863 *offset += diff;
3864 *symbol_present = false;
3865 *var_present = false;
3866 return no_cost;
3867 }
3868
3869 if (integer_zerop (e2))
3870 return split_address_cost (data, TREE_OPERAND (e1, 0),
3871 symbol_present, var_present, offset, depends_on);
3872
3873 *symbol_present = false;
3874 *var_present = true;
3875
3876 type = signed_type_for (TREE_TYPE (e1));
3877 tree_to_aff_combination (e1, type, &aff_e1);
3878 tree_to_aff_combination (e2, type, &aff_e2);
3879 aff_combination_scale (&aff_e2, -1);
3880 aff_combination_add (&aff_e1, &aff_e2);
3881
3882 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3883 }
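
/* Example (assuming an array A of 4-byte elements): for e1 = &a[4] and
   e2 = &a[0], ptr_difference_const computes diff == 16, so *offset grows
   by 16, neither a symbol nor a variable part remains, and the result is
   no_cost. */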
3884
3885 /* Estimates cost of expressing the difference E1 - E2 as
3886 var + symbol + offset. The value of the offset is added to OFFSET;
3887 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3888 part is missing. DEPENDS_ON is a set of the invariants the computation
3889 depends on. */
3890
3891 static comp_cost
3892 difference_cost (struct ivopts_data *data,
3893 tree e1, tree e2, bool *symbol_present, bool *var_present,
3894 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3895 {
3896 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3897 unsigned HOST_WIDE_INT off1, off2;
3898 aff_tree aff_e1, aff_e2;
3899 tree type;
3900
3901 e1 = strip_offset (e1, &off1);
3902 e2 = strip_offset (e2, &off2);
3903 *offset += off1 - off2;
3904
3905 STRIP_NOPS (e1);
3906 STRIP_NOPS (e2);
3907
3908 if (TREE_CODE (e1) == ADDR_EXPR)
3909 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3910 offset, depends_on);
3911 *symbol_present = false;
3912
3913 if (operand_equal_p (e1, e2, 0))
3914 {
3915 *var_present = false;
3916 return no_cost;
3917 }
3918
3919 *var_present = true;
3920
3921 if (integer_zerop (e2))
3922 return force_var_cost (data, e1, depends_on);
3923
3924 if (integer_zerop (e1))
3925 {
3926 comp_cost cost = force_var_cost (data, e2, depends_on);
3927 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3928 return cost;
3929 }
3930
3931 type = signed_type_for (TREE_TYPE (e1));
3932 tree_to_aff_combination (e1, type, &aff_e1);
3933 tree_to_aff_combination (e2, type, &aff_e2);
3934 aff_combination_scale (&aff_e2, -1);
3935 aff_combination_add (&aff_e1, &aff_e2);
3936
3937 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3938 }
3939
3940 /* Returns true if AFF1 and AFF2 are identical. */
3941
3942 static bool
3943 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3944 {
3945 unsigned i;
3946
3947 if (aff1->n != aff2->n)
3948 return false;
3949
3950 for (i = 0; i < aff1->n; i++)
3951 {
3952 if (aff1->elts[i].coef != aff2->elts[i].coef)
3953 return false;
3954
3955 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3956 return false;
3957 }
3958 return true;
3959 }
3960
3961 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3962
3963 static int
3964 get_expr_id (struct ivopts_data *data, tree expr)
3965 {
3966 struct iv_inv_expr_ent ent;
3967 struct iv_inv_expr_ent **slot;
3968
3969 ent.expr = expr;
3970 ent.hash = iterative_hash_expr (expr, 0);
3971 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3972 if (*slot)
3973 return (*slot)->id;
3974
3975 *slot = XNEW (struct iv_inv_expr_ent);
3976 (*slot)->expr = expr;
3977 (*slot)->hash = ent.hash;
3978 (*slot)->id = data->inv_expr_id++;
3979 return (*slot)->id;
3980 }
3981
3982 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3983 requires a new compiler-generated temporary. Returns -1 otherwise.
3984 ADDRESS_P is a flag indicating if the expression is for address
3985 computation. */
3986
3987 static int
3988 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3989 tree cbase, HOST_WIDE_INT ratio,
3990 bool address_p)
3991 {
3992 aff_tree ubase_aff, cbase_aff;
3993 tree expr, ub, cb;
3994
3995 STRIP_NOPS (ubase);
3996 STRIP_NOPS (cbase);
3997 ub = ubase;
3998 cb = cbase;
3999
4000 if ((TREE_CODE (ubase) == INTEGER_CST)
4001 && (TREE_CODE (cbase) == INTEGER_CST))
4002 return -1;
4003
4004 /* Strips the constant part. */
4005 if (TREE_CODE (ubase) == PLUS_EXPR
4006 || TREE_CODE (ubase) == MINUS_EXPR
4007 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4008 {
4009 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4010 ubase = TREE_OPERAND (ubase, 0);
4011 }
4012
4013 /* Strips the constant part. */
4014 if (TREE_CODE (cbase) == PLUS_EXPR
4015 || TREE_CODE (cbase) == MINUS_EXPR
4016 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4017 {
4018 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4019 cbase = TREE_OPERAND (cbase, 0);
4020 }
4021
4022 if (address_p)
4023 {
4024 if (((TREE_CODE (ubase) == SSA_NAME)
4025 || (TREE_CODE (ubase) == ADDR_EXPR
4026 && is_gimple_min_invariant (ubase)))
4027 && (TREE_CODE (cbase) == INTEGER_CST))
4028 return -1;
4029
4030 if (((TREE_CODE (cbase) == SSA_NAME)
4031 || (TREE_CODE (cbase) == ADDR_EXPR
4032 && is_gimple_min_invariant (cbase)))
4033 && (TREE_CODE (ubase) == INTEGER_CST))
4034 return -1;
4035 }
4036
4037 if (ratio == 1)
4038 {
4039 if (operand_equal_p (ubase, cbase, 0))
4040 return -1;
4041
4042 if (TREE_CODE (ubase) == ADDR_EXPR
4043 && TREE_CODE (cbase) == ADDR_EXPR)
4044 {
4045 tree usym, csym;
4046
4047 usym = TREE_OPERAND (ubase, 0);
4048 csym = TREE_OPERAND (cbase, 0);
4049 if (TREE_CODE (usym) == ARRAY_REF)
4050 {
4051 tree ind = TREE_OPERAND (usym, 1);
4052 if (TREE_CODE (ind) == INTEGER_CST
4053 && tree_fits_shwi_p (ind)
4054 && tree_to_shwi (ind) == 0)
4055 usym = TREE_OPERAND (usym, 0);
4056 }
4057 if (TREE_CODE (csym) == ARRAY_REF)
4058 {
4059 tree ind = TREE_OPERAND (csym, 1);
4060 if (TREE_CODE (ind) == INTEGER_CST
4061 && tree_fits_shwi_p (ind)
4062 && tree_to_shwi (ind) == 0)
4063 csym = TREE_OPERAND (csym, 0);
4064 }
4065 if (operand_equal_p (usym, csym, 0))
4066 return -1;
4067 }
4068 /* Now do a more complex comparison. */
4069 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4070 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4071 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4072 return -1;
4073 }
4074
4075 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4076 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4077
4078 aff_combination_scale (&cbase_aff, -1 * ratio);
4079 aff_combination_add (&ubase_aff, &cbase_aff);
4080 expr = aff_combination_to_tree (&ubase_aff);
4081 return get_expr_id (data, expr);
4082 }
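#if 0
/* Editor's sketch of the expression this function identifies.  For a use
   with base `n + 4', a candidate with base `m' and ratio 2, the rewrite
   needs the loop-invariant value `n + 4 - 2 * m', computed once before
   the loop; all use/candidate pairings that produce the same expression
   share one inv_expr_id.  Hypothetical types and values.  */
static long
invariant_part (long n, long m)
{
  return n + 4 - 2 * m;		/* UBASE - RATIO * CBASE */
}
#endif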
4083
4084
4085
4086 /* Determines the cost of the computation by which USE is expressed
4087 from induction variable CAND. If ADDRESS_P is true, we just need
4088 to create an address from it, otherwise we want to get it into a
4089 register. A set of invariants we depend on is stored in
4090 DEPENDS_ON. AT is the statement at which the value is computed.
4091 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4092 addressing is likely. */
4093
4094 static comp_cost
4095 get_computation_cost_at (struct ivopts_data *data,
4096 struct iv_use *use, struct iv_cand *cand,
4097 bool address_p, bitmap *depends_on, gimple at,
4098 bool *can_autoinc,
4099 int *inv_expr_id)
4100 {
4101 tree ubase = use->iv->base, ustep = use->iv->step;
4102 tree cbase, cstep;
4103 tree utype = TREE_TYPE (ubase), ctype;
4104 unsigned HOST_WIDE_INT cstepi, offset = 0;
4105 HOST_WIDE_INT ratio, aratio;
4106 bool var_present, symbol_present, stmt_is_after_inc;
4107 comp_cost cost;
4108 widest_int rat;
4109 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4110 machine_mode mem_mode = (address_p
4111 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4112 : VOIDmode);
4113
4114 *depends_on = NULL;
4115
4116 /* Only consider real candidates. */
4117 if (!cand->iv)
4118 return infinite_cost;
4119
4120 cbase = cand->iv->base;
4121 cstep = cand->iv->step;
4122 ctype = TREE_TYPE (cbase);
4123
4124 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4125 {
4126 /* We do not have enough precision to express the values of the use. */
4127 return infinite_cost;
4128 }
4129
4130 if (address_p
4131 || (use->iv->base_object
4132 && cand->iv->base_object
4133 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4134 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4135 {
4136 /* Do not try to express the address of an object with a computation
4137 based on the address of a different object. This may cause problems
4138 in RTL-level alias analysis (which does not expect this to happen,
4139 as it is illegal in C), and would be unlikely to be useful
4140 anyway. */
4141 if (use->iv->base_object
4142 && cand->iv->base_object
4143 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4144 return infinite_cost;
4145 }
4146
4147 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4148 {
4149 /* TODO -- add direct handling of this case. */
4150 goto fallback;
4151 }
4152
4153 /* CSTEPI is removed from the offset in case the statement is after the
4154 increment. If the step is not constant, we use zero instead.
4155 This is a bit imprecise (there is the extra addition), but
4156 redundancy elimination is likely to transform the code so that
4157 it uses the value of the variable before the increment anyway,
4158 so this is not all that unrealistic. */
4159 if (cst_and_fits_in_hwi (cstep))
4160 cstepi = int_cst_value (cstep);
4161 else
4162 cstepi = 0;
4163
4164 if (!constant_multiple_of (ustep, cstep, &rat))
4165 return infinite_cost;
4166
4167 if (wi::fits_shwi_p (rat))
4168 ratio = rat.to_shwi ();
4169 else
4170 return infinite_cost;
4171
4172 STRIP_NOPS (cbase);
4173 ctype = TREE_TYPE (cbase);
4174
4175 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4176
4177 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4178 or ratio == 1, it is better to handle this like
4179
4180 ubase - ratio * cbase + ratio * var
4181
4182 (this also holds in the case ratio == -1, TODO). */
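 /* For instance, with ubase == 12, cbase == 4 and ratio == 2 (editor's
    illustrative values), the use is 12 + 2 * (var - 4) == 2 * var + 4,
    so the whole invariant part folds into the constant offset 4.  */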
4183
4184 if (cst_and_fits_in_hwi (cbase))
4185 {
4186 offset = - ratio * int_cst_value (cbase);
4187 cost = difference_cost (data,
4188 ubase, build_int_cst (utype, 0),
4189 &symbol_present, &var_present, &offset,
4190 depends_on);
4191 cost.cost /= avg_loop_niter (data->current_loop);
4192 }
4193 else if (ratio == 1)
4194 {
4195 tree real_cbase = cbase;
4196
4197 /* Check to see if any adjustment is needed. */
4198 if (cstepi == 0 && stmt_is_after_inc)
4199 {
4200 aff_tree real_cbase_aff;
4201 aff_tree cstep_aff;
4202
4203 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4204 &real_cbase_aff);
4205 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4206
4207 aff_combination_add (&real_cbase_aff, &cstep_aff);
4208 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4209 }
4210
4211 cost = difference_cost (data,
4212 ubase, real_cbase,
4213 &symbol_present, &var_present, &offset,
4214 depends_on);
4215 cost.cost /= avg_loop_niter (data->current_loop);
4216 }
4217 else if (address_p
4218 && !POINTER_TYPE_P (ctype)
4219 && multiplier_allowed_in_address_p
4220 (ratio, mem_mode,
4221 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4222 {
4223 cbase
4224 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4225 cost = difference_cost (data,
4226 ubase, cbase,
4227 &symbol_present, &var_present, &offset,
4228 depends_on);
4229 cost.cost /= avg_loop_niter (data->current_loop);
4230 }
4231 else
4232 {
4233 cost = force_var_cost (data, cbase, depends_on);
4234 cost = add_costs (cost,
4235 difference_cost (data,
4236 ubase, build_int_cst (utype, 0),
4237 &symbol_present, &var_present,
4238 &offset, depends_on));
4239 cost.cost /= avg_loop_niter (data->current_loop);
4240 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4241 }
4242
4243 if (inv_expr_id)
4244 {
4245 *inv_expr_id =
4246 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4247 /* Clear depends on. */
4248 if (*inv_expr_id != -1 && depends_on && *depends_on)
4249 bitmap_clear (*depends_on);
4250 }
4251
4252 /* If we are after the increment, the value of the candidate is higher by
4253 one iteration. */
4254 if (stmt_is_after_inc)
4255 offset -= ratio * cstepi;
4256
4257 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4258 (symbol/var1/const parts may be omitted). If we are looking for an
4259 address, find the cost of addressing this. */
4260 if (address_p)
4261 return add_costs (cost,
4262 get_address_cost (symbol_present, var_present,
4263 offset, ratio, cstepi,
4264 mem_mode,
4265 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4266 speed, stmt_is_after_inc,
4267 can_autoinc));
4268
4269 /* Otherwise estimate the costs for computing the expression. */
4270 if (!symbol_present && !var_present && !offset)
4271 {
4272 if (ratio != 1)
4273 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4274 return cost;
4275 }
4276
4277 /* Symbol + offset should be compile-time computable, so consider that they
4278 are added once to the variable, if present. */
4279 if (var_present && (symbol_present || offset))
4280 cost.cost += adjust_setup_cost (data,
4281 add_cost (speed, TYPE_MODE (ctype)));
4282
4283 /* Having an offset does not affect the runtime cost if it is added to a
4284 symbol, but it increases complexity.
4285 if (offset)
4286 cost.complexity++;
4287
4288 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4289
4290 aratio = ratio > 0 ? ratio : -ratio;
4291 if (aratio != 1)
4292 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4293 return cost;
4294
4295 fallback:
4296 if (can_autoinc)
4297 *can_autoinc = false;
4298
4299 {
4300 /* Just get the expression, expand it and measure the cost. */
4301 tree comp = get_computation_at (data->current_loop, use, cand, at);
4302
4303 if (!comp)
4304 return infinite_cost;
4305
4306 if (address_p)
4307 comp = build_simple_mem_ref (comp);
4308
4309 return new_cost (computation_cost (comp, speed), 0);
4310 }
4311 }
4312
4313 /* Determines the cost of the computation by which USE is expressed
4314 from induction variable CAND. If ADDRESS_P is true, we just need
4315 to create an address from it, otherwise we want to get it into a
4316 register. A set of invariants we depend on is stored in
4317 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4318 autoinc addressing is likely. */
4319
4320 static comp_cost
4321 get_computation_cost (struct ivopts_data *data,
4322 struct iv_use *use, struct iv_cand *cand,
4323 bool address_p, bitmap *depends_on,
4324 bool *can_autoinc, int *inv_expr_id)
4325 {
4326 return get_computation_cost_at (data,
4327 use, cand, address_p, depends_on, use->stmt,
4328 can_autoinc, inv_expr_id);
4329 }
4330
4331 /* Determines cost of basing replacement of USE on CAND in a generic
4332 expression. */
4333
4334 static bool
4335 determine_use_iv_cost_generic (struct ivopts_data *data,
4336 struct iv_use *use, struct iv_cand *cand)
4337 {
4338 bitmap depends_on;
4339 comp_cost cost;
4340 int inv_expr_id = -1;
4341
4342 /* The simple case first -- if we need to express the value of the preserved
4343 original biv, the cost is 0. This also prevents us from counting the
4344 cost of increment twice -- once at this use and once in the cost of
4345 the candidate. */
4346 if (cand->pos == IP_ORIGINAL
4347 && cand->incremented_at == use->stmt)
4348 {
4349 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4350 ERROR_MARK, -1);
4351 return true;
4352 }
4353
4354 cost = get_computation_cost (data, use, cand, false, &depends_on,
4355 NULL, &inv_expr_id);
4356
4357 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4358 inv_expr_id);
4359
4360 return !infinite_cost_p (cost);
4361 }
4362
4363 /* Determines cost of basing replacement of USE on CAND in an address. */
4364
4365 static bool
4366 determine_use_iv_cost_address (struct ivopts_data *data,
4367 struct iv_use *use, struct iv_cand *cand)
4368 {
4369 bitmap depends_on;
4370 bool can_autoinc;
4371 int inv_expr_id = -1;
4372 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4373 &can_autoinc, &inv_expr_id);
4374
4375 if (cand->ainc_use == use)
4376 {
4377 if (can_autoinc)
4378 cost.cost -= cand->cost_step;
4379 /* If we generated the candidate solely for exploiting autoincrement
4380 opportunities, and it turns out it can't be used, set the cost to
4381 infinity to make sure we ignore it. */
4382 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4383 cost = infinite_cost;
4384 }
4385 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4386 inv_expr_id);
4387
4388 return !infinite_cost_p (cost);
4389 }
4390
4391 /* Computes value of candidate CAND at position AT in iteration NITER, and
4392 stores it to VAL. */
4393
4394 static void
4395 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4396 aff_tree *val)
4397 {
4398 aff_tree step, delta, nit;
4399 struct iv *iv = cand->iv;
4400 tree type = TREE_TYPE (iv->base);
4401 /* Carry out the computation in an unsigned type so the arithmetic wraps. */
4402 tree steptype = (POINTER_TYPE_P (type)
4403 ? sizetype : unsigned_type_for (type));
4405
4406 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4407 aff_combination_convert (&step, steptype);
4408 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4409 aff_combination_convert (&nit, steptype);
4410 aff_combination_mult (&nit, &step, &delta);
4411 if (stmt_after_increment (loop, cand, at))
4412 aff_combination_add (&delta, &step);
4413
4414 tree_to_aff_combination (iv->base, type, val);
4415 if (!POINTER_TYPE_P (type))
4416 aff_combination_convert (val, steptype);
4417 aff_combination_add (val, &delta);
4418 }
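#if 0
/* Editor's sketch of what cand_value_at computes, in scalar form: the
   candidate's value after NITER iterations is BASE + NITER * STEP, plus
   one extra STEP when the statement follows the increment.  As above,
   the multiplication is carried out in an unsigned type so that it
   wraps.  Hypothetical signature for illustration.  */
static unsigned long
value_at (unsigned long base, unsigned long step, unsigned long niter,
	  int after_increment)
{
  unsigned long delta = niter * step;
  if (after_increment)
    delta += step;
  return base + delta;
}
#endif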
4419
4420 /* Returns period of induction variable iv. */
4421
4422 static tree
4423 iv_period (struct iv *iv)
4424 {
4425 tree step = iv->step, period, type;
4426 tree pow2div;
4427
4428 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4429
4430 type = unsigned_type_for (TREE_TYPE (step));
4431 /* The period of the iv is lcm (step, type_range)/step - 1,
4432 i.e., N*type_range/step - 1. Since the type range is a power
4433 of two, N == step >> num_of_ending_zeros_binary (step),
4434 so the final result is
4435
4436 (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
4439 pow2div = num_ending_zeros (step);
4440
4441 period = build_low_bits_mask (type,
4442 (TYPE_PRECISION (type)
4443 - tree_to_uhwi (pow2div)));
4444
4445 return period;
4446 }
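#if 0
/* Editor's sketch checking the period formula with plain unsigned
   arithmetic (hypothetical helper, not the GCC API).  For an 8-bit
   unsigned iv with step 4, the step has 2 trailing zero bits, so the
   mask is (1 << (8 - 2)) - 1 == 63: the iv takes 64 distinct values
   before revisiting its starting point.  */
#include <stdio.h>

static unsigned
period_mask (unsigned step, unsigned precision)
{
  return (1u << (precision - __builtin_ctz (step))) - 1;
}

int
main (void)
{
  printf ("%u\n", period_mask (4, 8));	/* prints 63 */
  return 0;
}
#endif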
4447
4448 /* Returns the comparison operator used when eliminating the iv USE. */
4449
4450 static enum tree_code
4451 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4452 {
4453 struct loop *loop = data->current_loop;
4454 basic_block ex_bb;
4455 edge exit;
4456
4457 ex_bb = gimple_bb (use->stmt);
4458 exit = EDGE_SUCC (ex_bb, 0);
4459 if (flow_bb_inside_loop_p (loop, exit->dest))
4460 exit = EDGE_SUCC (ex_bb, 1);
4461
4462 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4463 }
4464
4465 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4466 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4467 calculation is performed in a non-wrapping type.
4468
4469 TODO: More generally, we could test for the situation that
4470 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4471 This would require knowing the sign of OFFSET. */
4472
4473 static bool
4474 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4475 {
4476 enum tree_code code;
4477 tree e1, e2;
4478 aff_tree aff_e1, aff_e2, aff_offset;
4479
4480 if (!nowrap_type_p (TREE_TYPE (base)))
4481 return false;
4482
4483 base = expand_simple_operations (base);
4484
4485 if (TREE_CODE (base) == SSA_NAME)
4486 {
4487 gimple stmt = SSA_NAME_DEF_STMT (base);
4488
4489 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4490 return false;
4491
4492 code = gimple_assign_rhs_code (stmt);
4493 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4494 return false;
4495
4496 e1 = gimple_assign_rhs1 (stmt);
4497 e2 = gimple_assign_rhs2 (stmt);
4498 }
4499 else
4500 {
4501 code = TREE_CODE (base);
4502 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4503 return false;
4504 e1 = TREE_OPERAND (base, 0);
4505 e2 = TREE_OPERAND (base, 1);
4506 }
4507
4508 /* Use affine expansion as deeper inspection to prove the equality. */
4509 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4510 &aff_e2, &data->name_expansion_cache);
4511 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4512 &aff_offset, &data->name_expansion_cache);
4513 aff_combination_scale (&aff_offset, -1);
4514 switch (code)
4515 {
4516 case PLUS_EXPR:
4517 aff_combination_add (&aff_e2, &aff_offset);
4518 if (aff_combination_zero_p (&aff_e2))
4519 return true;
4520
4521 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4522 &aff_e1, &data->name_expansion_cache);
4523 aff_combination_add (&aff_e1, &aff_offset);
4524 return aff_combination_zero_p (&aff_e1);
4525
4526 case POINTER_PLUS_EXPR:
4527 aff_combination_add (&aff_e2, &aff_offset);
4528 return aff_combination_zero_p (&aff_e2);
4529
4530 default:
4531 return false;
4532 }
4533 }
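#if 0
/* Editor's sketch of the pattern recognized above: when BASE was
   computed as SOMETHING + OFFSET in a non-wrapping type (signed here),
   BASE - OFFSET is exactly SOMETHING, so the subtraction cannot
   overflow.  Hypothetical values for illustration.  */
static long
difference_example (long something, long offset)
{
  long base = something + offset;	/* assumed not to wrap */
  return base - offset;			/* == something */
}
#endif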
4534
4535 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4536 comparison with CAND. NITER describes the number of iterations of
4537 the loops. If successful, the comparison in COMP_P is altered accordingly.
4538
4539 We aim to handle the following situation:
4540
4541 sometype *base, *p;
4542 int a, b, i;
4543
4544 i = a;
4545 p = p_0 = base + a;
4546
4547 do
4548 {
4549 bla (*p);
4550 p++;
4551 i++;
4552 }
4553 while (i < b);
4554
4555 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4556 We aim to optimize this to
4557
4558 p = p_0 = base + a;
4559 do
4560 {
4561 bla (*p);
4562 p++;
4563 }
4564 while (p < p_0 - a + b);
4565
4566 This preserves correctness, since the pointer arithmetic does not
4567 overflow. More precisely:
4568
4569 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4570 overflow in computing it or the values of p.
4571 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4572 overflow. To prove this, we use the fact that p_0 = base + a. */
4573
4574 static bool
4575 iv_elimination_compare_lt (struct ivopts_data *data,
4576 struct iv_cand *cand, enum tree_code *comp_p,
4577 struct tree_niter_desc *niter)
4578 {
4579 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4580 struct aff_tree nit, tmpa, tmpb;
4581 enum tree_code comp;
4582 HOST_WIDE_INT step;
4583
4584 /* We need to know that the candidate induction variable does not overflow.
4585 While more complex analysis may be used to prove this, for now just
4586 check that the variable appears in the original program and that it
4587 is computed in a type that guarantees no overflows. */
4588 cand_type = TREE_TYPE (cand->iv->base);
4589 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4590 return false;
4591
4592 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4593 the calculation of the BOUND could overflow, making the comparison
4594 invalid. */
4595 if (!data->loop_single_exit_p)
4596 return false;
4597
4598 /* We need to be able to decide whether candidate is increasing or decreasing
4599 in order to choose the right comparison operator. */
4600 if (!cst_and_fits_in_hwi (cand->iv->step))
4601 return false;
4602 step = int_cst_value (cand->iv->step);
4603
4604 /* Check that the number of iterations matches the expected pattern:
4605 a + 1 > b ? 0 : b - a - 1. */
4606 mbz = niter->may_be_zero;
4607 if (TREE_CODE (mbz) == GT_EXPR)
4608 {
4609 /* Handle a + 1 > b. */
4610 tree op0 = TREE_OPERAND (mbz, 0);
4611 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4612 {
4613 a = TREE_OPERAND (op0, 0);
4614 b = TREE_OPERAND (mbz, 1);
4615 }
4616 else
4617 return false;
4618 }
4619 else if (TREE_CODE (mbz) == LT_EXPR)
4620 {
4621 tree op1 = TREE_OPERAND (mbz, 1);
4622
4623 /* Handle b < a + 1. */
4624 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4625 {
4626 a = TREE_OPERAND (op1, 0);
4627 b = TREE_OPERAND (mbz, 0);
4628 }
4629 else
4630 return false;
4631 }
4632 else
4633 return false;
4634
4635 /* Expected number of iterations is B - A - 1. Check that it matches
4636 the actual number, i.e., that B - A - NITER = 1. */
4637 tree_to_aff_combination (niter->niter, nit_type, &nit);
4638 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4639 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4640 aff_combination_scale (&nit, -1);
4641 aff_combination_scale (&tmpa, -1);
4642 aff_combination_add (&tmpb, &tmpa);
4643 aff_combination_add (&tmpb, &nit);
4644 if (tmpb.n != 0 || tmpb.offset != 1)
4645 return false;
4646
4647 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4648 overflow. */
4649 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4650 cand->iv->step,
4651 fold_convert (TREE_TYPE (cand->iv->step), a));
4652 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4653 return false;
4654
4655 /* Determine the new comparison operator. */
4656 comp = step < 0 ? GT_EXPR : LT_EXPR;
4657 if (*comp_p == NE_EXPR)
4658 *comp_p = comp;
4659 else if (*comp_p == EQ_EXPR)
4660 *comp_p = invert_tree_comparison (comp, false);
4661 else
4662 gcc_unreachable ();
4663
4664 return true;
4665 }
4666
4667 /* Check whether it is possible to express the condition in USE by comparison
4668 of candidate CAND. If so, store the value compared with to BOUND, and the
4669 comparison operator to COMP. */
4670
4671 static bool
4672 may_eliminate_iv (struct ivopts_data *data,
4673 struct iv_use *use, struct iv_cand *cand, tree *bound,
4674 enum tree_code *comp)
4675 {
4676 basic_block ex_bb;
4677 edge exit;
4678 tree period;
4679 struct loop *loop = data->current_loop;
4680 aff_tree bnd;
4681 struct tree_niter_desc *desc = NULL;
4682
4683 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4684 return false;
4685
4686 /* For now this works only for exits that dominate the loop latch.
4687 TODO: extend to other conditions inside loop body. */
4688 ex_bb = gimple_bb (use->stmt);
4689 if (use->stmt != last_stmt (ex_bb)
4690 || gimple_code (use->stmt) != GIMPLE_COND
4691 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4692 return false;
4693
4694 exit = EDGE_SUCC (ex_bb, 0);
4695 if (flow_bb_inside_loop_p (loop, exit->dest))
4696 exit = EDGE_SUCC (ex_bb, 1);
4697 if (flow_bb_inside_loop_p (loop, exit->dest))
4698 return false;
4699
4700 desc = niter_for_exit (data, exit);
4701 if (!desc)
4702 return false;
4703
4704 /* Determine whether we can use the variable to test the exit condition.
4705 This is the case iff the period of the induction variable is greater
4706 than the number of iterations for which the exit condition is true. */
4707 period = iv_period (cand->iv);
4708
4709 /* If the number of iterations is constant, compare against it directly. */
4710 if (TREE_CODE (desc->niter) == INTEGER_CST)
4711 {
4712 /* See cand_value_at. */
4713 if (stmt_after_increment (loop, cand, use->stmt))
4714 {
4715 if (!tree_int_cst_lt (desc->niter, period))
4716 return false;
4717 }
4718 else
4719 {
4720 if (tree_int_cst_lt (period, desc->niter))
4721 return false;
4722 }
4723 }
4724
4725 /* If not, and if this is the only possible exit of the loop, see whether
4726 we can get a conservative estimate on the number of iterations of the
4727 entire loop and compare against that instead. */
4728 else
4729 {
4730 widest_int period_value, max_niter;
4731
4732 max_niter = desc->max;
4733 if (stmt_after_increment (loop, cand, use->stmt))
4734 max_niter += 1;
4735 period_value = wi::to_widest (period);
4736 if (wi::gtu_p (max_niter, period_value))
4737 {
4738 /* See if we can take advantage of inferred loop bound information. */
4739 if (data->loop_single_exit_p)
4740 {
4741 if (!max_loop_iterations (loop, &max_niter))
4742 return false;
4743 /* The loop bound is already adjusted by adding 1. */
4744 if (wi::gtu_p (max_niter, period_value))
4745 return false;
4746 }
4747 else
4748 return false;
4749 }
4750 }
4751
4752 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4753
4754 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4755 aff_combination_to_tree (&bnd));
4756 *comp = iv_elimination_compare (data, use);
4757
4758 /* It is unlikely that computing the number of iterations using division
4759 would be more profitable than keeping the original induction variable. */
4760 if (expression_expensive_p (*bound))
4761 return false;
4762
4763 /* Sometimes, it is possible to handle the situation that the number of
4764 iterations may be zero unless additional assumptions hold, by using <
4765 instead of != in the exit condition.
4766
4767 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4768 base the exit condition on it. However, that is often too
4769 expensive. */
4770 if (!integer_zerop (desc->may_be_zero))
4771 return iv_elimination_compare_lt (data, cand, comp, desc);
4772
4773 return true;
4774 }
4775
4776 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4777 be copied, if it is used in the loop body and DATA->body_includes_call. */
4778
4779 static int
4780 parm_decl_cost (struct ivopts_data *data, tree bound)
4781 {
4782 tree sbound = bound;
4783 STRIP_NOPS (sbound);
4784
4785 if (TREE_CODE (sbound) == SSA_NAME
4786 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4787 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4788 && data->body_includes_call)
4789 return COSTS_N_INSNS (1);
4790
4791 return 0;
4792 }
4793
4794 /* Determines cost of basing replacement of USE on CAND in a condition. */
4795
4796 static bool
4797 determine_use_iv_cost_condition (struct ivopts_data *data,
4798 struct iv_use *use, struct iv_cand *cand)
4799 {
4800 tree bound = NULL_TREE;
4801 struct iv *cmp_iv;
4802 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4803 comp_cost elim_cost, express_cost, cost, bound_cost;
4804 bool ok;
4805 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4806 tree *control_var, *bound_cst;
4807 enum tree_code comp = ERROR_MARK;
4808
4809 /* Only consider real candidates. */
4810 if (!cand->iv)
4811 {
4812 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4813 ERROR_MARK, -1);
4814 return false;
4815 }
4816
4817 /* Try iv elimination. */
4818 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4819 {
4820 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4821 if (elim_cost.cost == 0)
4822 elim_cost.cost = parm_decl_cost (data, bound);
4823 else if (TREE_CODE (bound) == INTEGER_CST)
4824 elim_cost.cost = 0;
4825 /* If we replace a loop condition 'i < n' with 'p < base + n',
4826 depends_on_elim will have 'base' and 'n' set, which implies
4827 that both 'base' and 'n' will be live during the loop. More likely,
4828 'base + n' will be loop invariant, resulting in only one live value
4829 during the loop. So in that case we clear depends_on_elim and set
4830 elim_inv_expr_id instead. */
4831 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4832 {
4833 elim_inv_expr_id = get_expr_id (data, bound);
4834 bitmap_clear (depends_on_elim);
4835 }
4836 /* The bound is a loop invariant, so it will only be computed
4837 once. */
4838 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4839 }
4840 else
4841 elim_cost = infinite_cost;
4842
4843 /* Try expressing the original giv. If it is compared with an invariant,
4844 note that we cannot get rid of it. */
4845 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4846 NULL, &cmp_iv);
4847 gcc_assert (ok);
4848
4849 /* When the condition is a comparison of the candidate IV against
4850 zero, prefer this IV.
4851
4852 TODO: The constant that we're subtracting from the cost should
4853 be target-dependent. This information should be added to the
4854 target costs for each backend. */
4855 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4856 && integer_zerop (*bound_cst)
4857 && (operand_equal_p (*control_var, cand->var_after, 0)
4858 || operand_equal_p (*control_var, cand->var_before, 0)))
4859 elim_cost.cost -= 1;
4860
4861 express_cost = get_computation_cost (data, use, cand, false,
4862 &depends_on_express, NULL,
4863 &express_inv_expr_id);
4864 fd_ivopts_data = data;
4865 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4866
4867 /* Count the cost of the original bound as well. */
4868 bound_cost = force_var_cost (data, *bound_cst, NULL);
4869 if (bound_cost.cost == 0)
4870 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4871 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4872 bound_cost.cost = 0;
4873 express_cost.cost += bound_cost.cost;
4874
4875 /* Choose the better approach, preferring the eliminated IV. */
4876 if (compare_costs (elim_cost, express_cost) <= 0)
4877 {
4878 cost = elim_cost;
4879 depends_on = depends_on_elim;
4880 depends_on_elim = NULL;
4881 inv_expr_id = elim_inv_expr_id;
4882 }
4883 else
4884 {
4885 cost = express_cost;
4886 depends_on = depends_on_express;
4887 depends_on_express = NULL;
4888 bound = NULL_TREE;
4889 comp = ERROR_MARK;
4890 inv_expr_id = express_inv_expr_id;
4891 }
4892
4893 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4894
4895 if (depends_on_elim)
4896 BITMAP_FREE (depends_on_elim);
4897 if (depends_on_express)
4898 BITMAP_FREE (depends_on_express);
4899
4900 return !infinite_cost_p (cost);
4901 }
4902
4903 /* Determines cost of basing replacement of USE on CAND. Returns false
4904 if USE cannot be based on CAND. */
4905
4906 static bool
4907 determine_use_iv_cost (struct ivopts_data *data,
4908 struct iv_use *use, struct iv_cand *cand)
4909 {
4910 switch (use->type)
4911 {
4912 case USE_NONLINEAR_EXPR:
4913 return determine_use_iv_cost_generic (data, use, cand);
4914
4915 case USE_ADDRESS:
4916 return determine_use_iv_cost_address (data, use, cand);
4917
4918 case USE_COMPARE:
4919 return determine_use_iv_cost_condition (data, use, cand);
4920
4921 default:
4922 gcc_unreachable ();
4923 }
4924 }
4925
4926 /* Return true if get_computation_cost indicates that autoincrement is
4927 a possibility for the pair of USE and CAND, false otherwise. */
4928
4929 static bool
4930 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4931 struct iv_cand *cand)
4932 {
4933 bitmap depends_on;
4934 bool can_autoinc;
4935 comp_cost cost;
4936
4937 if (use->type != USE_ADDRESS)
4938 return false;
4939
4940 cost = get_computation_cost (data, use, cand, true, &depends_on,
4941 &can_autoinc, NULL);
4942
4943 BITMAP_FREE (depends_on);
4944
4945 return !infinite_cost_p (cost) && can_autoinc;
4946 }
4947
4948 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4949 use that allows autoincrement, and set their AINC_USE if possible. */
4950
4951 static void
4952 set_autoinc_for_original_candidates (struct ivopts_data *data)
4953 {
4954 unsigned i, j;
4955
4956 for (i = 0; i < n_iv_cands (data); i++)
4957 {
4958 struct iv_cand *cand = iv_cand (data, i);
4959 struct iv_use *closest_before = NULL;
4960 struct iv_use *closest_after = NULL;
4961 if (cand->pos != IP_ORIGINAL)
4962 continue;
4963
4964 for (j = 0; j < n_iv_uses (data); j++)
4965 {
4966 struct iv_use *use = iv_use (data, j);
4967 unsigned uid = gimple_uid (use->stmt);
4968
4969 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4970 continue;
4971
4972 if (uid < gimple_uid (cand->incremented_at)
4973 && (closest_before == NULL
4974 || uid > gimple_uid (closest_before->stmt)))
4975 closest_before = use;
4976
4977 if (uid > gimple_uid (cand->incremented_at)
4978 && (closest_after == NULL
4979 || uid < gimple_uid (closest_after->stmt)))
4980 closest_after = use;
4981 }
4982
4983 if (closest_before != NULL
4984 && autoinc_possible_for_pair (data, closest_before, cand))
4985 cand->ainc_use = closest_before;
4986 else if (closest_after != NULL
4987 && autoinc_possible_for_pair (data, closest_after, cand))
4988 cand->ainc_use = closest_after;
4989 }
4990 }
4991
4992 /* Finds the candidates for the induction variables. */
4993
4994 static void
4995 find_iv_candidates (struct ivopts_data *data)
4996 {
4997 /* Add commonly used ivs. */
4998 add_standard_iv_candidates (data);
4999
5000 /* Add old induction variables. */
5001 add_old_ivs_candidates (data);
5002
5003 /* Add induction variables derived from uses. */
5004 add_derived_ivs_candidates (data);
5005
5006 set_autoinc_for_original_candidates (data);
5007
5008 /* Record the important candidates. */
5009 record_important_candidates (data);
5010 }
5011
5012 /* Determines costs of basing the use of the iv on an iv candidate. */
5013
5014 static void
5015 determine_use_iv_costs (struct ivopts_data *data)
5016 {
5017 unsigned i, j;
5018 struct iv_use *use;
5019 struct iv_cand *cand;
5020 bitmap to_clear = BITMAP_ALLOC (NULL);
5021
5022 alloc_use_cost_map (data);
5023
5024 for (i = 0; i < n_iv_uses (data); i++)
5025 {
5026 use = iv_use (data, i);
5027
5028 if (data->consider_all_candidates)
5029 {
5030 for (j = 0; j < n_iv_cands (data); j++)
5031 {
5032 cand = iv_cand (data, j);
5033 determine_use_iv_cost (data, use, cand);
5034 }
5035 }
5036 else
5037 {
5038 bitmap_iterator bi;
5039
5040 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5041 {
5042 cand = iv_cand (data, j);
5043 if (!determine_use_iv_cost (data, use, cand))
5044 bitmap_set_bit (to_clear, j);
5045 }
5046
5047 /* Remove the candidates for which the cost is infinite from
5048 the list of related candidates. */
5049 bitmap_and_compl_into (use->related_cands, to_clear);
5050 bitmap_clear (to_clear);
5051 }
5052 }
5053
5054 BITMAP_FREE (to_clear);
5055
5056 if (dump_file && (dump_flags & TDF_DETAILS))
5057 {
5058 fprintf (dump_file, "Use-candidate costs:\n");
5059
5060 for (i = 0; i < n_iv_uses (data); i++)
5061 {
5062 use = iv_use (data, i);
5063
5064 fprintf (dump_file, "Use %d:\n", i);
5065 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5066 for (j = 0; j < use->n_map_members; j++)
5067 {
5068 if (!use->cost_map[j].cand
5069 || infinite_cost_p (use->cost_map[j].cost))
5070 continue;
5071
5072 fprintf (dump_file, " %d\t%d\t%d\t",
5073 use->cost_map[j].cand->id,
5074 use->cost_map[j].cost.cost,
5075 use->cost_map[j].cost.complexity);
5076 if (use->cost_map[j].depends_on)
5077 bitmap_print (dump_file,
5078 use->cost_map[j].depends_on, "","");
5079 if (use->cost_map[j].inv_expr_id != -1)
5080 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5081 fprintf (dump_file, "\n");
5082 }
5083
5084 fprintf (dump_file, "\n");
5085 }
5086 fprintf (dump_file, "\n");
5087 }
5088 }
5089
5090 /* Determines cost of the candidate CAND. */
5091
5092 static void
5093 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5094 {
5095 comp_cost cost_base;
5096 unsigned cost, cost_step;
5097 tree base;
5098
5099 if (!cand->iv)
5100 {
5101 cand->cost = 0;
5102 return;
5103 }
5104
5105 /* There are two costs associated with the candidate -- its increment
5106 and its initialization. The second is almost negligible for any loop
5107 that rolls enough, so we give it only very little weight. */
5108
5109 base = cand->iv->base;
5110 cost_base = force_var_cost (data, base, NULL);
5111 /* It is exceptional for the iv register to be initialized with
5112 the proper value at no cost. In general, there will at least be a regcopy
5113 or a const set. */
5114 if (cost_base.cost == 0)
5115 cost_base.cost = COSTS_N_INSNS (1);
5116 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5117
5118 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5119
5120 /* Prefer the original ivs unless we may gain something by replacing them.
5121 The reason is to make debugging simpler; so this is not relevant for
5122 artificial ivs created by other optimization passes. */
5123 if (cand->pos != IP_ORIGINAL
5124 || !SSA_NAME_VAR (cand->var_before)
5125 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5126 cost++;
5127
5128 /* Prefer not to insert statements into the latch unless there are some
5129 already (so that we do not create unnecessary jumps). */
5130 if (cand->pos == IP_END
5131 && empty_block_p (ip_end_pos (data->current_loop)))
5132 cost++;
5133
5134 cand->cost = cost;
5135 cand->cost_step = cost_step;
5136 }
5137
5138 /* Determines costs of computation of the candidates. */
5139
5140 static void
5141 determine_iv_costs (struct ivopts_data *data)
5142 {
5143 unsigned i;
5144
5145 if (dump_file && (dump_flags & TDF_DETAILS))
5146 {
5147 fprintf (dump_file, "Candidate costs:\n");
5148 fprintf (dump_file, " cand\tcost\n");
5149 }
5150
5151 for (i = 0; i < n_iv_cands (data); i++)
5152 {
5153 struct iv_cand *cand = iv_cand (data, i);
5154
5155 determine_iv_cost (data, cand);
5156
5157 if (dump_file && (dump_flags & TDF_DETAILS))
5158 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5159 }
5160
5161 if (dump_file && (dump_flags & TDF_DETAILS))
5162 fprintf (dump_file, "\n");
5163 }
5164
5165 /* Calculates cost for having SIZE induction variables. */
5166
5167 static unsigned
5168 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5169 {
5170 /* We add size to the cost, so that we prefer eliminating ivs
5171 if possible. */
5172 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5173 data->body_includes_call);
5174 }
5175
5176 /* For each size of the induction variable set determine the penalty. */
5177
5178 static void
5179 determine_set_costs (struct ivopts_data *data)
5180 {
5181 unsigned j, n;
5182 gphi *phi;
5183 gphi_iterator psi;
5184 tree op;
5185 struct loop *loop = data->current_loop;
5186 bitmap_iterator bi;
5187
5188 if (dump_file && (dump_flags & TDF_DETAILS))
5189 {
5190 fprintf (dump_file, "Global costs:\n");
5191 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5192 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5193 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5194 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5195 }
5196
5197 n = 0;
5198 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5199 {
5200 phi = psi.phi ();
5201 op = PHI_RESULT (phi);
5202
5203 if (virtual_operand_p (op))
5204 continue;
5205
5206 if (get_iv (data, op))
5207 continue;
5208
5209 n++;
5210 }
5211
5212 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5213 {
5214 struct version_info *info = ver_info (data, j);
5215
5216 if (info->inv_id && info->has_nonlin_use)
5217 n++;
5218 }
5219
5220 data->regs_used = n;
5221 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, " regs_used %d\n", n);
5223
5224 if (dump_file && (dump_flags & TDF_DETAILS))
5225 {
5226 fprintf (dump_file, " cost for size:\n");
5227 fprintf (dump_file, " ivs\tcost\n");
5228 for (j = 0; j <= 2 * target_avail_regs; j++)
5229 fprintf (dump_file, " %d\t%d\n", j,
5230 ivopts_global_cost_for_size (data, j));
5231 fprintf (dump_file, "\n");
5232 }
5233 }
5234
5235 /* Returns true if A is a cheaper cost pair than B. */
5236
5237 static bool
5238 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5239 {
5240 int cmp;
5241
5242 if (!a)
5243 return false;
5244
5245 if (!b)
5246 return true;
5247
5248 cmp = compare_costs (a->cost, b->cost);
5249 if (cmp < 0)
5250 return true;
5251
5252 if (cmp > 0)
5253 return false;
5254
5255 /* In case the costs are the same, prefer the cheaper candidate. */
5256 if (a->cand->cost < b->cand->cost)
5257 return true;
5258
5259 return false;
5260 }
5261
5262
5263 /* Returns the candidate by which USE is expressed in IVS. */
5264
5265 static struct cost_pair *
5266 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5267 {
5268 return ivs->cand_for_use[use->id];
5269 }
5270
5271 /* Computes the cost field of IVS structure. */
5272
5273 static void
5274 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5275 {
5276 comp_cost cost = ivs->cand_use_cost;
5277
5278 cost.cost += ivs->cand_cost;
5279
5280 cost.cost += ivopts_global_cost_for_size (data,
5281 ivs->n_regs + ivs->num_used_inv_expr);
5282
5283 ivs->cost = cost;
5284 }
5285
5286 /* Remove invariants in set INVS from set IVS. */
5287
5288 static void
5289 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5290 {
5291 bitmap_iterator bi;
5292 unsigned iid;
5293
5294 if (!invs)
5295 return;
5296
5297 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5298 {
5299 ivs->n_invariant_uses[iid]--;
5300 if (ivs->n_invariant_uses[iid] == 0)
5301 ivs->n_regs--;
5302 }
5303 }
5304
5305 /* Set USE not to be expressed by any candidate in IVS. */
5306
5307 static void
5308 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5309 struct iv_use *use)
5310 {
5311 unsigned uid = use->id, cid;
5312 struct cost_pair *cp;
5313
5314 cp = ivs->cand_for_use[uid];
5315 if (!cp)
5316 return;
5317 cid = cp->cand->id;
5318
5319 ivs->bad_uses++;
5320 ivs->cand_for_use[uid] = NULL;
5321 ivs->n_cand_uses[cid]--;
5322
5323 if (ivs->n_cand_uses[cid] == 0)
5324 {
5325 bitmap_clear_bit (ivs->cands, cid);
5326 /* Do not count the pseudocandidates. */
5327 if (cp->cand->iv)
5328 ivs->n_regs--;
5329 ivs->n_cands--;
5330 ivs->cand_cost -= cp->cand->cost;
5331
5332 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5333 }
5334
5335 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5336
5337 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5338
5339 if (cp->inv_expr_id != -1)
5340 {
5341 ivs->used_inv_expr[cp->inv_expr_id]--;
5342 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5343 ivs->num_used_inv_expr--;
5344 }
5345 iv_ca_recount_cost (data, ivs);
5346 }
5347
5348 /* Add invariants in set INVS to set IVS. */
5349
5350 static void
5351 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5352 {
5353 bitmap_iterator bi;
5354 unsigned iid;
5355
5356 if (!invs)
5357 return;
5358
5359 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5360 {
5361 ivs->n_invariant_uses[iid]++;
5362 if (ivs->n_invariant_uses[iid] == 1)
5363 ivs->n_regs++;
5364 }
5365 }
5366
5367 /* Set cost pair for USE in set IVS to CP. */
5368
5369 static void
5370 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5371 struct iv_use *use, struct cost_pair *cp)
5372 {
5373 unsigned uid = use->id, cid;
5374
5375 if (ivs->cand_for_use[uid] == cp)
5376 return;
5377
5378 if (ivs->cand_for_use[uid])
5379 iv_ca_set_no_cp (data, ivs, use);
5380
5381 if (cp)
5382 {
5383 cid = cp->cand->id;
5384
5385 ivs->bad_uses--;
5386 ivs->cand_for_use[uid] = cp;
5387 ivs->n_cand_uses[cid]++;
5388 if (ivs->n_cand_uses[cid] == 1)
5389 {
5390 bitmap_set_bit (ivs->cands, cid);
5391 /* Do not count the pseudocandidates. */
5392 if (cp->cand->iv)
5393 ivs->n_regs++;
5394 ivs->n_cands++;
5395 ivs->cand_cost += cp->cand->cost;
5396
5397 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5398 }
5399
5400 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5401 iv_ca_set_add_invariants (ivs, cp->depends_on);
5402
5403 if (cp->inv_expr_id != -1)
5404 {
5405 ivs->used_inv_expr[cp->inv_expr_id]++;
5406 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5407 ivs->num_used_inv_expr++;
5408 }
5409 iv_ca_recount_cost (data, ivs);
5410 }
5411 }
5412
5413 /* Extend set IVS by expressing USE by some of the candidates in it
5414 if possible. Consider all important candidates if candidates in
5415 set IVS don't give any result. */
5416
5417 static void
5418 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5419 struct iv_use *use)
5420 {
5421 struct cost_pair *best_cp = NULL, *cp;
5422 bitmap_iterator bi;
5423 unsigned i;
5424 struct iv_cand *cand;
5425
5426 gcc_assert (ivs->upto >= use->id);
5427 ivs->upto++;
5428 ivs->bad_uses++;
5429
5430 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5431 {
5432 cand = iv_cand (data, i);
5433 cp = get_use_iv_cost (data, use, cand);
5434 if (cheaper_cost_pair (cp, best_cp))
5435 best_cp = cp;
5436 }
5437
5438 if (best_cp == NULL)
5439 {
5440 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5441 {
5442 cand = iv_cand (data, i);
5443 cp = get_use_iv_cost (data, use, cand);
5444 if (cheaper_cost_pair (cp, best_cp))
5445 best_cp = cp;
5446 }
5447 }
5448
5449 iv_ca_set_cp (data, ivs, use, best_cp);
5450 }
5451
5452 /* Get cost for assignment IVS. */
5453
5454 static comp_cost
5455 iv_ca_cost (struct iv_ca *ivs)
5456 {
5457 /* This was a conditional expression but it triggered a bug in
5458 Sun C 5.5. */
5459 if (ivs->bad_uses)
5460 return infinite_cost;
5461 else
5462 return ivs->cost;
5463 }
5464
5465 /* Returns true if all dependences of CP are among invariants in IVS. */
5466
5467 static bool
5468 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5469 {
5470 unsigned i;
5471 bitmap_iterator bi;
5472
5473 if (!cp->depends_on)
5474 return true;
5475
5476 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5477 {
5478 if (ivs->n_invariant_uses[i] == 0)
5479 return false;
5480 }
5481
5482 return true;
5483 }
5484
5485 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5486 it before NEXT_CHANGE. */
5487
5488 static struct iv_ca_delta *
5489 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5490 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5491 {
5492 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5493
5494 change->use = use;
5495 change->old_cp = old_cp;
5496 change->new_cp = new_cp;
5497 change->next_change = next_change;
5498
5499 return change;
5500 }
5501
5502 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5503 are rewritten. */
5504
5505 static struct iv_ca_delta *
5506 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5507 {
5508 struct iv_ca_delta *last;
5509
5510 if (!l2)
5511 return l1;
5512
5513 if (!l1)
5514 return l2;
5515
5516 for (last = l1; last->next_change; last = last->next_change)
5517 continue;
5518 last->next_change = l2;
5519
5520 return l1;
5521 }
5522
5523 /* Reverse the list of changes DELTA, forming the inverse to it. */
5524
5525 static struct iv_ca_delta *
5526 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5527 {
5528 struct iv_ca_delta *act, *next, *prev = NULL;
5529 struct cost_pair *tmp;
5530
5531 for (act = delta; act; act = next)
5532 {
5533 next = act->next_change;
5534 act->next_change = prev;
5535 prev = act;
5536
5537 tmp = act->old_cp;
5538 act->old_cp = act->new_cp;
5539 act->new_cp = tmp;
5540 }
5541
5542 return prev;
5543 }
5544
5545 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5546 reverted instead. */
5547
5548 static void
5549 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5550 struct iv_ca_delta *delta, bool forward)
5551 {
5552 struct cost_pair *from, *to;
5553 struct iv_ca_delta *act;
5554
5555 if (!forward)
5556 delta = iv_ca_delta_reverse (delta);
5557
5558 for (act = delta; act; act = act->next_change)
5559 {
5560 from = act->old_cp;
5561 to = act->new_cp;
5562 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5563 iv_ca_set_cp (data, ivs, act->use, to);
5564 }
5565
5566 if (!forward)
5567 iv_ca_delta_reverse (delta);
5568 }
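#if 0
/* Editor's sketch of the apply/measure/revert idiom built from the two
   functions above; iv_ca_extend and iv_ca_narrow below use exactly this
   pattern to evaluate a tentative change without keeping it.  The helper
   name is hypothetical.  */
static comp_cost
measure_delta_cost (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta)
{
  comp_cost cost;

  iv_ca_delta_commit (data, ivs, delta, true);	/* tentatively apply */
  cost = iv_ca_cost (ivs);			/* measure the result */
  iv_ca_delta_commit (data, ivs, delta, false);	/* revert via reversal */
  return cost;
}
#endif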
5569
5570 /* Returns true if CAND is used in IVS. */
5571
5572 static bool
5573 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5574 {
5575 return ivs->n_cand_uses[cand->id] > 0;
5576 }
5577
5578 /* Returns number of induction variable candidates in the set IVS. */
5579
5580 static unsigned
5581 iv_ca_n_cands (struct iv_ca *ivs)
5582 {
5583 return ivs->n_cands;
5584 }
5585
5586 /* Free the list of changes DELTA. */
5587
5588 static void
5589 iv_ca_delta_free (struct iv_ca_delta **delta)
5590 {
5591 struct iv_ca_delta *act, *next;
5592
5593 for (act = *delta; act; act = next)
5594 {
5595 next = act->next_change;
5596 free (act);
5597 }
5598
5599 *delta = NULL;
5600 }
5601
5602 /* Allocates a new iv candidate assignment. */
5603
5604 static struct iv_ca *
5605 iv_ca_new (struct ivopts_data *data)
5606 {
5607 struct iv_ca *nw = XNEW (struct iv_ca);
5608
5609 nw->upto = 0;
5610 nw->bad_uses = 0;
5611 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5612 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5613 nw->cands = BITMAP_ALLOC (NULL);
5614 nw->n_cands = 0;
5615 nw->n_regs = 0;
5616 nw->cand_use_cost = no_cost;
5617 nw->cand_cost = 0;
5618 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5619 nw->cost = no_cost;
5620 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5621 nw->num_used_inv_expr = 0;
5622
5623 return nw;
5624 }
5625
5626 /* Free memory occupied by the set IVS. */
5627
5628 static void
5629 iv_ca_free (struct iv_ca **ivs)
5630 {
5631 free ((*ivs)->cand_for_use);
5632 free ((*ivs)->n_cand_uses);
5633 BITMAP_FREE ((*ivs)->cands);
5634 free ((*ivs)->n_invariant_uses);
5635 free ((*ivs)->used_inv_expr);
5636 free (*ivs);
5637 *ivs = NULL;
5638 }
5639
5640 /* Dumps IVS to FILE. */
5641
5642 static void
5643 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5644 {
5645 const char *pref = " invariants ";
5646 unsigned i;
5647 comp_cost cost = iv_ca_cost (ivs);
5648
5649 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5650 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5651 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5652 bitmap_print (file, ivs->cands, " candidates: ","\n");
5653
5654 for (i = 0; i < ivs->upto; i++)
5655 {
5656 struct iv_use *use = iv_use (data, i);
5657 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5658 if (cp)
5659 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5660 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5661 else
5662 fprintf (file, " use:%d --> ??\n", use->id);
5663 }
5664
5665 for (i = 1; i <= data->max_inv_id; i++)
5666 if (ivs->n_invariant_uses[i])
5667 {
5668 fprintf (file, "%s%d", pref, i);
5669 pref = ", ";
5670 }
5671 fprintf (file, "\n\n");
5672 }
5673
5674 /* Try changing the candidate in IVS to CAND for each use. Return the cost of
5675 the new set, and store the differences in DELTA. The number of induction
5676 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5677 the function tries to find a solution with a minimal number of iv candidates. */
5678
5679 static comp_cost
5680 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5681 struct iv_cand *cand, struct iv_ca_delta **delta,
5682 unsigned *n_ivs, bool min_ncand)
5683 {
5684 unsigned i;
5685 comp_cost cost;
5686 struct iv_use *use;
5687 struct cost_pair *old_cp, *new_cp;
5688
5689 *delta = NULL;
5690 for (i = 0; i < ivs->upto; i++)
5691 {
5692 use = iv_use (data, i);
5693 old_cp = iv_ca_cand_for_use (ivs, use);
5694
5695 if (old_cp
5696 && old_cp->cand == cand)
5697 continue;
5698
5699 new_cp = get_use_iv_cost (data, use, cand);
5700 if (!new_cp)
5701 continue;
5702
5703 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5704 continue;
5705
5706 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5707 continue;
5708
5709 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5710 }
5711
5712 iv_ca_delta_commit (data, ivs, *delta, true);
5713 cost = iv_ca_cost (ivs);
5714 if (n_ivs)
5715 *n_ivs = iv_ca_n_cands (ivs);
5716 iv_ca_delta_commit (data, ivs, *delta, false);
5717
5718 return cost;
5719 }
5720
5721 /* Try narrowing set IVS by removing CAND. Return the cost of
5722 the new set and store the differences in DELTA. START is
5723 the candidate with which we start narrowing. */
5724
5725 static comp_cost
5726 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5727 struct iv_cand *cand, struct iv_cand *start,
5728 struct iv_ca_delta **delta)
5729 {
5730 unsigned i, ci;
5731 struct iv_use *use;
5732 struct cost_pair *old_cp, *new_cp, *cp;
5733 bitmap_iterator bi;
5734 struct iv_cand *cnd;
5735 comp_cost cost, best_cost, acost;
5736
5737 *delta = NULL;
5738 for (i = 0; i < n_iv_uses (data); i++)
5739 {
5740 use = iv_use (data, i);
5741
5742 old_cp = iv_ca_cand_for_use (ivs, use);
5743 if (old_cp->cand != cand)
5744 continue;
5745
5746 best_cost = iv_ca_cost (ivs);
5747 /* Start narrowing with START. */
5748 new_cp = get_use_iv_cost (data, use, start);
5749
5750 if (data->consider_all_candidates)
5751 {
5752 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5753 {
5754 if (ci == cand->id || (start && ci == start->id))
5755 continue;
5756
5757 cnd = iv_cand (data, ci);
5758
5759 cp = get_use_iv_cost (data, use, cnd);
5760 if (!cp)
5761 continue;
5762
5763 iv_ca_set_cp (data, ivs, use, cp);
5764 acost = iv_ca_cost (ivs);
5765
5766 if (compare_costs (acost, best_cost) < 0)
5767 {
5768 best_cost = acost;
5769 new_cp = cp;
5770 }
5771 }
5772 }
5773 else
5774 {
5775 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5776 {
5777 if (ci == cand->id || (start && ci == start->id))
5778 continue;
5779
5780 cnd = iv_cand (data, ci);
5781
5782 cp = get_use_iv_cost (data, use, cnd);
5783 if (!cp)
5784 continue;
5785
5786 iv_ca_set_cp (data, ivs, use, cp);
5787 acost = iv_ca_cost (ivs);
5788
5789 if (compare_costs (acost, best_cost) < 0)
5790 {
5791 best_cost = acost;
5792 new_cp = cp;
5793 }
5794 }
5795 }
5796 /* Restore the old cp for this use. */
5797 iv_ca_set_cp (data, ivs, use, old_cp);
5798
5799 if (!new_cp)
5800 {
5801 iv_ca_delta_free (delta);
5802 return infinite_cost;
5803 }
5804
5805 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5806 }
5807
5808 iv_ca_delta_commit (data, ivs, *delta, true);
5809 cost = iv_ca_cost (ivs);
5810 iv_ca_delta_commit (data, ivs, *delta, false);
5811
5812 return cost;
5813 }
5814
5815 /* Try optimizing the set of candidates IVS by removing candidates other
5816 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5817 differences in DELTA. */
5818
5819 static comp_cost
5820 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5821 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5822 {
5823 bitmap_iterator bi;
5824 struct iv_ca_delta *act_delta, *best_delta;
5825 unsigned i;
5826 comp_cost best_cost, acost;
5827 struct iv_cand *cand;
5828
5829 best_delta = NULL;
5830 best_cost = iv_ca_cost (ivs);
5831
5832 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5833 {
5834 cand = iv_cand (data, i);
5835
5836 if (cand == except_cand)
5837 continue;
5838
5839 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5840
5841 if (compare_costs (acost, best_cost) < 0)
5842 {
5843 best_cost = acost;
5844 iv_ca_delta_free (&best_delta);
5845 best_delta = act_delta;
5846 }
5847 else
5848 iv_ca_delta_free (&act_delta);
5849 }
5850
5851 if (!best_delta)
5852 {
5853 *delta = NULL;
5854 return best_cost;
5855 }
5856
5857 /* Recurse to possibly remove other unnecessary ivs. */
5858 iv_ca_delta_commit (data, ivs, best_delta, true);
5859 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5860 iv_ca_delta_commit (data, ivs, best_delta, false);
5861 *delta = iv_ca_delta_join (best_delta, *delta);
5862 return best_cost;
5863 }
5864
5865 /* Tries to extend the set IVS in the best possible way in order
5866 to express the USE. If ORIGINALP is true, prefer candidates from
5867 the original set of IVs, otherwise favor important candidates not
5868 based on any memory object. */
5869
5870 static bool
5871 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5872 struct iv_use *use, bool originalp)
5873 {
5874 comp_cost best_cost, act_cost;
5875 unsigned i;
5876 bitmap_iterator bi;
5877 struct iv_cand *cand;
5878 struct iv_ca_delta *best_delta = NULL, *act_delta;
5879 struct cost_pair *cp;
5880
5881 iv_ca_add_use (data, ivs, use);
5882 best_cost = iv_ca_cost (ivs);
5883 cp = iv_ca_cand_for_use (ivs, use);
5884 if (cp)
5885 {
5886 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5887 iv_ca_set_no_cp (data, ivs, use);
5888 }
5889
5890 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5891 first try important candidates not based on any memory object. Only if
5892 this fails, try the specific ones. Rationale -- in loops with many
5893 variables the best choice often is to use just one generic biv. If we
5894 added here many ivs specific to the uses, the optimization algorithm later
5895 would be likely to get stuck in a local minimum, thus causing us to create
5896 too many ivs. The approach from few ivs to more seems more likely to be
5897 successful -- starting from few ivs, replacing an expensive use by a
5898 specific iv should always be a win. */
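/* As an illustrative sketch (generic costs assumed; the real numbers come
from the target hooks): in a loop such as

for (i = 0; i < n; i++)
{
a[i] = 0;
b[i] = 0;
}

a single generic biv `i', with each address expressed as `base + 4 * i',
tends to beat maintaining two separate pointer ivs `&a[i]' and `&b[i]'
once register pressure is taken into account. */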
5899 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5900 {
5901 cand = iv_cand (data, i);
5902
5903 if (originalp && cand->pos != IP_ORIGINAL)
5904 continue;
5905
5906 if (!originalp && cand->iv->base_object != NULL_TREE)
5907 continue;
5908
5909 if (iv_ca_cand_used_p (ivs, cand))
5910 continue;
5911
5912 cp = get_use_iv_cost (data, use, cand);
5913 if (!cp)
5914 continue;
5915
5916 iv_ca_set_cp (data, ivs, use, cp);
5917 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5918 true);
5919 iv_ca_set_no_cp (data, ivs, use);
5920 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5921
5922 if (compare_costs (act_cost, best_cost) < 0)
5923 {
5924 best_cost = act_cost;
5925
5926 iv_ca_delta_free (&best_delta);
5927 best_delta = act_delta;
5928 }
5929 else
5930 iv_ca_delta_free (&act_delta);
5931 }
5932
5933 if (infinite_cost_p (best_cost))
5934 {
5935 for (i = 0; i < use->n_map_members; i++)
5936 {
5937 cp = use->cost_map + i;
5938 cand = cp->cand;
5939 if (!cand)
5940 continue;
5941
5942 /* Already tried this. */
5943 if (cand->important)
5944 {
5945 if (originalp && cand->pos == IP_ORIGINAL)
5946 continue;
5947 if (!originalp && cand->iv->base_object == NULL_TREE)
5948 continue;
5949 }
5950
5951 if (iv_ca_cand_used_p (ivs, cand))
5952 continue;
5953
5954 act_delta = NULL;
5955 iv_ca_set_cp (data, ivs, use, cp);
5956 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5957 iv_ca_set_no_cp (data, ivs, use);
5958 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5959 cp, act_delta);
5960
5961 if (compare_costs (act_cost, best_cost) < 0)
5962 {
5963 best_cost = act_cost;
5964
5965 if (best_delta)
5966 iv_ca_delta_free (&best_delta);
5967 best_delta = act_delta;
5968 }
5969 else
5970 iv_ca_delta_free (&act_delta);
5971 }
5972 }
5973
5974 iv_ca_delta_commit (data, ivs, best_delta, true);
5975 iv_ca_delta_free (&best_delta);
5976
5977 return !infinite_cost_p (best_cost);
5978 }
5979
5980 /* Finds an initial assignment of candidates to uses. */
5981
5982 static struct iv_ca *
5983 get_initial_solution (struct ivopts_data *data, bool originalp)
5984 {
5985 struct iv_ca *ivs = iv_ca_new (data);
5986 unsigned i;
5987
5988 for (i = 0; i < n_iv_uses (data); i++)
5989 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5990 {
5991 iv_ca_free (&ivs);
5992 return NULL;
5993 }
5994
5995 return ivs;
5996 }
5997
5998 /* Tries to improve set of induction variables IVS. */
5999
6000 static bool
6001 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6002 {
6003 unsigned i, n_ivs;
6004 comp_cost acost, best_cost = iv_ca_cost (ivs);
6005 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6006 struct iv_cand *cand;
6007
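/* One greedy step: either grow the set by the single candidate whose
addition (followed by optional pruning) lowers the cost the most, or,
failing that, shrink it by pruning alone. */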
6008 /* Try extending the set of induction variables by one. */
6009 for (i = 0; i < n_iv_cands (data); i++)
6010 {
6011 cand = iv_cand (data, i);
6012
6013 if (iv_ca_cand_used_p (ivs, cand))
6014 continue;
6015
6016 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6017 if (!act_delta)
6018 continue;
6019
6020 /* If we successfully added the candidate, and the set is small enough
6021 that pruning stays cheap, try removing other candidates from it. */
6022 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6023 {
6024 iv_ca_delta_commit (data, ivs, act_delta, true);
6025 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6026 iv_ca_delta_commit (data, ivs, act_delta, false);
6027 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6028 }
6029
6030 if (compare_costs (acost, best_cost) < 0)
6031 {
6032 best_cost = acost;
6033 iv_ca_delta_free (&best_delta);
6034 best_delta = act_delta;
6035 }
6036 else
6037 iv_ca_delta_free (&act_delta);
6038 }
6039
6040 if (!best_delta)
6041 {
6042 /* Try removing the candidates from the set instead. */
6043 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6044
6045 /* Nothing more we can do. */
6046 if (!best_delta)
6047 return false;
6048 }
6049
6050 iv_ca_delta_commit (data, ivs, best_delta, true);
6051 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6052 iv_ca_delta_free (&best_delta);
6053 return true;
6054 }
6055
6056 /* Attempts to find the optimal set of induction variables. We use a simple
6057 greedy heuristic -- we try to replace at most one candidate in the selected
6058 solution and remove the unused ivs while this improves the cost. */
6059
6060 static struct iv_ca *
6061 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6062 {
6063 struct iv_ca *set;
6064
6065 /* Get the initial solution. */
6066 set = get_initial_solution (data, originalp);
6067 if (!set)
6068 {
6069 if (dump_file && (dump_flags & TDF_DETAILS))
6070 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6071 return NULL;
6072 }
6073
6074 if (dump_file && (dump_flags & TDF_DETAILS))
6075 {
6076 fprintf (dump_file, "Initial set of candidates:\n");
6077 iv_ca_dump (data, dump_file, set);
6078 }
6079
6080 while (try_improve_iv_set (data, set))
6081 {
6082 if (dump_file && (dump_flags & TDF_DETAILS))
6083 {
6084 fprintf (dump_file, "Improved to:\n");
6085 iv_ca_dump (data, dump_file, set);
6086 }
6087 }
6088
6089 return set;
6090 }
6091
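/* Finds the optimal set of induction variables, trying both the strategy
that prefers the original ivs and the strategy that prefers generic
candidates, and keeping whichever solution costs less. */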
6092 static struct iv_ca *
6093 find_optimal_iv_set (struct ivopts_data *data)
6094 {
6095 unsigned i;
6096 struct iv_ca *set, *origset;
6097 struct iv_use *use;
6098 comp_cost cost, origcost;
6099
6100 /* Determine the cost based on a strategy that starts with the original IVs,
6101 then try again using a strategy that prefers candidates not based
6102 on any memory object. */
6103 origset = find_optimal_iv_set_1 (data, true);
6104 set = find_optimal_iv_set_1 (data, false);
6105
6106 if (!origset && !set)
6107 return NULL;
6108
6109 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6110 cost = set ? iv_ca_cost (set) : infinite_cost;
6111
6112 if (dump_file && (dump_flags & TDF_DETAILS))
6113 {
6114 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6115 origcost.cost, origcost.complexity);
6116 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6117 cost.cost, cost.complexity);
6118 }
6119
6120 /* Choose the one with the best cost. */
6121 if (compare_costs (origcost, cost) <= 0)
6122 {
6123 if (set)
6124 iv_ca_free (&set);
6125 set = origset;
6126 }
6127 else if (origset)
6128 iv_ca_free (&origset);
6129
6130 for (i = 0; i < n_iv_uses (data); i++)
6131 {
6132 use = iv_use (data, i);
6133 use->selected = iv_ca_cand_for_use (set, use)->cand;
6134 }
6135
6136 return set;
6137 }
6138
6139 /* Creates a new induction variable corresponding to CAND. */
6140
6141 static void
6142 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6143 {
6144 gimple_stmt_iterator incr_pos;
6145 tree base;
6146 bool after = false;
6147
6148 if (!cand->iv)
6149 return;
6150
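/* Choose where the increment of the new iv goes, mirroring the position
the candidate was created with. */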
6151 switch (cand->pos)
6152 {
6153 case IP_NORMAL:
6154 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6155 break;
6156
6157 case IP_END:
6158 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6159 after = true;
6160 break;
6161
6162 case IP_AFTER_USE:
6163 after = true;
6164 /* fall through */
6165 case IP_BEFORE_USE:
6166 incr_pos = gsi_for_stmt (cand->incremented_at);
6167 break;
6168
6169 case IP_ORIGINAL:
6170 /* Mark that the iv is preserved. */
6171 name_info (data, cand->var_before)->preserve_biv = true;
6172 name_info (data, cand->var_after)->preserve_biv = true;
6173
6174 /* Rewrite the increment so that it uses var_before directly. */
6175 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6176 return;
6177 }
6178
6179 gimple_add_tmp_var (cand->var_before);
6180
6181 base = unshare_expr (cand->iv->base);
6182
6183 create_iv (base, unshare_expr (cand->iv->step),
6184 cand->var_before, data->current_loop,
6185 &incr_pos, after, &cand->var_before, &cand->var_after);
6186 }
6187
6188 /* Creates new induction variables described in SET. */
6189
6190 static void
6191 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6192 {
6193 unsigned i;
6194 struct iv_cand *cand;
6195 bitmap_iterator bi;
6196
6197 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6198 {
6199 cand = iv_cand (data, i);
6200 create_new_iv (data, cand);
6201 }
6202
6203 if (dump_file && (dump_flags & TDF_DETAILS))
6204 {
6205 fprintf (dump_file, "\nSelected IV set: \n");
6206 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6207 {
6208 cand = iv_cand (data, i);
6209 dump_cand (dump_file, cand);
6210 }
6211 fprintf (dump_file, "\n");
6212 }
6213 }
6214
6215 /* Rewrites USE (definition of iv used in a nonlinear expression)
6216 using candidate CAND. */
6217
6218 static void
6219 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6220 struct iv_use *use, struct iv_cand *cand)
6221 {
6222 tree comp;
6223 tree op, tgt;
6224 gassign *ass;
6225 gimple_stmt_iterator bsi;
6226
6227 /* An important special case -- if we are asked to express value of
6228 the original iv by itself, just exit; there is no need to
6229 introduce a new computation (that might also need casting the
6230 variable to unsigned and back). */
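/* For instance (illustrative only): when the original biv increment

i_after = i_before + 1;

is to be expressed by the candidate that is this very biv, we leave the
statement alone rather than emitting a round trip such as
i_after = (int) ((unsigned) i_before + 1). */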
6231 if (cand->pos == IP_ORIGINAL
6232 && cand->incremented_at == use->stmt)
6233 {
6234 enum tree_code stmt_code;
6235
6236 gcc_assert (is_gimple_assign (use->stmt));
6237 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6238
6239 /* Check whether we may leave the computation unchanged.
6240 This is the case only if it does not rely on other
6241 computations in the loop -- otherwise, the computation
6242 we rely upon may be removed in remove_unused_ivs,
6243 thus leading to an ICE. */
6244 stmt_code = gimple_assign_rhs_code (use->stmt);
6245 if (stmt_code == PLUS_EXPR
6246 || stmt_code == MINUS_EXPR
6247 || stmt_code == POINTER_PLUS_EXPR)
6248 {
6249 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6250 op = gimple_assign_rhs2 (use->stmt);
6251 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6252 op = gimple_assign_rhs1 (use->stmt);
6253 else
6254 op = NULL_TREE;
6255 }
6256 else
6257 op = NULL_TREE;
6258
6259 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6260 return;
6261 }
6262
6263 comp = get_computation (data->current_loop, use, cand);
6264 gcc_assert (comp != NULL_TREE);
6265
6266 switch (gimple_code (use->stmt))
6267 {
6268 case GIMPLE_PHI:
6269 tgt = PHI_RESULT (use->stmt);
6270
6271 /* If we should keep the biv, do not replace it. */
6272 if (name_info (data, tgt)->preserve_biv)
6273 return;
6274
6275 bsi = gsi_after_labels (gimple_bb (use->stmt));
6276 break;
6277
6278 case GIMPLE_ASSIGN:
6279 tgt = gimple_assign_lhs (use->stmt);
6280 bsi = gsi_for_stmt (use->stmt);
6281 break;
6282
6283 default:
6284 gcc_unreachable ();
6285 }
6286
6287 if (!valid_gimple_rhs_p (comp)
6288 || (gimple_code (use->stmt) != GIMPLE_PHI
6289 /* We can't allow re-allocating the stmt as it might be pointed
6290 to still. */
6291 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6292 >= gimple_num_ops (gsi_stmt (bsi)))))
6293 {
6294 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6295 true, GSI_SAME_STMT);
6296 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6297 {
6298 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6299 /* As this isn't a plain copy we have to reset alignment
6300 information. */
6301 if (SSA_NAME_PTR_INFO (comp))
6302 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6303 }
6304 }
6305
6306 if (gimple_code (use->stmt) == GIMPLE_PHI)
6307 {
6308 ass = gimple_build_assign (tgt, comp);
6309 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6310
6311 bsi = gsi_for_stmt (use->stmt);
6312 remove_phi_node (&bsi, false);
6313 }
6314 else
6315 {
6316 gimple_assign_set_rhs_from_tree (&bsi, comp);
6317 use->stmt = gsi_stmt (bsi);
6318 }
6319 }
6320
6321 /* Performs a peephole optimization to reorder the iv update statement with
6322 a mem ref to enable instruction combining in later phases. The mem ref uses
6323 the iv value before the update, so the reordering transformation requires
6324 adjustment of the offset. CAND is the selected IV_CAND.
6325
6326 Example:
6327
6328 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6329 iv2 = iv1 + 1;
6330
6331 if (t < val) (1)
6332 goto L;
6333 goto Head;
6334
6335
6336 Directly propagating t over to (1) will introduce an overlapping live
6337 range and thus increase register pressure. This peephole transforms it into:
6338
6339
6340 iv2 = iv1 + 1;
6341 t = MEM_REF (base, iv2, 8, 8); // offset = 16 - 8, since iv2 == iv1 + 1
6342 if (t < val)
6343 goto L;
6344 goto Head;
6345 */
6346
6347 static void
6348 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6349 {
6350 tree var_after;
6351 gimple iv_update, stmt;
6352 basic_block bb;
6353 gimple_stmt_iterator gsi, gsi_iv;
6354
6355 if (cand->pos != IP_NORMAL)
6356 return;
6357
6358 var_after = cand->var_after;
6359 iv_update = SSA_NAME_DEF_STMT (var_after);
6360
6361 bb = gimple_bb (iv_update);
6362 gsi = gsi_last_nondebug_bb (bb);
6363 stmt = gsi_stmt (gsi);
6364
6365 /* Only handle conditional statements for now. */
6366 if (gimple_code (stmt) != GIMPLE_COND)
6367 return;
6368
6369 gsi_prev_nondebug (&gsi);
6370 stmt = gsi_stmt (gsi);
6371 if (stmt != iv_update)
6372 return;
6373
6374 gsi_prev_nondebug (&gsi);
6375 if (gsi_end_p (gsi))
6376 return;
6377
6378 stmt = gsi_stmt (gsi);
6379 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6380 return;
6381
6382 if (stmt != use->stmt)
6383 return;
6384
6385 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6386 return;
6387
6388 if (dump_file && (dump_flags & TDF_DETAILS))
6389 {
6390 fprintf (dump_file, "Reordering \n");
6391 print_gimple_stmt (dump_file, iv_update, 0, 0);
6392 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6393 fprintf (dump_file, "\n");
6394 }
6395
6396 gsi = gsi_for_stmt (use->stmt);
6397 gsi_iv = gsi_for_stmt (iv_update);
6398 gsi_move_before (&gsi_iv, &gsi);
6399
6400 cand->pos = IP_BEFORE_USE;
6401 cand->incremented_at = use->stmt;
6402 }
6403
6404 /* Rewrites USE (address that is an iv) using candidate CAND. */
6405
6406 static void
6407 rewrite_use_address (struct ivopts_data *data,
6408 struct iv_use *use, struct iv_cand *cand)
6409 {
6410 aff_tree aff;
6411 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6412 tree base_hint = NULL_TREE;
6413 tree ref, iv;
6414 bool ok;
6415
6416 adjust_iv_update_pos (cand, use);
6417 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6418 gcc_assert (ok);
6419 unshare_aff_combination (&aff);
6420
6421 /* To avoid undefined overflow problems, all IV candidates use unsigned
6422 integer types. The drawback is that this makes it impossible for
6423 create_mem_ref to distinguish an IV that is based on a memory object
6424 from one that represents simply an offset.
6425
6426 To work around this problem, we pass a hint to create_mem_ref that
6427 indicates which variable (if any) in aff is an IV based on a memory
6428 object. Note that we only consider the candidate. If this is not
6429 based on an object, the base of the reference is in some subexpression
6430 of the use -- but these will use pointer types, so they are recognized
6431 by the create_mem_ref heuristics anyway. */
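/* E.g. (a sketch): for a use `p[i]' rewritten with a candidate based on
the memory object `p', aff may look like `(sizetype) p + 4 * iv', and the
hint lets create_mem_ref rebuild `MEM[base: p, index: iv, step: 4]'
instead of treating the whole sum as a plain offset. */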
6432 if (cand->iv->base_object)
6433 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6434
6435 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6436 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6437 reference_alias_ptr_type (*use->op_p),
6438 iv, base_hint, data->speed);
6439 copy_ref_info (ref, *use->op_p);
6440 *use->op_p = ref;
6441 }
6442
6443 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6444 candidate CAND. */
6445
6446 static void
6447 rewrite_use_compare (struct ivopts_data *data,
6448 struct iv_use *use, struct iv_cand *cand)
6449 {
6450 tree comp, *var_p, op, bound;
6451 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6452 enum tree_code compare;
6453 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6454 bool ok;
6455
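/* If candidate elimination computed a bound, replace the exit test by a
compare of the candidate with that bound; e.g. (a sketch) `i < n' may
become `ivtmp < base + 4 * n' when the candidate runs in address units. */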
6456 bound = cp->value;
6457 if (bound)
6458 {
6459 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6460 tree var_type = TREE_TYPE (var);
6461 gimple_seq stmts;
6462
6463 if (dump_file && (dump_flags & TDF_DETAILS))
6464 {
6465 fprintf (dump_file, "Replacing exit test: ");
6466 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6467 }
6468 compare = cp->comp;
6469 bound = unshare_expr (fold_convert (var_type, bound));
6470 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6471 if (stmts)
6472 gsi_insert_seq_on_edge_immediate (
6473 loop_preheader_edge (data->current_loop),
6474 stmts);
6475
6476 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6477 gimple_cond_set_lhs (cond_stmt, var);
6478 gimple_cond_set_code (cond_stmt, compare);
6479 gimple_cond_set_rhs (cond_stmt, op);
6480 return;
6481 }
6482
6483 /* The induction variable elimination failed; just express the original
6484 giv. */
6485 comp = get_computation (data->current_loop, use, cand);
6486 gcc_assert (comp != NULL_TREE);
6487
6488 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6489 gcc_assert (ok);
6490
6491 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6492 true, GSI_SAME_STMT);
6493 }
6494
6495 /* Rewrites USE using candidate CAND. */
6496
6497 static void
6498 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6499 {
6500 switch (use->type)
6501 {
6502 case USE_NONLINEAR_EXPR:
6503 rewrite_use_nonlinear_expr (data, use, cand);
6504 break;
6505
6506 case USE_ADDRESS:
6507 rewrite_use_address (data, use, cand);
6508 break;
6509
6510 case USE_COMPARE:
6511 rewrite_use_compare (data, use, cand);
6512 break;
6513
6514 default:
6515 gcc_unreachable ();
6516 }
6517
6518 update_stmt (use->stmt);
6519 }
6520
6521 /* Rewrite the uses using the selected induction variables. */
6522
6523 static void
6524 rewrite_uses (struct ivopts_data *data)
6525 {
6526 unsigned i;
6527 struct iv_cand *cand;
6528 struct iv_use *use;
6529
6530 for (i = 0; i < n_iv_uses (data); i++)
6531 {
6532 use = iv_use (data, i);
6533 cand = use->selected;
6534 gcc_assert (cand);
6535
6536 rewrite_use (data, use, cand);
6537 }
6538 }
6539
6540 /* Removes the ivs that are not used after rewriting. */
6541
6542 static void
6543 remove_unused_ivs (struct ivopts_data *data)
6544 {
6545 unsigned j;
6546 bitmap_iterator bi;
6547 bitmap toremove = BITMAP_ALLOC (NULL);
6548
6549 /* Figure out an order in which to release SSA DEFs so that we don't
6550 release something that we'd have to propagate into a debug stmt
6551 afterwards. */
6552 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6553 {
6554 struct version_info *info;
6555
6556 info = ver_info (data, j);
6557 if (info->iv
6558 && !integer_zerop (info->iv->step)
6559 && !info->inv_id
6560 && !info->iv->have_use_for
6561 && !info->preserve_biv)
6562 {
6563 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6564
6565 tree def = info->iv->ssa_name;
6566
6567 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6568 {
6569 imm_use_iterator imm_iter;
6570 use_operand_p use_p;
6571 gimple stmt;
6572 int count = 0;
6573
6574 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6575 {
6576 if (!gimple_debug_bind_p (stmt))
6577 continue;
6578
6579 /* We just want to determine whether to do nothing
6580 (count == 0), to substitute the computed
6581 expression into a single use of the SSA DEF by
6582 itself (count == 1), or to use a debug temp
6583 because the SSA DEF is used multiple times or as
6584 part of a larger expression (count > 1). */
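/* E.g. `# DEBUG x => i_1' alone yields count == 1 and we substitute
directly, while `# DEBUG x => i_1 + 4' or a second binding pushes count
past 1 and we introduce a debug temp instead. */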
6585 count++;
6586 if (gimple_debug_bind_get_value (stmt) != def)
6587 count++;
6588
6589 if (count > 1)
6590 BREAK_FROM_IMM_USE_STMT (imm_iter);
6591 }
6592
6593 if (!count)
6594 continue;
6595
6596 struct iv_use dummy_use;
6597 struct iv_cand *best_cand = NULL, *cand;
6598 unsigned i, best_pref = 0, cand_pref;
6599
6600 memset (&dummy_use, 0, sizeof (dummy_use));
6601 dummy_use.iv = info->iv;
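/* Score the candidates selected for the remaining uses (only the first
64 are inspected, presumably to keep this cheap): a step equal to the
dead iv's is worth 4, a matching machine mode 2, and a constant base 1,
so step equality dominates the choice of BEST_CAND. */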
6602 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6603 {
6604 cand = iv_use (data, i)->selected;
6605 if (cand == best_cand)
6606 continue;
6607 cand_pref = operand_equal_p (cand->iv->step,
6608 info->iv->step, 0)
6609 ? 4 : 0;
6610 cand_pref
6611 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6612 == TYPE_MODE (TREE_TYPE (info->iv->base))
6613 ? 2 : 0;
6614 cand_pref
6615 += TREE_CODE (cand->iv->base) == INTEGER_CST
6616 ? 1 : 0;
6617 if (best_cand == NULL || best_pref < cand_pref)
6618 {
6619 best_cand = cand;
6620 best_pref = cand_pref;
6621 }
6622 }
6623
6624 if (!best_cand)
6625 continue;
6626
6627 tree comp = get_computation_at (data->current_loop,
6628 &dummy_use, best_cand,
6629 SSA_NAME_DEF_STMT (def));
6630 if (!comp)
6631 continue;
6632
6633 if (count > 1)
6634 {
6635 tree vexpr = make_node (DEBUG_EXPR_DECL);
6636 DECL_ARTIFICIAL (vexpr) = 1;
6637 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6638 if (SSA_NAME_VAR (def))
6639 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6640 else
6641 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6642 gdebug *def_temp
6643 = gimple_build_debug_bind (vexpr, comp, NULL);
6644 gimple_stmt_iterator gsi;
6645
6646 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6647 gsi = gsi_after_labels (gimple_bb
6648 (SSA_NAME_DEF_STMT (def)));
6649 else
6650 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6651
6652 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6653 comp = vexpr;
6654 }
6655
6656 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6657 {
6658 if (!gimple_debug_bind_p (stmt))
6659 continue;
6660
6661 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6662 SET_USE (use_p, comp);
6663
6664 update_stmt (stmt);
6665 }
6666 }
6667 }
6668 }
6669
6670 release_defs_bitset (toremove);
6671
6672 BITMAP_FREE (toremove);
6673 }
6674
6675 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6676 for hash_map::traverse. */
6677
6678 bool
6679 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6680 {
6681 free (value);
6682 return true;
6683 }
6684
6685 /* Frees data allocated by the optimization of a single loop. */
6686
6687 static void
6688 free_loop_data (struct ivopts_data *data)
6689 {
6690 unsigned i, j;
6691 bitmap_iterator bi;
6692 tree obj;
6693
6694 if (data->niters)
6695 {
6696 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6697 delete data->niters;
6698 data->niters = NULL;
6699 }
6700
6701 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6702 {
6703 struct version_info *info;
6704
6705 info = ver_info (data, i);
6706 free (info->iv);
6707 info->iv = NULL;
6708 info->has_nonlin_use = false;
6709 info->preserve_biv = false;
6710 info->inv_id = 0;
6711 }
6712 bitmap_clear (data->relevant);
6713 bitmap_clear (data->important_candidates);
6714
6715 for (i = 0; i < n_iv_uses (data); i++)
6716 {
6717 struct iv_use *use = iv_use (data, i);
6718
6719 free (use->iv);
6720 BITMAP_FREE (use->related_cands);
6721 for (j = 0; j < use->n_map_members; j++)
6722 if (use->cost_map[j].depends_on)
6723 BITMAP_FREE (use->cost_map[j].depends_on);
6724 free (use->cost_map);
6725 free (use);
6726 }
6727 data->iv_uses.truncate (0);
6728
6729 for (i = 0; i < n_iv_cands (data); i++)
6730 {
6731 struct iv_cand *cand = iv_cand (data, i);
6732
6733 free (cand->iv);
6734 if (cand->depends_on)
6735 BITMAP_FREE (cand->depends_on);
6736 free (cand);
6737 }
6738 data->iv_candidates.truncate (0);
6739
6740 if (data->version_info_size < num_ssa_names)
6741 {
6742 data->version_info_size = 2 * num_ssa_names;
6743 free (data->version_info);
6744 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6745 }
6746
6747 data->max_inv_id = 0;
6748
6749 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6750 SET_DECL_RTL (obj, NULL_RTX);
6751
6752 decl_rtl_to_reset.truncate (0);
6753
6754 data->inv_expr_tab->empty ();
6755 data->inv_expr_id = 0;
6756 }
6757
6758 /* Finalizes data structures used by the induction variable
6759 optimization pass. */
6760
6761 static void
6762 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6763 {
6764 free_loop_data (data);
6765 free (data->version_info);
6766 BITMAP_FREE (data->relevant);
6767 BITMAP_FREE (data->important_candidates);
6768
6769 decl_rtl_to_reset.release ();
6770 data->iv_uses.release ();
6771 data->iv_candidates.release ();
6772 delete data->inv_expr_tab;
6773 data->inv_expr_tab = NULL;
6774 free_affine_expand_cache (&data->name_expansion_cache);
6775 }
6776
6777 /* Returns true if the loop body BODY includes any function calls. */
6778
6779 static bool
6780 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6781 {
6782 gimple_stmt_iterator gsi;
6783 unsigned i;
6784
6785 for (i = 0; i < num_nodes; i++)
6786 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6787 {
6788 gimple stmt = gsi_stmt (gsi);
6789 if (is_gimple_call (stmt)
6790 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6791 return true;
6792 }
6793 return false;
6794 }
6795
6796 /* Optimizes the LOOP. Returns true if anything changed. */
6797
6798 static bool
6799 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6800 {
6801 bool changed = false;
6802 struct iv_ca *iv_ca;
6803 edge exit = single_dom_exit (loop);
6804 basic_block *body;
6805
6806 gcc_assert (!data->niters);
6807 data->current_loop = loop;
6808 data->speed = optimize_loop_for_speed_p (loop);
6809
6810 if (dump_file && (dump_flags & TDF_DETAILS))
6811 {
6812 fprintf (dump_file, "Processing loop %d\n", loop->num);
6813
6814 if (exit)
6815 {
6816 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6817 exit->src->index, exit->dest->index);
6818 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6819 fprintf (dump_file, "\n");
6820 }
6821
6822 fprintf (dump_file, "\n");
6823 }
6824
6825 body = get_loop_body (loop);
6826 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6827 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6828 free (body);
6829
6830 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6831
6832 /* For each ssa name, determine whether it behaves as an induction variable
6833 in some loop. */
6834 if (!find_induction_variables (data))
6835 goto finish;
6836
6837 /* Finds interesting uses (item 1). */
6838 find_interesting_uses (data);
6839 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6840 goto finish;
6841
6842 /* Finds candidates for the induction variables (item 2). */
6843 find_iv_candidates (data);
6844
6845 /* Calculates the costs (item 3, part 1). */
6846 determine_iv_costs (data);
6847 determine_use_iv_costs (data);
6848 determine_set_costs (data);
6849
6850 /* Find the optimal set of induction variables (item 3, part 2). */
6851 iv_ca = find_optimal_iv_set (data);
6852 if (!iv_ca)
6853 goto finish;
6854 changed = true;
6855
6856 /* Create the new induction variables (item 4, part 1). */
6857 create_new_ivs (data, iv_ca);
6858 iv_ca_free (&iv_ca);
6859
6860 /* Rewrite the uses (item 4, part 2). */
6861 rewrite_uses (data);
6862
6863 /* Remove the ivs that are unused after rewriting. */
6864 remove_unused_ivs (data);
6865
6866 /* We have changed the structure of induction variables; it might happen
6867 that definitions in the scev database refer to some of them that were
6868 eliminated. */
6869 scev_reset ();
6870
6871 finish:
6872 free_loop_data (data);
6873
6874 return changed;
6875 }
6876
6877 /* Main entry point. Optimizes induction variables in loops. */
6878
6879 void
6880 tree_ssa_iv_optimize (void)
6881 {
6882 struct loop *loop;
6883 struct ivopts_data data;
6884
6885 tree_ssa_iv_optimize_init (&data);
6886
6887 /* Optimize the loops starting with the innermost ones. */
6888 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6889 {
6890 if (dump_file && (dump_flags & TDF_DETAILS))
6891 flow_loop_dump (loop, dump_file, NULL, 1);
6892
6893 tree_ssa_iv_optimize_loop (&data, loop);
6894 }
6895
6896 tree_ssa_iv_optimize_finalize (&data);
6897 }