/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and might enable us to decide
   costs more precisely, but getting all the interactions right would be
   complicated.  */
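
/* As a small illustration (the variable names here are made up), for a
   loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address computation &a + 4 * i is an interesting use.  If a pointer
   candidate p with base &a[0] and step 4 is chosen for it, the loop may be
   rewritten as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   strength-reducing the multiplication away and allowing i itself to be
   eliminated.  */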

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "hash-set.h"
#include "machmode.h"
#include "vec.h"
#include "double-int.h"
#include "input.h"
#include "alias.h"
#include "symtab.h"
#include "wide-int.h"
#include "inchash.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "plugin-api.h"
#include "ipa-ref.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "hashtab.h"
#include "rtl.h"
#include "flags.h"
#include "statistics.h"
#include "real.h"
#include "fixed-value.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if have_use_for is
                           set.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing
     a use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
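
/* For illustration: an IP_NORMAL candidate is incremented just before the
   exit test at the end of the loop body, so any statement in the latch is
   after the increment, while an IP_END candidate is incremented at the very
   end of the latch, so no statement within the loop is after it.  */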

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with
         ssa names that appear in phi nodes on abnormal edges, so as not to
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  If we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
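
/* Some illustrative cases: for EXPR == &a[i] where a is a declared array,
   the result is &a converted to ptr_type_node; for EXPR == p + 4 (a
   POINTER_PLUS_EXPR) we recurse into p; and for an integer constant cast
   to a pointer the result is NULL_TREE, i.e. no base object is known.  */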

/* Return true if an address expression with a non-DECL_P operand
   appears in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
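
/* E.g. &a[0] is considered complex, since it is an ADDR_EXPR of an
   ARRAY_REF rather than of a bare declaration, whereas plain &a is not;
   lowering such bases in alloc_iv below is what keeps &a[0] and &a from
   producing duplicate candidates.  */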

/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE except those with DECL_P operands.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gphi *phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
        {
          tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

          if (tmp)
            return tmp;
        }
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
         iv step.  Ideally we should stop at any ssa var, but since doing so
         everywhere is expensive and such chains are unusual, we only do it
         for the first one.

         See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at any ssa var, but since doing so
     everywhere is expensive and such chains are unusual, we only do it
     for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was not
     reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
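
/* For instance (with illustrative names), for a GIMPLE_COND i_1 < n_2
   where i_1 is an induction variable with nonzero step and n_2 is loop
   invariant, *CONTROL_VAR is set to the location of i_1, *BOUND to that
   of n_2 and true is returned; if the iv appears as the second operand
   instead, the two sides are swapped first.  */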

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop in which EXPR is obviously invariant,
   relative to the loop LOOP, i.e. such that all of its operands are
   defined outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
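
/* As an example (illustrative only), for a reference a[i] where a is an
   array of 4-byte elements and i is an iv with step 1, the element size 4
   is multiplied by the index step converted to sizetype and added to
   DTA->step, while *IDX is replaced by the initial value of i.  */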

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
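
/* For instance (names illustrative), for TOP == i_1 * 6 and BOT == i_1
   this stores 6 to *MUL and returns true; for TOP == 12 and BOT == 4 it
   stores 3; for TOP == 14 and BOT == 4 it returns false, since the
   division leaves a remainder.  */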

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
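
/* E.g. on a strict-alignment target, for a 4-byte-aligned reference whose
   address advances by STEP == 2 bytes per iteration, the step provides only
   one trailing zero bit where the alignment requires two, so the reference
   may become unaligned and true is returned.  */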
1798
1799 /* Return true if EXPR may be non-addressable. */
1800
1801 bool
1802 may_be_nonaddressable_p (tree expr)
1803 {
1804 switch (TREE_CODE (expr))
1805 {
1806 case TARGET_MEM_REF:
1807 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1808 target, thus they are always addressable. */
1809 return false;
1810
1811 case COMPONENT_REF:
1812 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1813 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1814
1815 case VIEW_CONVERT_EXPR:
1816 /* This kind of view-conversions may wrap non-addressable objects
1817 and make them look addressable. After some processing the
1818 non-addressability may be uncovered again, causing ADDR_EXPRs
1819 of inappropriate objects to be built. */
1820 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1821 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1822 return true;
1823
1824 /* ... fall through ... */
1825
1826 case ARRAY_REF:
1827 case ARRAY_RANGE_REF:
1828 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1829
1830 CASE_CONVERT:
1831 return true;
1832
1833 default:
1834 break;
1835 }
1836
1837 return false;
1838 }
1839
1840 /* Finds addresses in *OP_P inside STMT. */
1841
1842 static void
1843 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1844 {
1845 tree base = *op_p, step = size_zero_node;
1846 struct iv *civ;
1847 struct ifs_ivopts_data ifs_ivopts_data;
1848
1849 /* Do not play with volatile memory references. A bit too conservative,
1850 perhaps, but safe. */
1851 if (gimple_has_volatile_ops (stmt))
1852 goto fail;
1853
1854 /* Ignore bitfields for now. Not really something terribly complicated
1855 to handle. TODO. */
1856 if (TREE_CODE (base) == BIT_FIELD_REF)
1857 goto fail;
1858
1859 base = unshare_expr (base);
1860
1861 if (TREE_CODE (base) == TARGET_MEM_REF)
1862 {
1863 tree type = build_pointer_type (TREE_TYPE (base));
1864 tree astep;
1865
1866 if (TMR_BASE (base)
1867 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1868 {
1869 civ = get_iv (data, TMR_BASE (base));
1870 if (!civ)
1871 goto fail;
1872
1873 TMR_BASE (base) = civ->base;
1874 step = civ->step;
1875 }
1876 if (TMR_INDEX2 (base)
1877 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1878 {
1879 civ = get_iv (data, TMR_INDEX2 (base));
1880 if (!civ)
1881 goto fail;
1882
1883 TMR_INDEX2 (base) = civ->base;
1884 step = civ->step;
1885 }
1886 if (TMR_INDEX (base)
1887 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1888 {
1889 civ = get_iv (data, TMR_INDEX (base));
1890 if (!civ)
1891 goto fail;
1892
1893 TMR_INDEX (base) = civ->base;
1894 astep = civ->step;
1895
1896 if (astep)
1897 {
1898 if (TMR_STEP (base))
1899 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1900
1901 step = fold_build2 (PLUS_EXPR, type, step, astep);
1902 }
1903 }
1904
1905 if (integer_zerop (step))
1906 goto fail;
1907 base = tree_mem_ref_addr (type, base);
1908 }
1909 else
1910 {
1911 ifs_ivopts_data.ivopts_data = data;
1912 ifs_ivopts_data.stmt = stmt;
1913 ifs_ivopts_data.step = size_zero_node;
1914 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1915 || integer_zerop (ifs_ivopts_data.step))
1916 goto fail;
1917 step = ifs_ivopts_data.step;
1918
1919 /* Check that the base expression is addressable. This needs
1920 to be done after substituting bases of IVs into it. */
1921 if (may_be_nonaddressable_p (base))
1922 goto fail;
1923
1924 /* Moreover, on strict alignment platforms, check that it is
1925 sufficiently aligned. */
1926 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1927 goto fail;
1928
1929 base = build_fold_addr_expr (base);
1930
1931 /* Substituting bases of IVs into the base expression might
1932 have caused folding opportunities. */
1933 if (TREE_CODE (base) == ADDR_EXPR)
1934 {
1935 tree *ref = &TREE_OPERAND (base, 0);
1936 while (handled_component_p (*ref))
1937 ref = &TREE_OPERAND (*ref, 0);
1938 if (TREE_CODE (*ref) == MEM_REF)
1939 {
1940 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1941 TREE_OPERAND (*ref, 0),
1942 TREE_OPERAND (*ref, 1));
1943 if (tem)
1944 *ref = tem;
1945 }
1946 }
1947 }
1948
1949 civ = alloc_iv (base, step);
1950 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1951 return;
1952
1953 fail:
1954 for_each_index (op_p, idx_record_use, data);
1955 }
1956
1957 /* Finds and records invariants used in STMT. */
1958
1959 static void
1960 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1961 {
1962 ssa_op_iter iter;
1963 use_operand_p use_p;
1964 tree op;
1965
1966 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1967 {
1968 op = USE_FROM_PTR (use_p);
1969 record_invariant (data, op, false);
1970 }
1971 }
1972
1973 /* Finds interesting uses of induction variables in the statement STMT. */
1974
1975 static void
1976 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1977 {
1978 struct iv *iv;
1979 tree op, *lhs, *rhs;
1980 ssa_op_iter iter;
1981 use_operand_p use_p;
1982 enum tree_code code;
1983
1984 find_invariants_stmt (data, stmt);
1985
1986 if (gimple_code (stmt) == GIMPLE_COND)
1987 {
1988 find_interesting_uses_cond (data, stmt);
1989 return;
1990 }
1991
1992 if (is_gimple_assign (stmt))
1993 {
1994 lhs = gimple_assign_lhs_ptr (stmt);
1995 rhs = gimple_assign_rhs1_ptr (stmt);
1996
1997 if (TREE_CODE (*lhs) == SSA_NAME)
1998 {
1999 /* If the statement defines an induction variable, the uses are not
2000 interesting by themselves. */
2001
2002 iv = get_iv (data, *lhs);
2003
2004 if (iv && !integer_zerop (iv->step))
2005 return;
2006 }
2007
2008 code = gimple_assign_rhs_code (stmt);
2009 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2010 && (REFERENCE_CLASS_P (*rhs)
2011 || is_gimple_val (*rhs)))
2012 {
2013 if (REFERENCE_CLASS_P (*rhs))
2014 find_interesting_uses_address (data, stmt, rhs);
2015 else
2016 find_interesting_uses_op (data, *rhs);
2017
2018 if (REFERENCE_CLASS_P (*lhs))
2019 find_interesting_uses_address (data, stmt, lhs);
2020 return;
2021 }
2022 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2023 {
2024 find_interesting_uses_cond (data, stmt);
2025 return;
2026 }
2027
2028 /* TODO -- we should also handle address uses of type
2029
2030 memory = call (whatever);
2031
2032 and
2033
2034 call (memory). */
2035 }
2036
2037 if (gimple_code (stmt) == GIMPLE_PHI
2038 && gimple_bb (stmt) == data->current_loop->header)
2039 {
2040 iv = get_iv (data, PHI_RESULT (stmt));
2041
2042 if (iv && !integer_zerop (iv->step))
2043 return;
2044 }
2045
2046 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2047 {
2048 op = USE_FROM_PTR (use_p);
2049
2050 if (TREE_CODE (op) != SSA_NAME)
2051 continue;
2052
2053 iv = get_iv (data, op);
2054 if (!iv)
2055 continue;
2056
2057 find_interesting_uses_op (data, op);
2058 }
2059 }
2060
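/* An illustrative example (not part of the original sources): in

     while (p != q)
       *p++ = 0;

   the store *p is recorded as a USE_ADDRESS, the exit test p != q as
   a USE_COMPARE (via find_interesting_uses_cond), and any other
   appearance of an iv value in a computation as a USE_NONLINEAR_EXPR.  */
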
2061 /* Finds interesting uses of induction variables outside of loops
2062 on loop exit edge EXIT. */
2063
2064 static void
2065 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2066 {
2067 gphi *phi;
2068 gphi_iterator psi;
2069 tree def;
2070
2071 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2072 {
2073 phi = psi.phi ();
2074 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2075 if (!virtual_operand_p (def))
2076 find_interesting_uses_op (data, def);
2077 }
2078 }
2079
2080 /* Finds uses of the induction variables that are interesting. */
2081
2082 static void
2083 find_interesting_uses (struct ivopts_data *data)
2084 {
2085 basic_block bb;
2086 gimple_stmt_iterator bsi;
2087 basic_block *body = get_loop_body (data->current_loop);
2088 unsigned i;
2089 struct version_info *info;
2090 edge e;
2091
2092 if (dump_file && (dump_flags & TDF_DETAILS))
2093 fprintf (dump_file, "Uses:\n\n");
2094
2095 for (i = 0; i < data->current_loop->num_nodes; i++)
2096 {
2097 edge_iterator ei;
2098 bb = body[i];
2099
2100 FOR_EACH_EDGE (e, ei, bb->succs)
2101 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2102 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2103 find_interesting_uses_outside (data, e);
2104
2105 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2106 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2107 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2108 if (!is_gimple_debug (gsi_stmt (bsi)))
2109 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2110 }
2111
2112 if (dump_file && (dump_flags & TDF_DETAILS))
2113 {
2114 bitmap_iterator bi;
2115
2116 fprintf (dump_file, "\n");
2117
2118 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2119 {
2120 info = ver_info (data, i);
2121 if (info->inv_id)
2122 {
2123 fprintf (dump_file, " ");
2124 print_generic_expr (dump_file, info->name, TDF_SLIM);
2125 fprintf (dump_file, " is invariant (%d)%s\n",
2126 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2127 }
2128 }
2129
2130 fprintf (dump_file, "\n");
2131 }
2132
2133 free (body);
2134 }
2135
2136 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2137 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2138 we are at the top-level of the processed address. */
2139
2140 static tree
2141 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2142 HOST_WIDE_INT *offset)
2143 {
2144 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2145 enum tree_code code;
2146 tree type, orig_type = TREE_TYPE (expr);
2147 HOST_WIDE_INT off0, off1, st;
2148 tree orig_expr = expr;
2149
2150 STRIP_NOPS (expr);
2151
2152 type = TREE_TYPE (expr);
2153 code = TREE_CODE (expr);
2154 *offset = 0;
2155
2156 switch (code)
2157 {
2158 case INTEGER_CST:
2159 if (!cst_and_fits_in_hwi (expr)
2160 || integer_zerop (expr))
2161 return orig_expr;
2162
2163 *offset = int_cst_value (expr);
2164 return build_int_cst (orig_type, 0);
2165
2166 case POINTER_PLUS_EXPR:
2167 case PLUS_EXPR:
2168 case MINUS_EXPR:
2169 op0 = TREE_OPERAND (expr, 0);
2170 op1 = TREE_OPERAND (expr, 1);
2171
2172 op0 = strip_offset_1 (op0, false, false, &off0);
2173 op1 = strip_offset_1 (op1, false, false, &off1);
2174
2175 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2176 if (op0 == TREE_OPERAND (expr, 0)
2177 && op1 == TREE_OPERAND (expr, 1))
2178 return orig_expr;
2179
2180 if (integer_zerop (op1))
2181 expr = op0;
2182 else if (integer_zerop (op0))
2183 {
2184 if (code == MINUS_EXPR)
2185 expr = fold_build1 (NEGATE_EXPR, type, op1);
2186 else
2187 expr = op1;
2188 }
2189 else
2190 expr = fold_build2 (code, type, op0, op1);
2191
2192 return fold_convert (orig_type, expr);
2193
2194 case MULT_EXPR:
2195 op1 = TREE_OPERAND (expr, 1);
2196 if (!cst_and_fits_in_hwi (op1))
2197 return orig_expr;
2198
2199 op0 = TREE_OPERAND (expr, 0);
2200 op0 = strip_offset_1 (op0, false, false, &off0);
2201 if (op0 == TREE_OPERAND (expr, 0))
2202 return orig_expr;
2203
2204 *offset = off0 * int_cst_value (op1);
2205 if (integer_zerop (op0))
2206 expr = op0;
2207 else
2208 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2209
2210 return fold_convert (orig_type, expr);
2211
2212 case ARRAY_REF:
2213 case ARRAY_RANGE_REF:
2214 if (!inside_addr)
2215 return orig_expr;
2216
2217 step = array_ref_element_size (expr);
2218 if (!cst_and_fits_in_hwi (step))
2219 break;
2220
2221 st = int_cst_value (step);
2222 op1 = TREE_OPERAND (expr, 1);
2223 op1 = strip_offset_1 (op1, false, false, &off1);
2224 *offset = off1 * st;
2225
2226 if (top_compref
2227 && integer_zerop (op1))
2228 {
2229 /* Strip the component reference completely. */
2230 op0 = TREE_OPERAND (expr, 0);
2231 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2232 *offset += off0;
2233 return op0;
2234 }
2235 break;
2236
2237 case COMPONENT_REF:
2238 {
2239 tree field;
2240
2241 if (!inside_addr)
2242 return orig_expr;
2243
2244 tmp = component_ref_field_offset (expr);
2245 field = TREE_OPERAND (expr, 1);
2246 if (top_compref
2247 && cst_and_fits_in_hwi (tmp)
2248 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2249 {
2250 HOST_WIDE_INT boffset, abs_off;
2251
2252 /* Strip the component reference completely. */
2253 op0 = TREE_OPERAND (expr, 0);
2254 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2255 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2256 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2257 if (boffset < 0)
2258 abs_off = -abs_off;
2259
2260 *offset = off0 + int_cst_value (tmp) + abs_off;
2261 return op0;
2262 }
2263 }
2264 break;
2265
2266 case ADDR_EXPR:
2267 op0 = TREE_OPERAND (expr, 0);
2268 op0 = strip_offset_1 (op0, true, true, &off0);
2269 *offset += off0;
2270
2271 if (op0 == TREE_OPERAND (expr, 0))
2272 return orig_expr;
2273
2274 expr = build_fold_addr_expr (op0);
2275 return fold_convert (orig_type, expr);
2276
2277 case MEM_REF:
2278 /* ??? Offset operand? */
2279 inside_addr = false;
2280 break;
2281
2282 default:
2283 return orig_expr;
2284 }
2285
2286 /* Default handling of expressions for which we want to recurse into
2287 the first operand. */
2288 op0 = TREE_OPERAND (expr, 0);
2289 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2290 *offset += off0;
2291
2292 if (op0 == TREE_OPERAND (expr, 0)
2293 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2294 return orig_expr;
2295
2296 expr = copy_node (expr);
2297 TREE_OPERAND (expr, 0) = op0;
2298 if (op1)
2299 TREE_OPERAND (expr, 1) = op1;
2300
2301 /* Inside an address, we might strip the top-level component references,
2302 thus changing the type of the expression. The handling of ADDR_EXPR
2303 will fix that. */
2304 expr = fold_convert (orig_type, expr);
2305
2306 return expr;
2307 }
2308
2309 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2310
2311 static tree
2312 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2313 {
2314 HOST_WIDE_INT off;
2315 tree core = strip_offset_1 (expr, false, false, &off);
2316 *offset = off;
2317 return core;
2318 }
2319
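/* An illustrative example (not part of the original sources): for an
   array of 4-byte elements, applying strip_offset to the address

     &a[i + 4]        i.e.  a + 4 * i + 16

   returns a core expression equivalent to &a[i] and sets *OFFSET to 16,
   so uses that differ only in a constant offset can share one iv.  */
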
2320 /* Returns a variant of TYPE that can be used as a base for different uses.
2321 We return an unsigned type with the same precision, which avoids problems
2322 with overflows. */
2323
2324 static tree
2325 generic_type_for (tree type)
2326 {
2327 if (POINTER_TYPE_P (type))
2328 return unsigned_type_for (type);
2329
2330 if (TYPE_UNSIGNED (type))
2331 return type;
2332
2333 return unsigned_type_for (type);
2334 }
2335
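/* Example (illustrative): for TYPE == int this returns unsigned int;
   for a pointer type it returns an unsigned integer type of the same
   precision.  Candidate values computed in such a type wrap modulo
   2^precision instead of invoking undefined overflow.  */
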
2336 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2337 the bitmap in which we should store them. */
2338
2339 static struct ivopts_data *fd_ivopts_data;
2340 static tree
2341 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2342 {
2343 bitmap *depends_on = (bitmap *) data;
2344 struct version_info *info;
2345
2346 if (TREE_CODE (*expr_p) != SSA_NAME)
2347 return NULL_TREE;
2348 info = name_info (fd_ivopts_data, *expr_p);
2349
2350 if (!info->inv_id || info->has_nonlin_use)
2351 return NULL_TREE;
2352
2353 if (!*depends_on)
2354 *depends_on = BITMAP_ALLOC (NULL);
2355 bitmap_set_bit (*depends_on, info->inv_id);
2356
2357 return NULL_TREE;
2358 }
2359
2360 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2361 position to POS. If USE is not NULL, the candidate is set as related to
2362 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2363 replacement of the final value of the iv by a direct computation. */
2364
2365 static struct iv_cand *
2366 add_candidate_1 (struct ivopts_data *data,
2367 tree base, tree step, bool important, enum iv_position pos,
2368 struct iv_use *use, gimple incremented_at)
2369 {
2370 unsigned i;
2371 struct iv_cand *cand = NULL;
2372 tree type, orig_type;
2373
2374 /* For non-original variables, make sure their values are computed in a type
2375 that does not invoke undefined behavior on overflows (since in general,
2376 we cannot prove that these induction variables are non-wrapping). */
2377 if (pos != IP_ORIGINAL)
2378 {
2379 orig_type = TREE_TYPE (base);
2380 type = generic_type_for (orig_type);
2381 if (type != orig_type)
2382 {
2383 base = fold_convert (type, base);
2384 step = fold_convert (type, step);
2385 }
2386 }
2387
2388 for (i = 0; i < n_iv_cands (data); i++)
2389 {
2390 cand = iv_cand (data, i);
2391
2392 if (cand->pos != pos)
2393 continue;
2394
2395 if (cand->incremented_at != incremented_at
2396 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2397 && cand->ainc_use != use))
2398 continue;
2399
2400 if (!cand->iv)
2401 {
2402 if (!base && !step)
2403 break;
2404
2405 continue;
2406 }
2407
2408 if (!base && !step)
2409 continue;
2410
2411 if (operand_equal_p (base, cand->iv->base, 0)
2412 && operand_equal_p (step, cand->iv->step, 0)
2413 && (TYPE_PRECISION (TREE_TYPE (base))
2414 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2415 break;
2416 }
2417
2418 if (i == n_iv_cands (data))
2419 {
2420 cand = XCNEW (struct iv_cand);
2421 cand->id = i;
2422
2423 if (!base && !step)
2424 cand->iv = NULL;
2425 else
2426 cand->iv = alloc_iv (base, step);
2427
2428 cand->pos = pos;
2429 if (pos != IP_ORIGINAL && cand->iv)
2430 {
2431 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2432 cand->var_after = cand->var_before;
2433 }
2434 cand->important = important;
2435 cand->incremented_at = incremented_at;
2436 data->iv_candidates.safe_push (cand);
2437
2438 if (step
2439 && TREE_CODE (step) != INTEGER_CST)
2440 {
2441 fd_ivopts_data = data;
2442 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2443 }
2444
2445 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2446 cand->ainc_use = use;
2447 else
2448 cand->ainc_use = NULL;
2449
2450 if (dump_file && (dump_flags & TDF_DETAILS))
2451 dump_cand (dump_file, cand);
2452 }
2453
2454 if (important && !cand->important)
2455 {
2456 cand->important = true;
2457 if (dump_file && (dump_flags & TDF_DETAILS))
2458 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2459 }
2460
2461 if (use)
2462 {
2463 bitmap_set_bit (use->related_cands, i);
2464 if (dump_file && (dump_flags & TDF_DETAILS))
2465 fprintf (dump_file, "Candidate %d is related to use %d\n",
2466 cand->id, use->id);
2467 }
2468
2469 return cand;
2470 }
2471
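/* An illustrative sketch (not part of the original sources): for

     for (i = 0; i < n; i++)
       sum += a[i];

   typical candidates recorded here include the generic counter
   {base 0, step 1}, the original biv at IP_ORIGINAL, and an address
   candidate such as {base &a[0], step 4} for 4-byte elements; the
   cost model later selects a subset of them.  */
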
2472 /* Returns true if incrementing the induction variable at the end of the LOOP
2473 is allowed.
2474
2475 The purpose is to avoid splitting the latch edge with a biv increment,
2476 thus creating a jump, possibly confusing other optimization passes and
2477 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2478 IP_NORMAL_POS is not available (so we do not have a better alternative),
2479 or if the latch edge is already nonempty. */
2480
2481 static bool
2482 allow_ip_end_pos_p (struct loop *loop)
2483 {
2484 if (!ip_normal_pos (loop))
2485 return true;
2486
2487 if (!empty_block_p (ip_end_pos (loop)))
2488 return true;
2489
2490 return false;
2491 }
2492
2493 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2494 Important field is set to IMPORTANT. */
2495
2496 static void
2497 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2498 bool important, struct iv_use *use)
2499 {
2500 basic_block use_bb = gimple_bb (use->stmt);
2501 machine_mode mem_mode;
2502 unsigned HOST_WIDE_INT cstepi;
2503
2504 /* If we insert the increment in any position other than the standard
2505 ones, we must ensure that it is incremented once per iteration.
2506 It must not be in an inner nested loop, or one side of an if
2507 statement. */
2508 if (use_bb->loop_father != data->current_loop
2509 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2510 || stmt_could_throw_p (use->stmt)
2511 || !cst_and_fits_in_hwi (step))
2512 return;
2513
2514 cstepi = int_cst_value (step);
2515
2516 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2517 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2518 || USE_STORE_PRE_INCREMENT (mem_mode))
2519 && GET_MODE_SIZE (mem_mode) == cstepi)
2520 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2521 || USE_STORE_PRE_DECREMENT (mem_mode))
2522 && GET_MODE_SIZE (mem_mode) == -cstepi))
2523 {
2524 enum tree_code code = MINUS_EXPR;
2525 tree new_base;
2526 tree new_step = step;
2527
2528 if (POINTER_TYPE_P (TREE_TYPE (base)))
2529 {
2530 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2531 code = POINTER_PLUS_EXPR;
2532 }
2533 else
2534 new_step = fold_convert (TREE_TYPE (base), new_step);
2535 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2536 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2537 use->stmt);
2538 }
2539 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2540 || USE_STORE_POST_INCREMENT (mem_mode))
2541 && GET_MODE_SIZE (mem_mode) == cstepi)
2542 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2543 || USE_STORE_POST_DECREMENT (mem_mode))
2544 && GET_MODE_SIZE (mem_mode) == -cstepi))
2545 {
2546 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2547 use->stmt);
2548 }
2549 }
2550
2551 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2552 position to POS. If USE is not NULL, the candidate is set as related to
2553 it. The candidate computation is scheduled on all available positions. */
2554
2555 static void
2556 add_candidate (struct ivopts_data *data,
2557 tree base, tree step, bool important, struct iv_use *use)
2558 {
2559 if (ip_normal_pos (data->current_loop))
2560 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2561 if (ip_end_pos (data->current_loop)
2562 && allow_ip_end_pos_p (data->current_loop))
2563 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2564
2565 if (use != NULL && use->type == USE_ADDRESS)
2566 add_autoinc_candidates (data, base, step, important, use);
2567 }
2568
2569 /* Adds standard iv candidates. */
2570
2571 static void
2572 add_standard_iv_candidates (struct ivopts_data *data)
2573 {
2574 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2575
2576 /* The same for the long integer type if it is still fast enough. */
2577 if (TYPE_PRECISION
2578 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2579 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2580 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2581 build_int_cst (long_integer_type_node, 1), true, NULL);
2582
2583 /* And likewise for the long long integer type if it is still fast enough. */
2584 if (TYPE_PRECISION
2585 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2586 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2587 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2588 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2589 }
2590
2592 /* Adds candidates based on the old induction variable IV. */
2593
2594 static void
2595 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2596 {
2597 gimple phi;
2598 tree def;
2599 struct iv_cand *cand;
2600
2601 add_candidate (data, iv->base, iv->step, true, NULL);
2602
2603 /* The same, but with initial value zero. */
2604 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2605 add_candidate (data, size_int (0), iv->step, true, NULL);
2606 else
2607 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2608 iv->step, true, NULL);
2609
2610 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2611 if (gimple_code (phi) == GIMPLE_PHI)
2612 {
2613 /* Additionally record the possibility of leaving the original iv
2614 untouched. */
2615 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2616 /* Don't add candidate if it's from another PHI node because
2617 it's an affine iv appearing in the form of PEELED_CHREC. */
2618 phi = SSA_NAME_DEF_STMT (def);
2619 if (gimple_code (phi) != GIMPLE_PHI)
2620 {
2621 cand = add_candidate_1 (data,
2622 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2623 SSA_NAME_DEF_STMT (def));
2624 cand->var_before = iv->ssa_name;
2625 cand->var_after = def;
2626 }
2627 else
2628 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2629 }
2630 }
2631
2632 /* Adds candidates based on the old induction variables. */
2633
2634 static void
2635 add_old_ivs_candidates (struct ivopts_data *data)
2636 {
2637 unsigned i;
2638 struct iv *iv;
2639 bitmap_iterator bi;
2640
2641 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2642 {
2643 iv = ver_info (data, i)->iv;
2644 if (iv && iv->biv_p && !integer_zerop (iv->step))
2645 add_old_iv_candidates (data, iv);
2646 }
2647 }
2648
2649 /* Adds candidates based on the value of the induction variable IV and USE. */
2650
2651 static void
2652 add_iv_value_candidates (struct ivopts_data *data,
2653 struct iv *iv, struct iv_use *use)
2654 {
2655 unsigned HOST_WIDE_INT offset;
2656 tree base;
2657 tree basetype;
2658
2659 add_candidate (data, iv->base, iv->step, false, use);
2660
2661 /* The same, but with initial value zero. Make such a variable important,
2662 since it is generic enough that many uses may be based
2663 on it. */
2664 basetype = TREE_TYPE (iv->base);
2665 if (POINTER_TYPE_P (basetype))
2666 basetype = sizetype;
2667 add_candidate (data, build_int_cst (basetype, 0),
2668 iv->step, true, use);
2669
2670 /* Third, try removing the constant offset. Make sure to even
2671 add a candidate for &a[0] vs. (T *)&a. */
2672 base = strip_offset (iv->base, &offset);
2673 if (offset
2674 || base != iv->base)
2675 add_candidate (data, base, iv->step, false, use);
2676 }
2677
2678 /* Adds candidates based on the uses. */
2679
2680 static void
2681 add_derived_ivs_candidates (struct ivopts_data *data)
2682 {
2683 unsigned i;
2684
2685 for (i = 0; i < n_iv_uses (data); i++)
2686 {
2687 struct iv_use *use = iv_use (data, i);
2688
2689 if (!use)
2690 continue;
2691
2692 switch (use->type)
2693 {
2694 case USE_NONLINEAR_EXPR:
2695 case USE_COMPARE:
2696 case USE_ADDRESS:
2697 /* Just add the ivs based on the value of the iv used here. */
2698 add_iv_value_candidates (data, use->iv, use);
2699 break;
2700
2701 default:
2702 gcc_unreachable ();
2703 }
2704 }
2705 }
2706
2707 /* Record important candidates and add them to related_cands bitmaps
2708 if needed. */
2709
2710 static void
2711 record_important_candidates (struct ivopts_data *data)
2712 {
2713 unsigned i;
2714 struct iv_use *use;
2715
2716 for (i = 0; i < n_iv_cands (data); i++)
2717 {
2718 struct iv_cand *cand = iv_cand (data, i);
2719
2720 if (cand->important)
2721 bitmap_set_bit (data->important_candidates, i);
2722 }
2723
2724 data->consider_all_candidates = (n_iv_cands (data)
2725 <= CONSIDER_ALL_CANDIDATES_BOUND);
2726
2727 if (data->consider_all_candidates)
2728 {
2729 /* We will not need "related_cands" bitmaps in this case,
2730 so release them to decrease peak memory consumption. */
2731 for (i = 0; i < n_iv_uses (data); i++)
2732 {
2733 use = iv_use (data, i);
2734 BITMAP_FREE (use->related_cands);
2735 }
2736 }
2737 else
2738 {
2739 /* Add important candidates to the related_cands bitmaps. */
2740 for (i = 0; i < n_iv_uses (data); i++)
2741 bitmap_ior_into (iv_use (data, i)->related_cands,
2742 data->important_candidates);
2743 }
2744 }
2745
2746 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2747 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2748 we allocate a small linearly-probed hash table for every use. */
2749
2750 static void
2751 alloc_use_cost_map (struct ivopts_data *data)
2752 {
2753 unsigned i, size, s;
2754
2755 for (i = 0; i < n_iv_uses (data); i++)
2756 {
2757 struct iv_use *use = iv_use (data, i);
2758
2759 if (data->consider_all_candidates)
2760 size = n_iv_cands (data);
2761 else
2762 {
2763 s = bitmap_count_bits (use->related_cands);
2764
2765 /* Round up to a power of two, so that computing the modulo by it is fast. */
2766 size = s ? (1 << ceil_log2 (s)) : 1;
2767 }
2768
2769 use->n_map_members = size;
2770 use->cost_map = XCNEWVEC (struct cost_pair, size);
2771 }
2772 }
2773
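/* For example (illustrative): with 5 related candidates the table gets
   size 8, and a candidate with id 11 starts probing at slot
   11 & (8 - 1) == 3; the power-of-two size turns the modulo into a
   single AND, as exploited in set_use_iv_cost below.  */
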
2774 /* Returns a description of the computation cost of an expression whose
2775 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2776
2777 static comp_cost
2778 new_cost (unsigned runtime, unsigned complexity)
2779 {
2780 comp_cost cost;
2781
2782 cost.cost = runtime;
2783 cost.complexity = complexity;
2784
2785 return cost;
2786 }
2787
2788 /* Adds costs COST1 and COST2. */
2789
2790 static comp_cost
2791 add_costs (comp_cost cost1, comp_cost cost2)
2792 {
2793 cost1.cost += cost2.cost;
2794 cost1.complexity += cost2.complexity;
2795
2796 return cost1;
2797 }

2798 /* Subtracts costs COST1 and COST2. */
2799
2800 static comp_cost
2801 sub_costs (comp_cost cost1, comp_cost cost2)
2802 {
2803 cost1.cost -= cost2.cost;
2804 cost1.complexity -= cost2.complexity;
2805
2806 return cost1;
2807 }
2808
2809 /* Returns a negative number if COST1 < COST2, a positive number if
2810 COST1 > COST2, and 0 if COST1 = COST2. */
2811
2812 static int
2813 compare_costs (comp_cost cost1, comp_cost cost2)
2814 {
2815 if (cost1.cost == cost2.cost)
2816 return cost1.complexity - cost2.complexity;
2817
2818 return cost1.cost - cost2.cost;
2819 }
2820
2821 /* Returns true if COST is infinite. */
2822
2823 static bool
2824 infinite_cost_p (comp_cost cost)
2825 {
2826 return cost.cost == INFTY;
2827 }
2828
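/* Illustrative use of the comp_cost helpers (not part of the original
   sources):

     comp_cost a = new_cost (4, 1);
     comp_cost b = new_cost (4, 2);

   Here compare_costs (a, b) yields -1: the runtime costs tie, so the
   lower complexity of A wins.  Runtime cost always decides first;
   complexity only breaks ties.  */
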
2829 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2830 depends on invariants DEPENDS_ON, that the value used in expressing it
2831 is VALUE, and that in case of iv elimination the comparison operator is COMP. */
2832
2833 static void
2834 set_use_iv_cost (struct ivopts_data *data,
2835 struct iv_use *use, struct iv_cand *cand,
2836 comp_cost cost, bitmap depends_on, tree value,
2837 enum tree_code comp, int inv_expr_id)
2838 {
2839 unsigned i, s;
2840
2841 if (infinite_cost_p (cost))
2842 {
2843 BITMAP_FREE (depends_on);
2844 return;
2845 }
2846
2847 if (data->consider_all_candidates)
2848 {
2849 use->cost_map[cand->id].cand = cand;
2850 use->cost_map[cand->id].cost = cost;
2851 use->cost_map[cand->id].depends_on = depends_on;
2852 use->cost_map[cand->id].value = value;
2853 use->cost_map[cand->id].comp = comp;
2854 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2855 return;
2856 }
2857
2858 /* n_map_members is a power of two, so this computes modulo. */
2859 s = cand->id & (use->n_map_members - 1);
2860 for (i = s; i < use->n_map_members; i++)
2861 if (!use->cost_map[i].cand)
2862 goto found;
2863 for (i = 0; i < s; i++)
2864 if (!use->cost_map[i].cand)
2865 goto found;
2866
2867 gcc_unreachable ();
2868
2869 found:
2870 use->cost_map[i].cand = cand;
2871 use->cost_map[i].cost = cost;
2872 use->cost_map[i].depends_on = depends_on;
2873 use->cost_map[i].value = value;
2874 use->cost_map[i].comp = comp;
2875 use->cost_map[i].inv_expr_id = inv_expr_id;
2876 }
2877
2878 /* Gets cost of (USE, CANDIDATE) pair. */
2879
2880 static struct cost_pair *
2881 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2882 struct iv_cand *cand)
2883 {
2884 unsigned i, s;
2885 struct cost_pair *ret;
2886
2887 if (!cand)
2888 return NULL;
2889
2890 if (data->consider_all_candidates)
2891 {
2892 ret = use->cost_map + cand->id;
2893 if (!ret->cand)
2894 return NULL;
2895
2896 return ret;
2897 }
2898
2899 /* n_map_members is a power of two, so this computes modulo. */
2900 s = cand->id & (use->n_map_members - 1);
2901 for (i = s; i < use->n_map_members; i++)
2902 if (use->cost_map[i].cand == cand)
2903 return use->cost_map + i;
2904 else if (use->cost_map[i].cand == NULL)
2905 return NULL;
2906 for (i = 0; i < s; i++)
2907 if (use->cost_map[i].cand == cand)
2908 return use->cost_map + i;
2909 else if (use->cost_map[i].cand == NULL)
2910 return NULL;
2911
2912 return NULL;
2913 }
2914
2915 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */

2916 static rtx
2917 produce_memory_decl_rtl (tree obj, int *regno)
2918 {
2919 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2920 machine_mode address_mode = targetm.addr_space.address_mode (as);
2921 rtx x;
2922
2923 gcc_assert (obj);
2924 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2925 {
2926 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2927 x = gen_rtx_SYMBOL_REF (address_mode, name);
2928 SET_SYMBOL_REF_DECL (x, obj);
2929 x = gen_rtx_MEM (DECL_MODE (obj), x);
2930 set_mem_addr_space (x, as);
2931 targetm.encode_section_info (obj, x, true);
2932 }
2933 else
2934 {
2935 x = gen_raw_REG (address_mode, (*regno)++);
2936 x = gen_rtx_MEM (DECL_MODE (obj), x);
2937 set_mem_addr_space (x, as);
2938 }
2939
2940 return x;
2941 }
2942
2943 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2944 walk_tree. DATA contains the current fake register number. */
2945
2946 static tree
2947 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2948 {
2949 tree obj = NULL_TREE;
2950 rtx x = NULL_RTX;
2951 int *regno = (int *) data;
2952
2953 switch (TREE_CODE (*expr_p))
2954 {
2955 case ADDR_EXPR:
2956 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2957 handled_component_p (*expr_p);
2958 expr_p = &TREE_OPERAND (*expr_p, 0))
2959 continue;
2960 obj = *expr_p;
2961 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2962 x = produce_memory_decl_rtl (obj, regno);
2963 break;
2964
2965 case SSA_NAME:
2966 *ws = 0;
2967 obj = SSA_NAME_VAR (*expr_p);
2968 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2969 if (!obj)
2970 return NULL_TREE;
2971 if (!DECL_RTL_SET_P (obj))
2972 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2973 break;
2974
2975 case VAR_DECL:
2976 case PARM_DECL:
2977 case RESULT_DECL:
2978 *ws = 0;
2979 obj = *expr_p;
2980
2981 if (DECL_RTL_SET_P (obj))
2982 break;
2983
2984 if (DECL_MODE (obj) == BLKmode)
2985 x = produce_memory_decl_rtl (obj, regno);
2986 else
2987 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2988
2989 break;
2990
2991 default:
2992 break;
2993 }
2994
2995 if (x)
2996 {
2997 decl_rtl_to_reset.safe_push (obj);
2998 SET_DECL_RTL (obj, x);
2999 }
3000
3001 return NULL_TREE;
3002 }
3003
3004 /* Determines cost of the computation of EXPR. */
3005
3006 static unsigned
3007 computation_cost (tree expr, bool speed)
3008 {
3009 rtx_insn *seq;
3010 rtx rslt;
3011 tree type = TREE_TYPE (expr);
3012 unsigned cost;
3013 /* Avoid using hard regs in ways which may be unsupported. */
3014 int regno = LAST_VIRTUAL_REGISTER + 1;
3015 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3016 enum node_frequency real_frequency = node->frequency;
3017
3018 node->frequency = NODE_FREQUENCY_NORMAL;
3019 crtl->maybe_hot_insn_p = speed;
3020 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3021 start_sequence ();
3022 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3023 seq = get_insns ();
3024 end_sequence ();
3025 default_rtl_profile ();
3026 node->frequency = real_frequency;
3027
3028 cost = seq_cost (seq, speed);
3029 if (MEM_P (rslt))
3030 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3031 TYPE_ADDR_SPACE (type), speed);
3032 else if (!REG_P (rslt))
3033 cost += set_src_cost (rslt, speed);
3034
3035 return cost;
3036 }
3037
3038 /* Returns variable containing the value of candidate CAND at statement AT. */
3039
3040 static tree
3041 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3042 {
3043 if (stmt_after_increment (loop, cand, stmt))
3044 return cand->var_after;
3045 else
3046 return cand->var_before;
3047 }
3048
3049 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3050 same precision that is at least as wide as the precision of TYPE, stores
3051 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3052 type of A and B. */
3053
3054 static tree
3055 determine_common_wider_type (tree *a, tree *b)
3056 {
3057 tree wider_type = NULL;
3058 tree suba, subb;
3059 tree atype = TREE_TYPE (*a);
3060
3061 if (CONVERT_EXPR_P (*a))
3062 {
3063 suba = TREE_OPERAND (*a, 0);
3064 wider_type = TREE_TYPE (suba);
3065 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3066 return atype;
3067 }
3068 else
3069 return atype;
3070
3071 if (CONVERT_EXPR_P (*b))
3072 {
3073 subb = TREE_OPERAND (*b, 0);
3074 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3075 return atype;
3076 }
3077 else
3078 return atype;
3079
3080 *a = suba;
3081 *b = subb;
3082 return wider_type;
3083 }
3084
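/* Example (illustrative): if A is (unsigned int) al and B is
   (unsigned int) bl, with al and bl both of type long, both
   conversions are stripped, al and bl are stored back through the
   pointers and long is returned, so callers can fold A - B in the
   wider type.  */
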
3085 /* Determines the expression by which USE is expressed from induction variable
3086 CAND at statement AT in LOOP. The expression is stored in a decomposed
3087 form into AFF. Returns false if USE cannot be expressed using CAND. */
3088
3089 static bool
3090 get_computation_aff (struct loop *loop,
3091 struct iv_use *use, struct iv_cand *cand, gimple at,
3092 struct aff_tree *aff)
3093 {
3094 tree ubase = use->iv->base;
3095 tree ustep = use->iv->step;
3096 tree cbase = cand->iv->base;
3097 tree cstep = cand->iv->step, cstep_common;
3098 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3099 tree common_type, var;
3100 tree uutype;
3101 aff_tree cbase_aff, var_aff;
3102 widest_int rat;
3103
3104 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3105 {
3106 /* We do not have a precision to express the values of USE. */
3107 return false;
3108 }
3109
3110 var = var_at_stmt (loop, cand, at);
3111 uutype = unsigned_type_for (utype);
3112
3113 /* If the conversion is not a no-op, perform it. */
3114 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3115 {
3116 cstep = fold_convert (uutype, cstep);
3117 cbase = fold_convert (uutype, cbase);
3118 var = fold_convert (uutype, var);
3119 }
3120
3121 if (!constant_multiple_of (ustep, cstep, &rat))
3122 return false;
3123
3124 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3125 type, we achieve better folding by computing their difference in this
3126 wider type, and casting the result to UUTYPE. We do not need to worry about
3127 overflows, as all the arithmetic will in the end be performed in UUTYPE
3128 anyway. */
3129 common_type = determine_common_wider_type (&ubase, &cbase);
3130
3131 /* use = ubase - ratio * cbase + ratio * var. */
3132 tree_to_aff_combination (ubase, common_type, aff);
3133 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3134 tree_to_aff_combination (var, uutype, &var_aff);
3135
3136 /* We need to shift the value if we are after the increment. */
3137 if (stmt_after_increment (loop, cand, at))
3138 {
3139 aff_tree cstep_aff;
3140
3141 if (common_type != uutype)
3142 cstep_common = fold_convert (common_type, cstep);
3143 else
3144 cstep_common = cstep;
3145
3146 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3147 aff_combination_add (&cbase_aff, &cstep_aff);
3148 }
3149
3150 aff_combination_scale (&cbase_aff, -rat);
3151 aff_combination_add (aff, &cbase_aff);
3152 if (common_type != uutype)
3153 aff_combination_convert (aff, uutype);
3154
3155 aff_combination_scale (&var_aff, rat);
3156 aff_combination_add (aff, &var_aff);
3157
3158 return true;
3159 }
3160
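/* A worked example (illustrative): with use iv {base b, step 4} and
   candidate iv {base 0, step 1}, constant_multiple_of gives rat == 4
   and the affine combination built above is

     use = b - 4 * 0 + 4 * var = b + 4 * var

   where var is the candidate's value at AT (var_before or var_after,
   depending on stmt_after_increment).  */
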
3161 /* Return the type of USE. */
3162
3163 static tree
3164 get_use_type (struct iv_use *use)
3165 {
3166 tree base_type = TREE_TYPE (use->iv->base);
3167 tree type;
3168
3169 if (use->type == USE_ADDRESS)
3170 {
3171 /* The base_type may be a void pointer. Create a pointer type based on
3172 the mem_ref instead. */
3173 type = build_pointer_type (TREE_TYPE (*use->op_p));
3174 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3175 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3176 }
3177 else
3178 type = base_type;
3179
3180 return type;
3181 }
3182
3183 /* Determines the expression by which USE is expressed from induction variable
3184 CAND at statement AT in LOOP. The computation is unshared. */
3185
3186 static tree
3187 get_computation_at (struct loop *loop,
3188 struct iv_use *use, struct iv_cand *cand, gimple at)
3189 {
3190 aff_tree aff;
3191 tree type = get_use_type (use);
3192
3193 if (!get_computation_aff (loop, use, cand, at, &aff))
3194 return NULL_TREE;
3195 unshare_aff_combination (&aff);
3196 return fold_convert (type, aff_combination_to_tree (&aff));
3197 }
3198
3199 /* Determines the expression by which USE is expressed from induction variable
3200 CAND in LOOP. The computation is unshared. */
3201
3202 static tree
3203 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3204 {
3205 return get_computation_at (loop, use, cand, use->stmt);
3206 }
3207
3208 /* Adjust the cost COST for being in loop setup rather than loop body.
3209 If we're optimizing for space, the loop setup overhead is constant;
3210 if we're optimizing for speed, amortize it over the per-iteration cost. */
3211 static unsigned
3212 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3213 {
3214 if (cost == INFTY)
3215 return cost;
3216 else if (optimize_loop_for_speed_p (data->current_loop))
3217 return cost / avg_loop_niter (data->current_loop);
3218 else
3219 return cost;
3220 }
3221
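/* E.g. (illustrative): a setup cost of 20 in a loop expected to run 10
   iterations contributes 20 / 10 == 2 when optimizing for speed, but
   the full 20 when optimizing for size.  */
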
3222 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3223 validity for a memory reference accessing memory of mode MODE in
3224 address space AS. */
3225
3227 bool
3228 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3229 addr_space_t as)
3230 {
3231 #define MAX_RATIO 128
3232 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3233 static vec<sbitmap> valid_mult_list;
3234 sbitmap valid_mult;
3235
3236 if (data_index >= valid_mult_list.length ())
3237 valid_mult_list.safe_grow_cleared (data_index + 1);
3238
3239 valid_mult = valid_mult_list[data_index];
3240 if (!valid_mult)
3241 {
3242 machine_mode address_mode = targetm.addr_space.address_mode (as);
3243 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3244 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3245 rtx addr, scaled;
3246 HOST_WIDE_INT i;
3247
3248 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3249 bitmap_clear (valid_mult);
3250 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3251 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3252 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3253 {
3254 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3255 if (memory_address_addr_space_p (mode, addr, as)
3256 || memory_address_addr_space_p (mode, scaled, as))
3257 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3258 }
3259
3260 if (dump_file && (dump_flags & TDF_DETAILS))
3261 {
3262 fprintf (dump_file, " allowed multipliers:");
3263 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3264 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3265 fprintf (dump_file, " %d", (int) i);
3266 fprintf (dump_file, "\n");
3267 fprintf (dump_file, "\n");
3268 }
3269
3270 valid_mult_list[data_index] = valid_mult;
3271 }
3272
3273 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3274 return false;
3275
3276 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3277 }
3278
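/* For instance (target-dependent, illustrative): on a target with
   x86-style scaled addressing only the ratios 1, 2, 4 and 8 would be
   recorded as valid, so an address needing a multiplier of 12 has to
   pay for an explicit multiplication instead.  */
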
3279 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3280 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3281 variable is omitted. Compute the cost for a memory reference that accesses
3282 a memory location of mode MEM_MODE in address space AS.
3283
3284 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3285 size of MEM_MODE / RATIO) is available. To make this determination, we
3286 look at the size of the increment to be made, which is given in CSTEP.
3287 CSTEP may be zero if the step is unknown.
3288 STMT_AFTER_INC is true iff the statement we're looking at is after the
3289 increment of the original biv.
3290
3291 TODO -- there must be some better way. This all is quite crude. */
3292
3293 enum ainc_type
3294 {
3295 AINC_PRE_INC, /* Pre increment. */
3296 AINC_PRE_DEC, /* Pre decrement. */
3297 AINC_POST_INC, /* Post increment. */
3298 AINC_POST_DEC, /* Post decrement. */
3299 AINC_NONE /* Also the number of auto increment types. */
3300 };
3301
3302 typedef struct address_cost_data_s
3303 {
3304 HOST_WIDE_INT min_offset, max_offset;
3305 unsigned costs[2][2][2][2];
3306 unsigned ainc_costs[AINC_NONE];
3307 } *address_cost_data;
3308
3310 static comp_cost
3311 get_address_cost (bool symbol_present, bool var_present,
3312 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3313 HOST_WIDE_INT cstep, machine_mode mem_mode,
3314 addr_space_t as, bool speed,
3315 bool stmt_after_inc, bool *may_autoinc)
3316 {
3317 machine_mode address_mode = targetm.addr_space.address_mode (as);
3318 static vec<address_cost_data> address_cost_data_list;
3319 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3320 address_cost_data data;
3321 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3322 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3323 unsigned cost, acost, complexity;
3324 enum ainc_type autoinc_type;
3325 bool offset_p, ratio_p, autoinc;
3326 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3327 unsigned HOST_WIDE_INT mask;
3328 unsigned bits;
3329
3330 if (data_index >= address_cost_data_list.length ())
3331 address_cost_data_list.safe_grow_cleared (data_index + 1);
3332
3333 data = address_cost_data_list[data_index];
3334 if (!data)
3335 {
3336 HOST_WIDE_INT i;
3337 HOST_WIDE_INT rat, off = 0;
3338 int old_cse_not_expected, width;
3339 unsigned sym_p, var_p, off_p, rat_p, add_c;
3340 rtx_insn *seq;
3341 rtx addr, base;
3342 rtx reg0, reg1;
3343
3344 data = (address_cost_data) xcalloc (1, sizeof (*data));
3345
3346 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3347
3348 width = GET_MODE_BITSIZE (address_mode) - 1;
3349 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3350 width = HOST_BITS_PER_WIDE_INT - 1;
3351 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3352
3353 for (i = width; i >= 0; i--)
3354 {
3355 off = -((unsigned HOST_WIDE_INT) 1 << i);
3356 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3357 if (memory_address_addr_space_p (mem_mode, addr, as))
3358 break;
3359 }
3360 data->min_offset = (i == -1 ? 0 : off);
3361
3362 for (i = width; i >= 0; i--)
3363 {
3364 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3365 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3366 if (memory_address_addr_space_p (mem_mode, addr, as))
3367 break;
3368 /* For some strict-alignment targets, the offset must be naturally
3369 aligned. Try an aligned offset if mem_mode is not QImode. */
3370 off = mem_mode != QImode
3371 ? ((unsigned HOST_WIDE_INT) 1 << i)
3372 - GET_MODE_SIZE (mem_mode)
3373 : 0;
3374 if (off > 0)
3375 {
3376 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3377 if (memory_address_addr_space_p (mem_mode, addr, as))
3378 break;
3379 }
3380 }
3381 if (i == -1)
3382 off = 0;
3383 data->max_offset = off;
3384
3385 if (dump_file && (dump_flags & TDF_DETAILS))
3386 {
3387 fprintf (dump_file, "get_address_cost:\n");
3388 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3389 GET_MODE_NAME (mem_mode),
3390 data->min_offset);
3391 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3392 GET_MODE_NAME (mem_mode),
3393 data->max_offset);
3394 }
3395
3396 rat = 1;
3397 for (i = 2; i <= MAX_RATIO; i++)
3398 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3399 {
3400 rat = i;
3401 break;
3402 }
3403
3404 /* Compute the cost of various addressing modes. */
3405 acost = 0;
3406 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3407 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3408
3409 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3410 || USE_STORE_PRE_DECREMENT (mem_mode))
3411 {
3412 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3413 has_predec[mem_mode]
3414 = memory_address_addr_space_p (mem_mode, addr, as);
3415
3416 if (has_predec[mem_mode])
3417 data->ainc_costs[AINC_PRE_DEC]
3418 = address_cost (addr, mem_mode, as, speed);
3419 }
3420 if (USE_LOAD_POST_DECREMENT (mem_mode)
3421 || USE_STORE_POST_DECREMENT (mem_mode))
3422 {
3423 addr = gen_rtx_POST_DEC (address_mode, reg0);
3424 has_postdec[mem_mode]
3425 = memory_address_addr_space_p (mem_mode, addr, as);
3426
3427 if (has_postdec[mem_mode])
3428 data->ainc_costs[AINC_POST_DEC]
3429 = address_cost (addr, mem_mode, as, speed);
3430 }
3431 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3432 || USE_STORE_PRE_INCREMENT (mem_mode))
3433 {
3434 addr = gen_rtx_PRE_INC (address_mode, reg0);
3435 has_preinc[mem_mode]
3436 = memory_address_addr_space_p (mem_mode, addr, as);
3437
3438 if (has_preinc[mem_mode])
3439 data->ainc_costs[AINC_PRE_INC]
3440 = address_cost (addr, mem_mode, as, speed);
3441 }
3442 if (USE_LOAD_POST_INCREMENT (mem_mode)
3443 || USE_STORE_POST_INCREMENT (mem_mode))
3444 {
3445 addr = gen_rtx_POST_INC (address_mode, reg0);
3446 has_postinc[mem_mode]
3447 = memory_address_addr_space_p (mem_mode, addr, as);
3448
3449 if (has_postinc[mem_mode])
3450 data->ainc_costs[AINC_POST_INC]
3451 = address_cost (addr, mem_mode, as, speed);
3452 }
3453 for (i = 0; i < 16; i++)
3454 {
3455 sym_p = i & 1;
3456 var_p = (i >> 1) & 1;
3457 off_p = (i >> 2) & 1;
3458 rat_p = (i >> 3) & 1;
3459
3460 addr = reg0;
3461 if (rat_p)
3462 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3463 gen_int_mode (rat, address_mode));
3464
3465 if (var_p)
3466 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3467
3468 if (sym_p)
3469 {
3470 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3471 /* ??? We can run into trouble with some backends by presenting
3472 it with symbols which haven't been properly passed through
3473 targetm.encode_section_info. By setting the local bit, we
3474 enhance the probability of things working. */
3475 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3476
3477 if (off_p)
3478 base = gen_rtx_fmt_e (CONST, address_mode,
3479 gen_rtx_fmt_ee
3480 (PLUS, address_mode, base,
3481 gen_int_mode (off, address_mode)));
3482 }
3483 else if (off_p)
3484 base = gen_int_mode (off, address_mode);
3485 else
3486 base = NULL_RTX;
3487
3488 if (base)
3489 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3490
3491 start_sequence ();
3492 /* To avoid splitting addressing modes, pretend that no cse will
3493 follow. */
3494 old_cse_not_expected = cse_not_expected;
3495 cse_not_expected = true;
3496 addr = memory_address_addr_space (mem_mode, addr, as);
3497 cse_not_expected = old_cse_not_expected;
3498 seq = get_insns ();
3499 end_sequence ();
3500
3501 acost = seq_cost (seq, speed);
3502 acost += address_cost (addr, mem_mode, as, speed);
3503
3504 if (!acost)
3505 acost = 1;
3506 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3507 }
3508
3509 /* On some targets, it is quite expensive to load a symbol into a register,
3510 which makes addresses that contain symbols look much more expensive.
3511 However, the symbol will have to be loaded in any case before the
3512 loop (and quite likely we have it in a register already), so it does not
3513 make much sense to penalize them too heavily. So make some final
3514 tweaks for the SYMBOL_PRESENT modes:
3515
3516 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3517 var is cheaper, use this mode with small penalty.
3518 If VAR_PRESENT is true, try whether the mode with
3519 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3520 if this is the case, use it. */
3521 add_c = add_cost (speed, address_mode);
3522 for (i = 0; i < 8; i++)
3523 {
3524 var_p = i & 1;
3525 off_p = (i >> 1) & 1;
3526 rat_p = (i >> 2) & 1;
3527
3528 acost = data->costs[0][1][off_p][rat_p] + 1;
3529 if (var_p)
3530 acost += add_c;
3531
3532 if (acost < data->costs[1][var_p][off_p][rat_p])
3533 data->costs[1][var_p][off_p][rat_p] = acost;
3534 }
3535
3536 if (dump_file && (dump_flags & TDF_DETAILS))
3537 {
3538 fprintf (dump_file, "Address costs:\n");
3539
3540 for (i = 0; i < 16; i++)
3541 {
3542 sym_p = i & 1;
3543 var_p = (i >> 1) & 1;
3544 off_p = (i >> 2) & 1;
3545 rat_p = (i >> 3) & 1;
3546
3547 fprintf (dump_file, " ");
3548 if (sym_p)
3549 fprintf (dump_file, "sym + ");
3550 if (var_p)
3551 fprintf (dump_file, "var + ");
3552 if (off_p)
3553 fprintf (dump_file, "cst + ");
3554 if (rat_p)
3555 fprintf (dump_file, "rat * ");
3556
3557 acost = data->costs[sym_p][var_p][off_p][rat_p];
3558 fprintf (dump_file, "index costs %d\n", acost);
3559 }
3560 if (has_predec[mem_mode] || has_postdec[mem_mode]
3561 || has_preinc[mem_mode] || has_postinc[mem_mode])
3562 fprintf (dump_file, " May include autoinc/dec\n");
3563 fprintf (dump_file, "\n");
3564 }
3565
3566 address_cost_data_list[data_index] = data;
3567 }
3568
3569 bits = GET_MODE_BITSIZE (address_mode);
3570 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3571 offset &= mask;
3572 if ((offset >> (bits - 1) & 1))
3573 offset |= ~mask;
3574 s_offset = offset;
3575
3576 autoinc = false;
3577 autoinc_type = AINC_NONE;
3578 msize = GET_MODE_SIZE (mem_mode);
3579 autoinc_offset = offset;
3580 if (stmt_after_inc)
3581 autoinc_offset += ratio * cstep;
3582 if (symbol_present || var_present || ratio != 1)
3583 autoinc = false;
3584 else
3585 {
3586 if (has_postinc[mem_mode] && autoinc_offset == 0
3587 && msize == cstep)
3588 autoinc_type = AINC_POST_INC;
3589 else if (has_postdec[mem_mode] && autoinc_offset == 0
3590 && msize == -cstep)
3591 autoinc_type = AINC_POST_DEC;
3592 else if (has_preinc[mem_mode] && autoinc_offset == msize
3593 && msize == cstep)
3594 autoinc_type = AINC_PRE_INC;
3595 else if (has_predec[mem_mode] && autoinc_offset == -msize
3596 && msize == -cstep)
3597 autoinc_type = AINC_PRE_DEC;
3598
3599 if (autoinc_type != AINC_NONE)
3600 autoinc = true;
3601 }
3602
3603 cost = 0;
3604 offset_p = (s_offset != 0
3605 && data->min_offset <= s_offset
3606 && s_offset <= data->max_offset);
3607 ratio_p = (ratio != 1
3608 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3609
3610 if (ratio != 1 && !ratio_p)
3611 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3612
3613 if (s_offset && !offset_p && !symbol_present)
3614 cost += add_cost (speed, address_mode);
3615
3616 if (may_autoinc)
3617 *may_autoinc = autoinc;
3618 if (autoinc)
3619 acost = data->ainc_costs[autoinc_type];
3620 else
3621 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3622 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3623 return new_cost (cost + acost, complexity);
3624 }
3625
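/* An illustrative decomposition (not part of the original sources): an
   address of the shape &sym + x + 16 + 4 * i is looked up in the
   precomputed table with sym_p == 1, var_p == 1, off_p == 1 and
   rat_p == 1, and contributes complexity 4, one unit per part
   present.  */
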
3626 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3627 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3628 calculating the operands of EXPR. Returns true if successful, and returns
3629 the cost in COST. */
3630
3631 static bool
3632 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3633 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3634 {
3635 comp_cost res;
3636 tree op1 = TREE_OPERAND (expr, 1);
3637 tree cst = TREE_OPERAND (mult, 1);
3638 tree multop = TREE_OPERAND (mult, 0);
3639 int m = exact_log2 (int_cst_value (cst));
3640 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3641 int as_cost, sa_cost;
3642 bool mult_in_op1;
3643
3644 if (!(m >= 0 && m < maxm))
3645 return false;
3646
3647 mult_in_op1 = operand_equal_p (op1, mult, 0);
3648
3649 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3650
3651 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3652 use that in preference to a shift insn followed by an add insn. */
3653 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3654 ? shiftadd_cost (speed, mode, m)
3655 : (mult_in_op1
3656 ? shiftsub1_cost (speed, mode, m)
3657 : shiftsub0_cost (speed, mode, m)));
3658
3659 res = new_cost (MIN (as_cost, sa_cost), 0);
3660 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3661
3662 STRIP_NOPS (multop);
3663 if (!is_gimple_val (multop))
3664 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3665
3666 *cost = res;
3667 return true;
3668 }
3669
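/* Example (illustrative): for EXPR == a + b * 8, MULT is b * 8 and
   m == exact_log2 (8) == 3, so the result is the cheaper of an add
   plus a shift by 3 and a single shift-and-add instruction on targets
   that provide one.  */
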
3670 /* Estimates cost of forcing expression EXPR into a variable. */
3671
3672 static comp_cost
3673 force_expr_to_var_cost (tree expr, bool speed)
3674 {
3675 static bool costs_initialized = false;
3676 static unsigned integer_cost [2];
3677 static unsigned symbol_cost [2];
3678 static unsigned address_cost [2];
3679 tree op0, op1;
3680 comp_cost cost0, cost1, cost;
3681 machine_mode mode;
3682
3683 if (!costs_initialized)
3684 {
3685 tree type = build_pointer_type (integer_type_node);
3686 tree var, addr;
3687 rtx x;
3688 int i;
3689
3690 var = create_tmp_var_raw (integer_type_node, "test_var");
3691 TREE_STATIC (var) = 1;
3692 x = produce_memory_decl_rtl (var, NULL);
3693 SET_DECL_RTL (var, x);
3694
3695 addr = build1 (ADDR_EXPR, type, var);
3696
3698 for (i = 0; i < 2; i++)
3699 {
3700 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3701 2000), i);
3702
3703 symbol_cost[i] = computation_cost (addr, i) + 1;
3704
3705 address_cost[i]
3706 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3707 if (dump_file && (dump_flags & TDF_DETAILS))
3708 {
3709 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3710 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3711 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3712 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3713 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3714 fprintf (dump_file, "\n");
3715 }
3716 }
3717
3718 costs_initialized = true;
3719 }
3720
3721 STRIP_NOPS (expr);
3722
3723 if (SSA_VAR_P (expr))
3724 return no_cost;
3725
3726 if (is_gimple_min_invariant (expr))
3727 {
3728 if (TREE_CODE (expr) == INTEGER_CST)
3729 return new_cost (integer_cost [speed], 0);
3730
3731 if (TREE_CODE (expr) == ADDR_EXPR)
3732 {
3733 tree obj = TREE_OPERAND (expr, 0);
3734
3735 if (TREE_CODE (obj) == VAR_DECL
3736 || TREE_CODE (obj) == PARM_DECL
3737 || TREE_CODE (obj) == RESULT_DECL)
3738 return new_cost (symbol_cost [speed], 0);
3739 }
3740
3741 return new_cost (address_cost [speed], 0);
3742 }
3743
3744 switch (TREE_CODE (expr))
3745 {
3746 case POINTER_PLUS_EXPR:
3747 case PLUS_EXPR:
3748 case MINUS_EXPR:
3749 case MULT_EXPR:
3750 op0 = TREE_OPERAND (expr, 0);
3751 op1 = TREE_OPERAND (expr, 1);
3752 STRIP_NOPS (op0);
3753 STRIP_NOPS (op1);
3754 break;
3755
3756 CASE_CONVERT:
3757 case NEGATE_EXPR:
3758 op0 = TREE_OPERAND (expr, 0);
3759 STRIP_NOPS (op0);
3760 op1 = NULL_TREE;
3761 break;
3762
3763 default:
3764 /* Just an arbitrary value, FIXME. */
3765 return new_cost (target_spill_cost[speed], 0);
3766 }
3767
3768 if (op0 == NULL_TREE
3769 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3770 cost0 = no_cost;
3771 else
3772 cost0 = force_expr_to_var_cost (op0, speed);
3773
3774 if (op1 == NULL_TREE
3775 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3776 cost1 = no_cost;
3777 else
3778 cost1 = force_expr_to_var_cost (op1, speed);
3779
3780 mode = TYPE_MODE (TREE_TYPE (expr));
3781 switch (TREE_CODE (expr))
3782 {
3783 case POINTER_PLUS_EXPR:
3784 case PLUS_EXPR:
3785 case MINUS_EXPR:
3786 case NEGATE_EXPR:
3787 cost = new_cost (add_cost (speed, mode), 0);
3788 if (TREE_CODE (expr) != NEGATE_EXPR)
3789 {
3790 tree mult = NULL_TREE;
3791 comp_cost sa_cost;
3792 if (TREE_CODE (op1) == MULT_EXPR)
3793 mult = op1;
3794 else if (TREE_CODE (op0) == MULT_EXPR)
3795 mult = op0;
3796
3797 if (mult != NULL_TREE
3798 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3799 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3800 speed, &sa_cost))
3801 return sa_cost;
3802 }
3803 break;
3804
3805 CASE_CONVERT:
3806 {
3807 tree inner_mode, outer_mode;
3808 outer_mode = TREE_TYPE (expr);
3809 inner_mode = TREE_TYPE (op0);
3810 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3811 TYPE_MODE (inner_mode), speed), 0);
3812 }
3813 break;
3814
3815 case MULT_EXPR:
3816 if (cst_and_fits_in_hwi (op0))
3817 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3818 mode, speed), 0);
3819 else if (cst_and_fits_in_hwi (op1))
3820 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3821 mode, speed), 0);
3822 else
3823 return new_cost (target_spill_cost [speed], 0);
3824 break;
3825
3826 default:
3827 gcc_unreachable ();
3828 }
3829
3830 cost = add_costs (cost, cost0);
3831 cost = add_costs (cost, cost1);
3832
3833 /* Bound the cost by target_spill_cost. The parts of complicated
3834 computations often are either loop invariant or at least can
3835 be shared between several iv uses, so letting this grow without
3836 limits would not give reasonable results. */
3837 if (cost.cost > (int) target_spill_cost [speed])
3838 cost.cost = target_spill_cost [speed];
3839
3840 return cost;
3841 }
3842
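/* Illustrative (not part of the original sources): forcing
   (a + b) * 4 into a variable costs mult_by_coeff_cost for the
   multiplication by 4 plus the recursive cost of a + b, but is capped
   by target_spill_cost, since such subexpressions are typically
   computed once and kept in a register.  */
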
3843 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3844 invariants the computation depends on. */
3845
3846 static comp_cost
3847 force_var_cost (struct ivopts_data *data,
3848 tree expr, bitmap *depends_on)
3849 {
3850 if (depends_on)
3851 {
3852 fd_ivopts_data = data;
3853 walk_tree (&expr, find_depends, depends_on, NULL);
3854 }
3855
3856 return force_expr_to_var_cost (expr, data->speed);
3857 }
3858
3859 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3860 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3861 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3862 invariants the computation depends on. */
3863
3864 static comp_cost
3865 split_address_cost (struct ivopts_data *data,
3866 tree addr, bool *symbol_present, bool *var_present,
3867 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3868 {
3869 tree core;
3870 HOST_WIDE_INT bitsize;
3871 HOST_WIDE_INT bitpos;
3872 tree toffset;
3873 machine_mode mode;
3874 int unsignedp, volatilep;
3875
3876 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3877 &unsignedp, &volatilep, false);
3878
3879 if (toffset != 0
3880 || bitpos % BITS_PER_UNIT != 0
3881 || TREE_CODE (core) != VAR_DECL)
3882 {
3883 *symbol_present = false;
3884 *var_present = true;
3885 fd_ivopts_data = data;
3886 walk_tree (&addr, find_depends, depends_on, NULL);
3887 return new_cost (target_spill_cost[data->speed], 0);
3888 }
3889
3890 *offset += bitpos / BITS_PER_UNIT;
3891 if (TREE_STATIC (core)
3892 || DECL_EXTERNAL (core))
3893 {
3894 *symbol_present = true;
3895 *var_present = false;
3896 return no_cost;
3897 }
3898
3899 *symbol_present = false;
3900 *var_present = true;
3901 return no_cost;
3902 }
3903
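/* Example (illustrative): for ADDR == arr[2].f with a file-scope
   array arr, get_inner_reference returns core arr with a constant bit
   position, so *SYMBOL_PRESENT is set and the byte offset of the
   element folds into *OFFSET at no cost.  */
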
3904 /* Estimates cost of expressing difference of addresses E1 - E2 as
3905 var + symbol + offset. The value of offset is added to OFFSET,
3906 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3907 part is missing. DEPENDS_ON is a set of the invariants the computation
3908 depends on. */
3909
3910 static comp_cost
3911 ptr_difference_cost (struct ivopts_data *data,
3912 tree e1, tree e2, bool *symbol_present, bool *var_present,
3913 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3914 {
3915 HOST_WIDE_INT diff = 0;
3916 aff_tree aff_e1, aff_e2;
3917 tree type;
3918
3919 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3920
3921 if (ptr_difference_const (e1, e2, &diff))
3922 {
3923 *offset += diff;
3924 *symbol_present = false;
3925 *var_present = false;
3926 return no_cost;
3927 }
3928
3929 if (integer_zerop (e2))
3930 return split_address_cost (data, TREE_OPERAND (e1, 0),
3931 symbol_present, var_present, offset, depends_on);
3932
3933 *symbol_present = false;
3934 *var_present = true;
3935
3936 type = signed_type_for (TREE_TYPE (e1));
3937 tree_to_aff_combination (e1, type, &aff_e1);
3938 tree_to_aff_combination (e2, type, &aff_e2);
3939 aff_combination_scale (&aff_e2, -1);
3940 aff_combination_add (&aff_e1, &aff_e2);
3941
3942 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3943 }
3944
3945 /* Estimates cost of expressing difference E1 - E2 as
3946 var + symbol + offset. The value of offset is added to OFFSET,
3947 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3948 part is missing. DEPENDS_ON is a set of the invariants the computation
3949 depends on. */
3950
3951 static comp_cost
3952 difference_cost (struct ivopts_data *data,
3953 tree e1, tree e2, bool *symbol_present, bool *var_present,
3954 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3955 {
3956 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3957 unsigned HOST_WIDE_INT off1, off2;
3958 aff_tree aff_e1, aff_e2;
3959 tree type;
3960
3961 e1 = strip_offset (e1, &off1);
3962 e2 = strip_offset (e2, &off2);
3963 *offset += off1 - off2;
3964
3965 STRIP_NOPS (e1);
3966 STRIP_NOPS (e2);
3967
3968 if (TREE_CODE (e1) == ADDR_EXPR)
3969 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3970 offset, depends_on);
3971 *symbol_present = false;
3972
3973 if (operand_equal_p (e1, e2, 0))
3974 {
3975 *var_present = false;
3976 return no_cost;
3977 }
3978
3979 *var_present = true;
3980
3981 if (integer_zerop (e2))
3982 return force_var_cost (data, e1, depends_on);
3983
3984 if (integer_zerop (e1))
3985 {
3986 comp_cost cost = force_var_cost (data, e2, depends_on);
3987 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3988 return cost;
3989 }
3990
3991 type = signed_type_for (TREE_TYPE (e1));
3992 tree_to_aff_combination (e1, type, &aff_e1);
3993 tree_to_aff_combination (e2, type, &aff_e2);
3994 aff_combination_scale (&aff_e2, -1);
3995 aff_combination_add (&aff_e1, &aff_e2);
3996
3997 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3998 }
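
/* An example of the fast path above (a sketch, assuming 4-byte ints):
for E1 == &a[4] and E2 == &a[1], strip_offset moves 16 and 4 into the
offsets, so *OFFSET grows by 12, the stripped bases are equal, and the
difference is expressed at no cost with neither a symbol nor a
variable part.  When the bases differ, the difference is instead built
as an affine combination and priced by force_var_cost.  */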
3999
4000 /* Returns true if AFF1 and AFF2 are identical. */
4001
4002 static bool
4003 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4004 {
4005 unsigned i;
4006
4007 if (aff1->n != aff2->n)
4008 return false;
4009
4010 for (i = 0; i < aff1->n; i++)
4011 {
4012 if (aff1->elts[i].coef != aff2->elts[i].coef)
4013 return false;
4014
4015 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4016 return false;
4017 }
4018 return true;
4019 }
4020
4021 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4022
4023 static int
4024 get_expr_id (struct ivopts_data *data, tree expr)
4025 {
4026 struct iv_inv_expr_ent ent;
4027 struct iv_inv_expr_ent **slot;
4028
4029 ent.expr = expr;
4030 ent.hash = iterative_hash_expr (expr, 0);
4031 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4032 if (*slot)
4033 return (*slot)->id;
4034
4035 *slot = XNEW (struct iv_inv_expr_ent);
4036 (*slot)->expr = expr;
4037 (*slot)->hash = ent.hash;
4038 (*slot)->id = data->inv_expr_id++;
4039 return (*slot)->id;
4040 }
4041
4042 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4043 requires a new compiler-generated temporary. Returns -1 otherwise.
4044 ADDRESS_P is a flag indicating if the expression is for address
4045 computation. */
4046
4047 static int
4048 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4049 tree cbase, HOST_WIDE_INT ratio,
4050 bool address_p)
4051 {
4052 aff_tree ubase_aff, cbase_aff;
4053 tree expr, ub, cb;
4054
4055 STRIP_NOPS (ubase);
4056 STRIP_NOPS (cbase);
4057 ub = ubase;
4058 cb = cbase;
4059
4060 if ((TREE_CODE (ubase) == INTEGER_CST)
4061 && (TREE_CODE (cbase) == INTEGER_CST))
4062 return -1;
4063
4064 /* Strips the constant part. */
4065 if (TREE_CODE (ubase) == PLUS_EXPR
4066 || TREE_CODE (ubase) == MINUS_EXPR
4067 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4068 {
4069 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4070 ubase = TREE_OPERAND (ubase, 0);
4071 }
4072
4073 /* Strips the constant part. */
4074 if (TREE_CODE (cbase) == PLUS_EXPR
4075 || TREE_CODE (cbase) == MINUS_EXPR
4076 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4077 {
4078 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4079 cbase = TREE_OPERAND (cbase, 0);
4080 }
4081
4082 if (address_p)
4083 {
4084 if (((TREE_CODE (ubase) == SSA_NAME)
4085 || (TREE_CODE (ubase) == ADDR_EXPR
4086 && is_gimple_min_invariant (ubase)))
4087 && (TREE_CODE (cbase) == INTEGER_CST))
4088 return -1;
4089
4090 if (((TREE_CODE (cbase) == SSA_NAME)
4091 || (TREE_CODE (cbase) == ADDR_EXPR
4092 && is_gimple_min_invariant (cbase)))
4093 && (TREE_CODE (ubase) == INTEGER_CST))
4094 return -1;
4095 }
4096
4097 if (ratio == 1)
4098 {
4099 if (operand_equal_p (ubase, cbase, 0))
4100 return -1;
4101
4102 if (TREE_CODE (ubase) == ADDR_EXPR
4103 && TREE_CODE (cbase) == ADDR_EXPR)
4104 {
4105 tree usym, csym;
4106
4107 usym = TREE_OPERAND (ubase, 0);
4108 csym = TREE_OPERAND (cbase, 0);
4109 if (TREE_CODE (usym) == ARRAY_REF)
4110 {
4111 tree ind = TREE_OPERAND (usym, 1);
4112 if (TREE_CODE (ind) == INTEGER_CST
4113 && tree_fits_shwi_p (ind)
4114 && tree_to_shwi (ind) == 0)
4115 usym = TREE_OPERAND (usym, 0);
4116 }
4117 if (TREE_CODE (csym) == ARRAY_REF)
4118 {
4119 tree ind = TREE_OPERAND (csym, 1);
4120 if (TREE_CODE (ind) == INTEGER_CST
4121 && tree_fits_shwi_p (ind)
4122 && tree_to_shwi (ind) == 0)
4123 csym = TREE_OPERAND (csym, 0);
4124 }
4125 if (operand_equal_p (usym, csym, 0))
4126 return -1;
4127 }
4128 /* Now do a more complex comparison. */
4129 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4130 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4131 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4132 return -1;
4133 }
4134
4135 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4136 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4137
4138 aff_combination_scale (&cbase_aff, -1 * ratio);
4139 aff_combination_add (&ubase_aff, &cbase_aff);
4140 expr = aff_combination_to_tree (&ubase_aff);
4141 return get_expr_id (data, expr);
4142 }
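
/* Two hypothetical examples: for UBASE == x + 16, CBASE == x and
RATIO == 1, the constant part is stripped and the remaining bases
compare equal, so no temporary is needed and -1 is returned.  For
UBASE == x, CBASE == y and RATIO == 2, the difference x - 2 * y is
loop invariant but needs its own temporary, so its expr id is
returned.  */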
4143
4144
4145
4146 /* Determines the cost of the computation by which USE is expressed
4147 from induction variable CAND. If ADDRESS_P is true, we just need
4148 to create an address from it, otherwise we want to get it into a
4149 register. A set of invariants we depend on is stored in
4150 DEPENDS_ON. AT is the statement at which the value is computed.
4151 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4152 addressing is likely. */
4153
4154 static comp_cost
4155 get_computation_cost_at (struct ivopts_data *data,
4156 struct iv_use *use, struct iv_cand *cand,
4157 bool address_p, bitmap *depends_on, gimple at,
4158 bool *can_autoinc,
4159 int *inv_expr_id)
4160 {
4161 tree ubase = use->iv->base, ustep = use->iv->step;
4162 tree cbase, cstep;
4163 tree utype = TREE_TYPE (ubase), ctype;
4164 unsigned HOST_WIDE_INT cstepi, offset = 0;
4165 HOST_WIDE_INT ratio, aratio;
4166 bool var_present, symbol_present, stmt_is_after_inc;
4167 comp_cost cost;
4168 widest_int rat;
4169 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4170 machine_mode mem_mode = (address_p
4171 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4172 : VOIDmode);
4173
4174 *depends_on = NULL;
4175
4176 /* Only consider real candidates. */
4177 if (!cand->iv)
4178 return infinite_cost;
4179
4180 cbase = cand->iv->base;
4181 cstep = cand->iv->step;
4182 ctype = TREE_TYPE (cbase);
4183
4184 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4185 {
4186 /* We do not have a precision to express the values of use. */
4187 return infinite_cost;
4188 }
4189
4190 if (address_p
4191 || (use->iv->base_object
4192 && cand->iv->base_object
4193 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4194 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4195 {
4196 /* Do not try to express address of an object with computation based
4197 on address of a different object. This may cause problems in rtl
4198 level alias analysis (which does not expect this to happen,
4199 as it is invalid in C), and would be unlikely to be useful
4200 anyway. */
4201 if (use->iv->base_object
4202 && cand->iv->base_object
4203 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4204 return infinite_cost;
4205 }
4206
4207 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4208 {
4209 /* TODO -- add direct handling of this case. */
4210 goto fallback;
4211 }
4212
4213 /* CSTEPI is removed from the offset in case the statement is after the
4214 increment. If the step is not constant, we use zero instead.
4215 This is a bit imprecise (there is the extra addition), but
4216 redundancy elimination is likely to transform the code so that
4217 it uses the value of the variable before the increment anyway,
4218 so this is not very unrealistic. */
4219 if (cst_and_fits_in_hwi (cstep))
4220 cstepi = int_cst_value (cstep);
4221 else
4222 cstepi = 0;
4223
4224 if (!constant_multiple_of (ustep, cstep, &rat))
4225 return infinite_cost;
4226
4227 if (wi::fits_shwi_p (rat))
4228 ratio = rat.to_shwi ();
4229 else
4230 return infinite_cost;
4231
4232 STRIP_NOPS (cbase);
4233 ctype = TREE_TYPE (cbase);
4234
4235 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4236
4237 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4238 or ratio == 1, it is better to handle this like
4239
4240 ubase - ratio * cbase + ratio * var
4241
4242 (this also holds in the case ratio == -1; TODO). */
4243
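/* A small worked example of the rewrite above: with UBASE == 40,
CBASE == 4 and RATIO == 3, the use is computed as 40 + 3 * (var - 4)
== (40 - 3 * 4) + 3 * var == 28 + 3 * var; the constant 28 goes into
OFFSET and only 3 * var remains to be priced.  */
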
4244 if (cst_and_fits_in_hwi (cbase))
4245 {
4246 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4247 cost = difference_cost (data,
4248 ubase, build_int_cst (utype, 0),
4249 &symbol_present, &var_present, &offset,
4250 depends_on);
4251 cost.cost /= avg_loop_niter (data->current_loop);
4252 }
4253 else if (ratio == 1)
4254 {
4255 tree real_cbase = cbase;
4256
4257 /* Check to see if any adjustment is needed. */
4258 if (cstepi == 0 && stmt_is_after_inc)
4259 {
4260 aff_tree real_cbase_aff;
4261 aff_tree cstep_aff;
4262
4263 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4264 &real_cbase_aff);
4265 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4266
4267 aff_combination_add (&real_cbase_aff, &cstep_aff);
4268 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4269 }
4270
4271 cost = difference_cost (data,
4272 ubase, real_cbase,
4273 &symbol_present, &var_present, &offset,
4274 depends_on);
4275 cost.cost /= avg_loop_niter (data->current_loop);
4276 }
4277 else if (address_p
4278 && !POINTER_TYPE_P (ctype)
4279 && multiplier_allowed_in_address_p
4280 (ratio, mem_mode,
4281 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4282 {
4283 cbase
4284 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4285 cost = difference_cost (data,
4286 ubase, cbase,
4287 &symbol_present, &var_present, &offset,
4288 depends_on);
4289 cost.cost /= avg_loop_niter (data->current_loop);
4290 }
4291 else
4292 {
4293 cost = force_var_cost (data, cbase, depends_on);
4294 cost = add_costs (cost,
4295 difference_cost (data,
4296 ubase, build_int_cst (utype, 0),
4297 &symbol_present, &var_present,
4298 &offset, depends_on));
4299 cost.cost /= avg_loop_niter (data->current_loop);
4300 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4301 }
4302
4303 if (inv_expr_id)
4304 {
4305 *inv_expr_id =
4306 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4307 /* Clear depends on. */
4308 if (*inv_expr_id != -1 && depends_on && *depends_on)
4309 bitmap_clear (*depends_on);
4310 }
4311
4312 /* If we are after the increment, the value of the candidate is higher by
4313 one iteration. */
4314 if (stmt_is_after_inc)
4315 offset -= ratio * cstepi;
4316
4317 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4318 (symbol/var1/const parts may be omitted). If we are looking for an
4319 address, find the cost of addressing this. */
4320 if (address_p)
4321 return add_costs (cost,
4322 get_address_cost (symbol_present, var_present,
4323 offset, ratio, cstepi,
4324 mem_mode,
4325 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4326 speed, stmt_is_after_inc,
4327 can_autoinc));
4328
4329 /* Otherwise estimate the costs for computing the expression. */
4330 if (!symbol_present && !var_present && !offset)
4331 {
4332 if (ratio != 1)
4333 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4334 return cost;
4335 }
4336
4337 /* Symbol + offset should be compile-time computable so consider that they
4338 are added once to the variable, if present. */
4339 if (var_present && (symbol_present || offset))
4340 cost.cost += adjust_setup_cost (data,
4341 add_cost (speed, TYPE_MODE (ctype)));
4342
4343 /* Having offset does not affect runtime cost in case it is added to
4344 symbol, but it increases complexity. */
4345 if (offset)
4346 cost.complexity++;
4347
4348 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4349
4350 aratio = ratio > 0 ? ratio : -ratio;
4351 if (aratio != 1)
4352 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4353 return cost;
4354
4355 fallback:
4356 if (can_autoinc)
4357 *can_autoinc = false;
4358
4359 {
4360 /* Just get the expression, expand it and measure the cost. */
4361 tree comp = get_computation_at (data->current_loop, use, cand, at);
4362
4363 if (!comp)
4364 return infinite_cost;
4365
4366 if (address_p)
4367 comp = build_simple_mem_ref (comp);
4368
4369 return new_cost (computation_cost (comp, speed), 0);
4370 }
4371 }
4372
4373 /* Determines the cost of the computation by which USE is expressed
4374 from induction variable CAND. If ADDRESS_P is true, we just need
4375 to create an address from it, otherwise we want to get it into a
4376 register. A set of invariants we depend on is stored in
4377 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4378 autoinc addressing is likely. */
4379
4380 static comp_cost
4381 get_computation_cost (struct ivopts_data *data,
4382 struct iv_use *use, struct iv_cand *cand,
4383 bool address_p, bitmap *depends_on,
4384 bool *can_autoinc, int *inv_expr_id)
4385 {
4386 return get_computation_cost_at (data,
4387 use, cand, address_p, depends_on, use->stmt,
4388 can_autoinc, inv_expr_id);
4389 }
4390
4391 /* Determines cost of basing replacement of USE on CAND in a generic
4392 expression. */
4393
4394 static bool
4395 determine_use_iv_cost_generic (struct ivopts_data *data,
4396 struct iv_use *use, struct iv_cand *cand)
4397 {
4398 bitmap depends_on;
4399 comp_cost cost;
4400 int inv_expr_id = -1;
4401
4402 /* The simple case first -- if we need to express the value of the preserved
4403 original biv, the cost is 0. This also prevents us from counting the
4404 cost of the increment twice -- once at this use and once in the cost of
4405 the candidate. */
4406 if (cand->pos == IP_ORIGINAL
4407 && cand->incremented_at == use->stmt)
4408 {
4409 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4410 ERROR_MARK, -1);
4411 return true;
4412 }
4413
4414 cost = get_computation_cost (data, use, cand, false, &depends_on,
4415 NULL, &inv_expr_id);
4416
4417 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4418 inv_expr_id);
4419
4420 return !infinite_cost_p (cost);
4421 }
4422
4423 /* Determines cost of basing replacement of USE on CAND in an address. */
4424
4425 static bool
4426 determine_use_iv_cost_address (struct ivopts_data *data,
4427 struct iv_use *use, struct iv_cand *cand)
4428 {
4429 bitmap depends_on;
4430 bool can_autoinc;
4431 int inv_expr_id = -1;
4432 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4433 &can_autoinc, &inv_expr_id);
4434
4435 if (cand->ainc_use == use)
4436 {
4437 if (can_autoinc)
4438 cost.cost -= cand->cost_step;
4439 /* If we generated the candidate solely for exploiting autoincrement
4440 opportunities, and it turns out it can't be used, set the cost to
4441 infinity to make sure we ignore it. */
4442 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4443 cost = infinite_cost;
4444 }
4445 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4446 inv_expr_id);
4447
4448 return !infinite_cost_p (cost);
4449 }
4450
4451 /* Computes value of candidate CAND at position AT in iteration NITER, and
4452 stores it to VAL. */
4453
4454 static void
4455 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4456 aff_tree *val)
4457 {
4458 aff_tree step, delta, nit;
4459 struct iv *iv = cand->iv;
4460 tree type = TREE_TYPE (iv->base);
4461 /* Perform the computation in an unsigned variant of TYPE, where
4462 wrapping arithmetic is well defined. */
4463 tree steptype = unsigned_type_for (type);
4465
4466 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4467 aff_combination_convert (&step, steptype);
4468 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4469 aff_combination_convert (&nit, steptype);
4470 aff_combination_mult (&nit, &step, &delta);
4471 if (stmt_after_increment (loop, cand, at))
4472 aff_combination_add (&delta, &step);
4473
4474 tree_to_aff_combination (iv->base, type, val);
4475 if (!POINTER_TYPE_P (type))
4476 aff_combination_convert (val, steptype);
4477 aff_combination_add (val, &delta);
4478 }
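
/* For instance, for a candidate with base 0 and step 4 and for
NITER == 10, VAL becomes the affine combination 0 + 10 * 4 == 40;
if AT lies after the increment, one more step is added, giving 44.  */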
4479
4480 /* Returns period of induction variable iv. */
4481
4482 static tree
4483 iv_period (struct iv *iv)
4484 {
4485 tree step = iv->step, period, type;
4486 tree pow2div;
4487
4488 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4489
4490 type = unsigned_type_for (TREE_TYPE (step));
4491 /* The period of the iv is lcm (step, type_range)/step - 1,
4492 i.e., N*type_range/step - 1. Since the type range is a power
4493 of two, N == step >> num_of_ending_zeros_binary (step),
4494 so the final result is
4495
4496 (type_range >> num_of_ending_zeros_binary (step)) - 1. */
4499 pow2div = num_ending_zeros (step);
4500
4501 period = build_low_bits_mask (type,
4502 (TYPE_PRECISION (type)
4503 - tree_to_uhwi (pow2div)));
4504
4505 return period;
4506 }
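
/* A worked instance of the formula above: for a 16-bit unsigned iv
with step 12, num_ending_zeros (12) == 2, so the period is
(0x10000 >> 2) - 1 == 0x3fff.  Indeed, the values base + 12 * k are
pairwise distinct modulo 2^16 exactly for k == 0 .. 0x3fff, while
k == 0x4000 wraps back to the initial value.  */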
4507
4508 /* Returns the comparison operator used when eliminating the iv USE. */
4509
4510 static enum tree_code
4511 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4512 {
4513 struct loop *loop = data->current_loop;
4514 basic_block ex_bb;
4515 edge exit;
4516
4517 ex_bb = gimple_bb (use->stmt);
4518 exit = EDGE_SUCC (ex_bb, 0);
4519 if (flow_bb_inside_loop_p (loop, exit->dest))
4520 exit = EDGE_SUCC (ex_bb, 1);
4521
4522 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4523 }
4524
4525 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4526 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4527 calculation is performed in non-wrapping type.
4528
4529 TODO: More generally, we could test for the situation that
4530 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4531 This would require knowing the sign of OFFSET. */
4532
4533 static bool
4534 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4535 {
4536 enum tree_code code;
4537 tree e1, e2;
4538 aff_tree aff_e1, aff_e2, aff_offset;
4539
4540 if (!nowrap_type_p (TREE_TYPE (base)))
4541 return false;
4542
4543 base = expand_simple_operations (base);
4544
4545 if (TREE_CODE (base) == SSA_NAME)
4546 {
4547 gimple stmt = SSA_NAME_DEF_STMT (base);
4548
4549 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4550 return false;
4551
4552 code = gimple_assign_rhs_code (stmt);
4553 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4554 return false;
4555
4556 e1 = gimple_assign_rhs1 (stmt);
4557 e2 = gimple_assign_rhs2 (stmt);
4558 }
4559 else
4560 {
4561 code = TREE_CODE (base);
4562 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4563 return false;
4564 e1 = TREE_OPERAND (base, 0);
4565 e2 = TREE_OPERAND (base, 1);
4566 }
4567
4568 /* Use affine expansion as deeper inspection to prove the equality. */
4569 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4570 &aff_e2, &data->name_expansion_cache);
4571 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4572 &aff_offset, &data->name_expansion_cache);
4573 aff_combination_scale (&aff_offset, -1);
4574 switch (code)
4575 {
4576 case PLUS_EXPR:
4577 aff_combination_add (&aff_e2, &aff_offset);
4578 if (aff_combination_zero_p (&aff_e2))
4579 return true;
4580
4581 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4582 &aff_e1, &data->name_expansion_cache);
4583 aff_combination_add (&aff_e1, &aff_offset);
4584 return aff_combination_zero_p (&aff_e1);
4585
4586 case POINTER_PLUS_EXPR:
4587 aff_combination_add (&aff_e2, &aff_offset);
4588 return aff_combination_zero_p (&aff_e2);
4589
4590 default:
4591 return false;
4592 }
4593 }
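
/* A sketch of the only shape recognized so far, assuming

     p_0 = base_ptr + a;

   if BASE is 'p_0' and OFFSET expands to the same 'a', then
   BASE - OFFSET == base_ptr, which was computed without wrapping, so
   true is returned.  Any other shape conservatively yields false.  */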
4594
4595 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4596 comparison with CAND. NITER describes the number of iterations of
4597 the loops. If successful, the comparison in COMP_P is altered accordingly.
4598
4599 We aim to handle the following situation:
4600
4601 sometype *base, *p;
4602 int a, b, i;
4603
4604 i = a;
4605 p = p_0 = base + a;
4606
4607 do
4608 {
4609 bla (*p);
4610 p++;
4611 i++;
4612 }
4613 while (i < b);
4614
4615 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4616 We aim to optimize this to
4617
4618 p = p_0 = base + a;
4619 do
4620 {
4621 bla (*p);
4622 p++;
4623 }
4624 while (p < p_0 - a + b);
4625
4626 This preserves correctness, since the pointer arithmetic does not
4627 overflow. More precisely:
4628
4629 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4630 overflow in computing it or the values of p.
4631 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4632 overflow. To prove this, we use the fact that p_0 = base + a. */
4633
4634 static bool
4635 iv_elimination_compare_lt (struct ivopts_data *data,
4636 struct iv_cand *cand, enum tree_code *comp_p,
4637 struct tree_niter_desc *niter)
4638 {
4639 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4640 struct aff_tree nit, tmpa, tmpb;
4641 enum tree_code comp;
4642 HOST_WIDE_INT step;
4643
4644 /* We need to know that the candidate induction variable does not overflow.
4645 While more complex analysis may be used to prove this, for now just
4646 check that the variable appears in the original program and that it
4647 is computed in a type that guarantees no overflows. */
4648 cand_type = TREE_TYPE (cand->iv->base);
4649 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4650 return false;
4651
4652 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4653 the calculation of the BOUND could overflow, making the comparison
4654 invalid. */
4655 if (!data->loop_single_exit_p)
4656 return false;
4657
4658 /* We need to be able to decide whether candidate is increasing or decreasing
4659 in order to choose the right comparison operator. */
4660 if (!cst_and_fits_in_hwi (cand->iv->step))
4661 return false;
4662 step = int_cst_value (cand->iv->step);
4663
4664 /* Check that the number of iterations matches the expected pattern:
4665 a + 1 > b ? 0 : b - a - 1. */
4666 mbz = niter->may_be_zero;
4667 if (TREE_CODE (mbz) == GT_EXPR)
4668 {
4669 /* Handle a + 1 > b. */
4670 tree op0 = TREE_OPERAND (mbz, 0);
4671 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4672 {
4673 a = TREE_OPERAND (op0, 0);
4674 b = TREE_OPERAND (mbz, 1);
4675 }
4676 else
4677 return false;
4678 }
4679 else if (TREE_CODE (mbz) == LT_EXPR)
4680 {
4681 tree op1 = TREE_OPERAND (mbz, 1);
4682
4683 /* Handle b < a + 1. */
4684 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4685 {
4686 a = TREE_OPERAND (op1, 0);
4687 b = TREE_OPERAND (mbz, 0);
4688 }
4689 else
4690 return false;
4691 }
4692 else
4693 return false;
4694
4695 /* Expected number of iterations is B - A - 1. Check that it matches
4696 the actual number, i.e., that B - A - NITER = 1. */
4697 tree_to_aff_combination (niter->niter, nit_type, &nit);
4698 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4699 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4700 aff_combination_scale (&nit, -1);
4701 aff_combination_scale (&tmpa, -1);
4702 aff_combination_add (&tmpb, &tmpa);
4703 aff_combination_add (&tmpb, &nit);
4704 if (tmpb.n != 0 || tmpb.offset != 1)
4705 return false;
4706
4707 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4708 overflow. */
4709 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4710 cand->iv->step,
4711 fold_convert (TREE_TYPE (cand->iv->step), a));
4712 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4713 return false;
4714
4715 /* Determine the new comparison operator. */
4716 comp = step < 0 ? GT_EXPR : LT_EXPR;
4717 if (*comp_p == NE_EXPR)
4718 *comp_p = comp;
4719 else if (*comp_p == EQ_EXPR)
4720 *comp_p = invert_tree_comparison (comp, false);
4721 else
4722 gcc_unreachable ();
4723
4724 return true;
4725 }
4726
4727 /* Check whether it is possible to express the condition in USE by comparison
4728 of candidate CAND. If so, store the value compared with to BOUND, and the
4729 comparison operator to COMP. */
4730
4731 static bool
4732 may_eliminate_iv (struct ivopts_data *data,
4733 struct iv_use *use, struct iv_cand *cand, tree *bound,
4734 enum tree_code *comp)
4735 {
4736 basic_block ex_bb;
4737 edge exit;
4738 tree period;
4739 struct loop *loop = data->current_loop;
4740 aff_tree bnd;
4741 struct tree_niter_desc *desc = NULL;
4742
4743 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4744 return false;
4745
4746 /* For now this works only for exits that dominate the loop latch.
4747 TODO: extend to other conditions inside the loop body. */
4748 ex_bb = gimple_bb (use->stmt);
4749 if (use->stmt != last_stmt (ex_bb)
4750 || gimple_code (use->stmt) != GIMPLE_COND
4751 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4752 return false;
4753
4754 exit = EDGE_SUCC (ex_bb, 0);
4755 if (flow_bb_inside_loop_p (loop, exit->dest))
4756 exit = EDGE_SUCC (ex_bb, 1);
4757 if (flow_bb_inside_loop_p (loop, exit->dest))
4758 return false;
4759
4760 desc = niter_for_exit (data, exit);
4761 if (!desc)
4762 return false;
4763
4764 /* Determine whether we can use the variable to test the exit condition.
4765 This is the case iff the period of the induction variable is greater
4766 than the number of iterations for which the exit condition is true. */
4767 period = iv_period (cand->iv);
4768
4769 /* If the number of iterations is constant, compare against it directly. */
4770 if (TREE_CODE (desc->niter) == INTEGER_CST)
4771 {
4772 /* See cand_value_at. */
4773 if (stmt_after_increment (loop, cand, use->stmt))
4774 {
4775 if (!tree_int_cst_lt (desc->niter, period))
4776 return false;
4777 }
4778 else
4779 {
4780 if (tree_int_cst_lt (period, desc->niter))
4781 return false;
4782 }
4783 }
4784
4785 /* If not, and if this is the only possible exit of the loop, see whether
4786 we can get a conservative estimate on the number of iterations of the
4787 entire loop and compare against that instead. */
4788 else
4789 {
4790 widest_int period_value, max_niter;
4791
4792 max_niter = desc->max;
4793 if (stmt_after_increment (loop, cand, use->stmt))
4794 max_niter += 1;
4795 period_value = wi::to_widest (period);
4796 if (wi::gtu_p (max_niter, period_value))
4797 {
4798 /* See if we can take advantage of inferred loop bound information. */
4799 if (data->loop_single_exit_p)
4800 {
4801 if (!max_loop_iterations (loop, &max_niter))
4802 return false;
4803 /* The loop bound is already adjusted by adding 1. */
4804 if (wi::gtu_p (max_niter, period_value))
4805 return false;
4806 }
4807 else
4808 return false;
4809 }
4810 }
4811
4812 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4813
4814 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4815 aff_combination_to_tree (&bnd));
4816 *comp = iv_elimination_compare (data, use);
4817
4818 /* It is unlikely that computing the number of iterations using division
4819 would be more profitable than keeping the original induction variable. */
4820 if (expression_expensive_p (*bound))
4821 return false;
4822
4823 /* Sometimes it is possible to handle the situation that the number of
4824 iterations may be zero unless additional assumptions hold, by using <
4825 instead of != in the exit condition.
4826
4827 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4828 base the exit condition on it. However, that is often too
4829 expensive. */
4830 if (!integer_zerop (desc->may_be_zero))
4831 return iv_elimination_compare_lt (data, cand, comp, desc);
4832
4833 return true;
4834 }
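
/* To illustrate the period test above with made-up numbers: an
unsigned char candidate with step 1 has period 255, so it may replace
the exit test of a loop known to iterate at most 255 times, but not
of one that may run 300 iterations -- after 256 increments the
candidate wraps and the comparison against BOUND would trigger a
second time.  */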
4835
4836 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4837 be copied, if it is used in the loop body and DATA->body_includes_call. */
4838
4839 static int
4840 parm_decl_cost (struct ivopts_data *data, tree bound)
4841 {
4842 tree sbound = bound;
4843 STRIP_NOPS (sbound);
4844
4845 if (TREE_CODE (sbound) == SSA_NAME
4846 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4847 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4848 && data->body_includes_call)
4849 return COSTS_N_INSNS (1);
4850
4851 return 0;
4852 }
4853
4854 /* Determines cost of basing replacement of USE on CAND in a condition. */
4855
4856 static bool
4857 determine_use_iv_cost_condition (struct ivopts_data *data,
4858 struct iv_use *use, struct iv_cand *cand)
4859 {
4860 tree bound = NULL_TREE;
4861 struct iv *cmp_iv;
4862 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4863 comp_cost elim_cost, express_cost, cost, bound_cost;
4864 bool ok;
4865 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4866 tree *control_var, *bound_cst;
4867 enum tree_code comp = ERROR_MARK;
4868
4869 /* Only consider real candidates. */
4870 if (!cand->iv)
4871 {
4872 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4873 ERROR_MARK, -1);
4874 return false;
4875 }
4876
4877 /* Try iv elimination. */
4878 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4879 {
4880 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4881 if (elim_cost.cost == 0)
4882 elim_cost.cost = parm_decl_cost (data, bound);
4883 else if (TREE_CODE (bound) == INTEGER_CST)
4884 elim_cost.cost = 0;
4885 /* If we replace a loop condition 'i < n' with 'p < base + n',
4886 depends_on_elim will have 'base' and 'n' set, which implies
4887 that both 'base' and 'n' will be live during the loop. More likely,
4888 'base + n' will be loop invariant, resulting in only one live value
4889 during the loop. So in that case we clear depends_on_elim and set
4890 elim_inv_expr_id instead. */
4891 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4892 {
4893 elim_inv_expr_id = get_expr_id (data, bound);
4894 bitmap_clear (depends_on_elim);
4895 }
4896 /* The bound is a loop invariant, so it will be computed
4897 only once. */
4898 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4899 }
4900 else
4901 elim_cost = infinite_cost;
4902
4903 /* Try expressing the original giv. If it is compared with an invariant,
4904 note that we cannot get rid of it. */
4905 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4906 NULL, &cmp_iv);
4907 gcc_assert (ok);
4908
4909 /* When the condition is a comparison of the candidate IV against
4910 zero, prefer this IV.
4911
4912 TODO: The constant that we're subtracting from the cost should
4913 be target-dependent. This information should be added to the
4914 target costs for each backend. */
4915 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4916 && integer_zerop (*bound_cst)
4917 && (operand_equal_p (*control_var, cand->var_after, 0)
4918 || operand_equal_p (*control_var, cand->var_before, 0)))
4919 elim_cost.cost -= 1;
4920
4921 express_cost = get_computation_cost (data, use, cand, false,
4922 &depends_on_express, NULL,
4923 &express_inv_expr_id);
4924 fd_ivopts_data = data;
4925 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4926
4927 /* Count the cost of the original bound as well. */
4928 bound_cost = force_var_cost (data, *bound_cst, NULL);
4929 if (bound_cost.cost == 0)
4930 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4931 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4932 bound_cost.cost = 0;
4933 express_cost.cost += bound_cost.cost;
4934
4935 /* Choose the better approach, preferring the eliminated IV. */
4936 if (compare_costs (elim_cost, express_cost) <= 0)
4937 {
4938 cost = elim_cost;
4939 depends_on = depends_on_elim;
4940 depends_on_elim = NULL;
4941 inv_expr_id = elim_inv_expr_id;
4942 }
4943 else
4944 {
4945 cost = express_cost;
4946 depends_on = depends_on_express;
4947 depends_on_express = NULL;
4948 bound = NULL_TREE;
4949 comp = ERROR_MARK;
4950 inv_expr_id = express_inv_expr_id;
4951 }
4952
4953 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4954
4955 if (depends_on_elim)
4956 BITMAP_FREE (depends_on_elim);
4957 if (depends_on_express)
4958 BITMAP_FREE (depends_on_express);
4959
4960 return !infinite_cost_p (cost);
4961 }
4962
4963 /* Determines cost of basing replacement of USE on CAND. Returns false
4964 if USE cannot be based on CAND. */
4965
4966 static bool
4967 determine_use_iv_cost (struct ivopts_data *data,
4968 struct iv_use *use, struct iv_cand *cand)
4969 {
4970 switch (use->type)
4971 {
4972 case USE_NONLINEAR_EXPR:
4973 return determine_use_iv_cost_generic (data, use, cand);
4974
4975 case USE_ADDRESS:
4976 return determine_use_iv_cost_address (data, use, cand);
4977
4978 case USE_COMPARE:
4979 return determine_use_iv_cost_condition (data, use, cand);
4980
4981 default:
4982 gcc_unreachable ();
4983 }
4984 }
4985
4986 /* Return true if get_computation_cost indicates that autoincrement is
4987 a possibility for the pair of USE and CAND, false otherwise. */
4988
4989 static bool
4990 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4991 struct iv_cand *cand)
4992 {
4993 bitmap depends_on;
4994 bool can_autoinc;
4995 comp_cost cost;
4996
4997 if (use->type != USE_ADDRESS)
4998 return false;
4999
5000 cost = get_computation_cost (data, use, cand, true, &depends_on,
5001 &can_autoinc, NULL);
5002
5003 BITMAP_FREE (depends_on);
5004
5005 return !infinite_cost_p (cost) && can_autoinc;
5006 }
5007
5008 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5009 use that allows autoincrement, and set their AINC_USE if possible. */
5010
5011 static void
5012 set_autoinc_for_original_candidates (struct ivopts_data *data)
5013 {
5014 unsigned i, j;
5015
5016 for (i = 0; i < n_iv_cands (data); i++)
5017 {
5018 struct iv_cand *cand = iv_cand (data, i);
5019 struct iv_use *closest_before = NULL;
5020 struct iv_use *closest_after = NULL;
5021 if (cand->pos != IP_ORIGINAL)
5022 continue;
5023
5024 for (j = 0; j < n_iv_uses (data); j++)
5025 {
5026 struct iv_use *use = iv_use (data, j);
5027 unsigned uid = gimple_uid (use->stmt);
5028
5029 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5030 continue;
5031
5032 if (uid < gimple_uid (cand->incremented_at)
5033 && (closest_before == NULL
5034 || uid > gimple_uid (closest_before->stmt)))
5035 closest_before = use;
5036
5037 if (uid > gimple_uid (cand->incremented_at)
5038 && (closest_after == NULL
5039 || uid < gimple_uid (closest_after->stmt)))
5040 closest_after = use;
5041 }
5042
5043 if (closest_before != NULL
5044 && autoinc_possible_for_pair (data, closest_before, cand))
5045 cand->ainc_use = closest_before;
5046 else if (closest_after != NULL
5047 && autoinc_possible_for_pair (data, closest_after, cand))
5048 cand->ainc_use = closest_after;
5049 }
5050 }
5051
5052 /* Finds the candidates for the induction variables. */
5053
5054 static void
5055 find_iv_candidates (struct ivopts_data *data)
5056 {
5057 /* Add commonly used ivs. */
5058 add_standard_iv_candidates (data);
5059
5060 /* Add old induction variables. */
5061 add_old_ivs_candidates (data);
5062
5063 /* Add induction variables derived from uses. */
5064 add_derived_ivs_candidates (data);
5065
5066 set_autoinc_for_original_candidates (data);
5067
5068 /* Record the important candidates. */
5069 record_important_candidates (data);
5070 }
5071
5072 /* Determines costs of basing the use of the iv on an iv candidate. */
5073
5074 static void
5075 determine_use_iv_costs (struct ivopts_data *data)
5076 {
5077 unsigned i, j;
5078 struct iv_use *use;
5079 struct iv_cand *cand;
5080 bitmap to_clear = BITMAP_ALLOC (NULL);
5081
5082 alloc_use_cost_map (data);
5083
5084 for (i = 0; i < n_iv_uses (data); i++)
5085 {
5086 use = iv_use (data, i);
5087
5088 if (data->consider_all_candidates)
5089 {
5090 for (j = 0; j < n_iv_cands (data); j++)
5091 {
5092 cand = iv_cand (data, j);
5093 determine_use_iv_cost (data, use, cand);
5094 }
5095 }
5096 else
5097 {
5098 bitmap_iterator bi;
5099
5100 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5101 {
5102 cand = iv_cand (data, j);
5103 if (!determine_use_iv_cost (data, use, cand))
5104 bitmap_set_bit (to_clear, j);
5105 }
5106
5107 /* Remove the candidates for which the cost is infinite from
5108 the list of related candidates. */
5109 bitmap_and_compl_into (use->related_cands, to_clear);
5110 bitmap_clear (to_clear);
5111 }
5112 }
5113
5114 BITMAP_FREE (to_clear);
5115
5116 if (dump_file && (dump_flags & TDF_DETAILS))
5117 {
5118 fprintf (dump_file, "Use-candidate costs:\n");
5119
5120 for (i = 0; i < n_iv_uses (data); i++)
5121 {
5122 use = iv_use (data, i);
5123
5124 fprintf (dump_file, "Use %d:\n", i);
5125 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5126 for (j = 0; j < use->n_map_members; j++)
5127 {
5128 if (!use->cost_map[j].cand
5129 || infinite_cost_p (use->cost_map[j].cost))
5130 continue;
5131
5132 fprintf (dump_file, " %d\t%d\t%d\t",
5133 use->cost_map[j].cand->id,
5134 use->cost_map[j].cost.cost,
5135 use->cost_map[j].cost.complexity);
5136 if (use->cost_map[j].depends_on)
5137 bitmap_print (dump_file,
5138 use->cost_map[j].depends_on, "","");
5139 if (use->cost_map[j].inv_expr_id != -1)
5140 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5141 fprintf (dump_file, "\n");
5142 }
5143
5144 fprintf (dump_file, "\n");
5145 }
5146 fprintf (dump_file, "\n");
5147 }
5148 }
5149
5150 /* Determines cost of the candidate CAND. */
5151
5152 static void
5153 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5154 {
5155 comp_cost cost_base;
5156 unsigned cost, cost_step;
5157 tree base;
5158
5159 if (!cand->iv)
5160 {
5161 cand->cost = 0;
5162 return;
5163 }
5164
5165 /* There are two costs associated with the candidate -- its increment
5166 and its initialization. The second is almost negligible for any loop
5167 that rolls enough, so we take it into account only very little. */
5168
5169 base = cand->iv->base;
5170 cost_base = force_var_cost (data, base, NULL);
5171 /* It will be exceptional that the iv register happens to be initialized with
5172 the proper value at no cost. In general, there will at least be a regcopy
5173 or a const set. */
5174 if (cost_base.cost == 0)
5175 cost_base.cost = COSTS_N_INSNS (1);
5176 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5177
5178 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5179
5180 /* Prefer the original ivs unless we may gain something by replacing them.
5181 The reason is to make debugging simpler, so this is not relevant for
5182 artificial ivs created by other optimization passes. */
5183 if (cand->pos != IP_ORIGINAL
5184 || !SSA_NAME_VAR (cand->var_before)
5185 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5186 cost++;
5187
5188 /* Prefer not to insert statements into latch unless there are some
5189 already (so that we do not create unnecessary jumps). */
5190 if (cand->pos == IP_END
5191 && empty_block_p (ip_end_pos (data->current_loop)))
5192 cost++;
5193
5194 cand->cost = cost;
5195 cand->cost_step = cost_step;
5196 }
5197
5198 /* Determines costs of computation of the candidates. */
5199
5200 static void
5201 determine_iv_costs (struct ivopts_data *data)
5202 {
5203 unsigned i;
5204
5205 if (dump_file && (dump_flags & TDF_DETAILS))
5206 {
5207 fprintf (dump_file, "Candidate costs:\n");
5208 fprintf (dump_file, " cand\tcost\n");
5209 }
5210
5211 for (i = 0; i < n_iv_cands (data); i++)
5212 {
5213 struct iv_cand *cand = iv_cand (data, i);
5214
5215 determine_iv_cost (data, cand);
5216
5217 if (dump_file && (dump_flags & TDF_DETAILS))
5218 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5219 }
5220
5221 if (dump_file && (dump_flags & TDF_DETAILS))
5222 fprintf (dump_file, "\n");
5223 }
5224
5225 /* Calculates cost for having SIZE induction variables. */
5226
5227 static unsigned
5228 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5229 {
5230 /* We add size to the cost, so that we prefer eliminating ivs
5231 if possible. */
5232 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5233 data->body_includes_call);
5234 }
5235
5236 /* For each size of the induction variable set determine the penalty. */
5237
5238 static void
5239 determine_set_costs (struct ivopts_data *data)
5240 {
5241 unsigned j, n;
5242 gphi *phi;
5243 gphi_iterator psi;
5244 tree op;
5245 struct loop *loop = data->current_loop;
5246 bitmap_iterator bi;
5247
5248 if (dump_file && (dump_flags & TDF_DETAILS))
5249 {
5250 fprintf (dump_file, "Global costs:\n");
5251 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5252 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5253 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5254 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5255 }
5256
5257 n = 0;
5258 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5259 {
5260 phi = psi.phi ();
5261 op = PHI_RESULT (phi);
5262
5263 if (virtual_operand_p (op))
5264 continue;
5265
5266 if (get_iv (data, op))
5267 continue;
5268
5269 n++;
5270 }
5271
5272 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5273 {
5274 struct version_info *info = ver_info (data, j);
5275
5276 if (info->inv_id && info->has_nonlin_use)
5277 n++;
5278 }
5279
5280 data->regs_used = n;
5281 if (dump_file && (dump_flags & TDF_DETAILS))
5282 fprintf (dump_file, " regs_used %d\n", n);
5283
5284 if (dump_file && (dump_flags & TDF_DETAILS))
5285 {
5286 fprintf (dump_file, " cost for size:\n");
5287 fprintf (dump_file, " ivs\tcost\n");
5288 for (j = 0; j <= 2 * target_avail_regs; j++)
5289 fprintf (dump_file, " %d\t%d\n", j,
5290 ivopts_global_cost_for_size (data, j));
5291 fprintf (dump_file, "\n");
5292 }
5293 }
5294
5295 /* Returns true if A is a cheaper cost pair than B. */
5296
5297 static bool
5298 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5299 {
5300 int cmp;
5301
5302 if (!a)
5303 return false;
5304
5305 if (!b)
5306 return true;
5307
5308 cmp = compare_costs (a->cost, b->cost);
5309 if (cmp < 0)
5310 return true;
5311
5312 if (cmp > 0)
5313 return false;
5314
5315 /* In case the costs are the same, prefer the cheaper candidate. */
5316 if (a->cand->cost < b->cand->cost)
5317 return true;
5318
5319 return false;
5320 }
5321
5322
5323 /* Returns the candidate by which USE is expressed in IVS. */
5324
5325 static struct cost_pair *
5326 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5327 {
5328 return ivs->cand_for_use[use->id];
5329 }
5330
5331 /* Computes the cost field of IVS structure. */
5332
5333 static void
5334 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5335 {
5336 comp_cost cost = ivs->cand_use_cost;
5337
5338 cost.cost += ivs->cand_cost;
5339
5340 cost.cost += ivopts_global_cost_for_size (data,
5341 ivs->n_regs + ivs->num_used_inv_expr);
5342
5343 ivs->cost = cost;
5344 }
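
/* For example, for a set with cand_use_cost.cost == 8, cand_cost == 5,
three iv registers and one live invariant expression, the recounted
cost is 8 + 5 + ivopts_global_cost_for_size (data, 3 + 1).  */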
5345
5346 /* Remove invariants in set INVS from set IVS. */
5347
5348 static void
5349 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5350 {
5351 bitmap_iterator bi;
5352 unsigned iid;
5353
5354 if (!invs)
5355 return;
5356
5357 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5358 {
5359 ivs->n_invariant_uses[iid]--;
5360 if (ivs->n_invariant_uses[iid] == 0)
5361 ivs->n_regs--;
5362 }
5363 }
5364
5365 /* Set USE not to be expressed by any candidate in IVS. */
5366
5367 static void
5368 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5369 struct iv_use *use)
5370 {
5371 unsigned uid = use->id, cid;
5372 struct cost_pair *cp;
5373
5374 cp = ivs->cand_for_use[uid];
5375 if (!cp)
5376 return;
5377 cid = cp->cand->id;
5378
5379 ivs->bad_uses++;
5380 ivs->cand_for_use[uid] = NULL;
5381 ivs->n_cand_uses[cid]--;
5382
5383 if (ivs->n_cand_uses[cid] == 0)
5384 {
5385 bitmap_clear_bit (ivs->cands, cid);
5386 /* Do not count the pseudocandidates. */
5387 if (cp->cand->iv)
5388 ivs->n_regs--;
5389 ivs->n_cands--;
5390 ivs->cand_cost -= cp->cand->cost;
5391
5392 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5393 }
5394
5395 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5396
5397 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5398
5399 if (cp->inv_expr_id != -1)
5400 {
5401 ivs->used_inv_expr[cp->inv_expr_id]--;
5402 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5403 ivs->num_used_inv_expr--;
5404 }
5405 iv_ca_recount_cost (data, ivs);
5406 }
5407
5408 /* Add invariants in set INVS to set IVS. */
5409
5410 static void
5411 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5412 {
5413 bitmap_iterator bi;
5414 unsigned iid;
5415
5416 if (!invs)
5417 return;
5418
5419 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5420 {
5421 ivs->n_invariant_uses[iid]++;
5422 if (ivs->n_invariant_uses[iid] == 1)
5423 ivs->n_regs++;
5424 }
5425 }
5426
5427 /* Set cost pair for USE in set IVS to CP. */
5428
5429 static void
5430 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5431 struct iv_use *use, struct cost_pair *cp)
5432 {
5433 unsigned uid = use->id, cid;
5434
5435 if (ivs->cand_for_use[uid] == cp)
5436 return;
5437
5438 if (ivs->cand_for_use[uid])
5439 iv_ca_set_no_cp (data, ivs, use);
5440
5441 if (cp)
5442 {
5443 cid = cp->cand->id;
5444
5445 ivs->bad_uses--;
5446 ivs->cand_for_use[uid] = cp;
5447 ivs->n_cand_uses[cid]++;
5448 if (ivs->n_cand_uses[cid] == 1)
5449 {
5450 bitmap_set_bit (ivs->cands, cid);
5451 /* Do not count the pseudocandidates. */
5452 if (cp->cand->iv)
5453 ivs->n_regs++;
5454 ivs->n_cands++;
5455 ivs->cand_cost += cp->cand->cost;
5456
5457 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5458 }
5459
5460 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5461 iv_ca_set_add_invariants (ivs, cp->depends_on);
5462
5463 if (cp->inv_expr_id != -1)
5464 {
5465 ivs->used_inv_expr[cp->inv_expr_id]++;
5466 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5467 ivs->num_used_inv_expr++;
5468 }
5469 iv_ca_recount_cost (data, ivs);
5470 }
5471 }
5472
5473 /* Extend set IVS by expressing USE by some of the candidates in it
5474 if possible. Consider all important candidates if candidates in
5475 set IVS don't give any result. */
5476
5477 static void
5478 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5479 struct iv_use *use)
5480 {
5481 struct cost_pair *best_cp = NULL, *cp;
5482 bitmap_iterator bi;
5483 unsigned i;
5484 struct iv_cand *cand;
5485
5486 gcc_assert (ivs->upto >= use->id);
5487 ivs->upto++;
5488 ivs->bad_uses++;
5489
5490 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5491 {
5492 cand = iv_cand (data, i);
5493 cp = get_use_iv_cost (data, use, cand);
5494 if (cheaper_cost_pair (cp, best_cp))
5495 best_cp = cp;
5496 }
5497
5498 if (best_cp == NULL)
5499 {
5500 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5501 {
5502 cand = iv_cand (data, i);
5503 cp = get_use_iv_cost (data, use, cand);
5504 if (cheaper_cost_pair (cp, best_cp))
5505 best_cp = cp;
5506 }
5507 }
5508
5509 iv_ca_set_cp (data, ivs, use, best_cp);
5510 }
5511
5512 /* Get cost for assignment IVS. */
5513
5514 static comp_cost
5515 iv_ca_cost (struct iv_ca *ivs)
5516 {
5517 /* This was a conditional expression but it triggered a bug in
5518 Sun C 5.5. */
5519 if (ivs->bad_uses)
5520 return infinite_cost;
5521 else
5522 return ivs->cost;
5523 }
5524
5525 /* Returns true if all dependences of CP are among invariants in IVS. */
5526
5527 static bool
5528 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5529 {
5530 unsigned i;
5531 bitmap_iterator bi;
5532
5533 if (!cp->depends_on)
5534 return true;
5535
5536 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5537 {
5538 if (ivs->n_invariant_uses[i] == 0)
5539 return false;
5540 }
5541
5542 return true;
5543 }
5544
5545 /* Creates a change expressing USE by NEW_CP instead of OLD_CP, and chains
5546 it before NEXT_CHANGE. */
5547
5548 static struct iv_ca_delta *
5549 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5550 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5551 {
5552 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5553
5554 change->use = use;
5555 change->old_cp = old_cp;
5556 change->new_cp = new_cp;
5557 change->next_change = next_change;
5558
5559 return change;
5560 }
5561
5562 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5563 are rewritten. */
5564
5565 static struct iv_ca_delta *
5566 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5567 {
5568 struct iv_ca_delta *last;
5569
5570 if (!l2)
5571 return l1;
5572
5573 if (!l1)
5574 return l2;
5575
5576 for (last = l1; last->next_change; last = last->next_change)
5577 continue;
5578 last->next_change = l2;
5579
5580 return l1;
5581 }
5582
5583 /* Reverse the list of changes DELTA, forming the inverse to it. */
5584
5585 static struct iv_ca_delta *
5586 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5587 {
5588 struct iv_ca_delta *act, *next, *prev = NULL;
5589 struct cost_pair *tmp;
5590
5591 for (act = delta; act; act = next)
5592 {
5593 next = act->next_change;
5594 act->next_change = prev;
5595 prev = act;
5596
5597 tmp = act->old_cp;
5598 act->old_cp = act->new_cp;
5599 act->new_cp = tmp;
5600 }
5601
5602 return prev;
5603 }
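
/* For instance, reversing the two-element list
{USE1: A -> B} -> {USE2: C -> D} yields
{USE2: D -> C} -> {USE1: B -> A}, which, when committed, undoes the
original delta.  */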
5604
5605 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5606 reverted instead. */
5607
5608 static void
5609 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5610 struct iv_ca_delta *delta, bool forward)
5611 {
5612 struct cost_pair *from, *to;
5613 struct iv_ca_delta *act;
5614
5615 if (!forward)
5616 delta = iv_ca_delta_reverse (delta);
5617
5618 for (act = delta; act; act = act->next_change)
5619 {
5620 from = act->old_cp;
5621 to = act->new_cp;
5622 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5623 iv_ca_set_cp (data, ivs, act->use, to);
5624 }
5625
5626 if (!forward)
5627 iv_ca_delta_reverse (delta);
5628 }
5629
5630 /* Returns true if CAND is used in IVS. */
5631
5632 static bool
5633 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5634 {
5635 return ivs->n_cand_uses[cand->id] > 0;
5636 }
5637
5638 /* Returns number of induction variable candidates in the set IVS. */
5639
5640 static unsigned
5641 iv_ca_n_cands (struct iv_ca *ivs)
5642 {
5643 return ivs->n_cands;
5644 }
5645
5646 /* Free the list of changes DELTA. */
5647
5648 static void
5649 iv_ca_delta_free (struct iv_ca_delta **delta)
5650 {
5651 struct iv_ca_delta *act, *next;
5652
5653 for (act = *delta; act; act = next)
5654 {
5655 next = act->next_change;
5656 free (act);
5657 }
5658
5659 *delta = NULL;
5660 }
5661
5662 /* Allocates a new iv candidate assignment. */
5663
5664 static struct iv_ca *
5665 iv_ca_new (struct ivopts_data *data)
5666 {
5667 struct iv_ca *nw = XNEW (struct iv_ca);
5668
5669 nw->upto = 0;
5670 nw->bad_uses = 0;
5671 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5672 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5673 nw->cands = BITMAP_ALLOC (NULL);
5674 nw->n_cands = 0;
5675 nw->n_regs = 0;
5676 nw->cand_use_cost = no_cost;
5677 nw->cand_cost = 0;
5678 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5679 nw->cost = no_cost;
5680 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5681 nw->num_used_inv_expr = 0;
5682
5683 return nw;
5684 }
5685
5686 /* Free memory occupied by the set IVS. */
5687
5688 static void
5689 iv_ca_free (struct iv_ca **ivs)
5690 {
5691 free ((*ivs)->cand_for_use);
5692 free ((*ivs)->n_cand_uses);
5693 BITMAP_FREE ((*ivs)->cands);
5694 free ((*ivs)->n_invariant_uses);
5695 free ((*ivs)->used_inv_expr);
5696 free (*ivs);
5697 *ivs = NULL;
5698 }
5699
5700 /* Dumps IVS to FILE. */
5701
5702 static void
5703 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5704 {
5705 const char *pref = " invariants ";
5706 unsigned i;
5707 comp_cost cost = iv_ca_cost (ivs);
5708
5709 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5710 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5711 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5712 bitmap_print (file, ivs->cands, " candidates: ","\n");
5713
5714 for (i = 0; i < ivs->upto; i++)
5715 {
5716 struct iv_use *use = iv_use (data, i);
5717 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5718 if (cp)
5719 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5720 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5721 else
5722 fprintf (file, " use:%d --> ??\n", use->id);
5723 }
5724
5725 for (i = 1; i <= data->max_inv_id; i++)
5726 if (ivs->n_invariant_uses[i])
5727 {
5728 fprintf (file, "%s%d", pref, i);
5729 pref = ", ";
5730 }
5731 fprintf (file, "\n\n");
5732 }
5733
5734 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5735 new set, and store differences in DELTA. Number of induction variables
5736 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5737 the function tries to find a solution with a minimal number of iv candidates. */
5738
5739 static comp_cost
5740 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5741 struct iv_cand *cand, struct iv_ca_delta **delta,
5742 unsigned *n_ivs, bool min_ncand)
5743 {
5744 unsigned i;
5745 comp_cost cost;
5746 struct iv_use *use;
5747 struct cost_pair *old_cp, *new_cp;
5748
5749 *delta = NULL;
5750 for (i = 0; i < ivs->upto; i++)
5751 {
5752 use = iv_use (data, i);
5753 old_cp = iv_ca_cand_for_use (ivs, use);
5754
5755 if (old_cp
5756 && old_cp->cand == cand)
5757 continue;
5758
5759 new_cp = get_use_iv_cost (data, use, cand);
5760 if (!new_cp)
5761 continue;
5762
5763 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5764 continue;
5765
5766 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5767 continue;
5768
5769 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5770 }
5771
5772 iv_ca_delta_commit (data, ivs, *delta, true);
5773 cost = iv_ca_cost (ivs);
5774 if (n_ivs)
5775 *n_ivs = iv_ca_n_cands (ivs);
5776 iv_ca_delta_commit (data, ivs, *delta, false);
5777
5778 return cost;
5779 }
5780
5781 /* Try narrowing set IVS by removing CAND. Return the cost of
5782 the new set and store the differences in DELTA. START is
5783 the candidate with which we start narrowing. */
5784
5785 static comp_cost
5786 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5787 struct iv_cand *cand, struct iv_cand *start,
5788 struct iv_ca_delta **delta)
5789 {
5790 unsigned i, ci;
5791 struct iv_use *use;
5792 struct cost_pair *old_cp, *new_cp, *cp;
5793 bitmap_iterator bi;
5794 struct iv_cand *cnd;
5795 comp_cost cost, best_cost, acost;
5796
5797 *delta = NULL;
5798 for (i = 0; i < n_iv_uses (data); i++)
5799 {
5800 use = iv_use (data, i);
5801
5802 old_cp = iv_ca_cand_for_use (ivs, use);
5803 if (old_cp->cand != cand)
5804 continue;
5805
5806 best_cost = iv_ca_cost (ivs);
5807 /* Start narrowing with START. */
5808 new_cp = get_use_iv_cost (data, use, start);
5809
5810 if (data->consider_all_candidates)
5811 {
5812 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5813 {
5814 if (ci == cand->id || (start && ci == start->id))
5815 continue;
5816
5817 cnd = iv_cand (data, ci);
5818
5819 cp = get_use_iv_cost (data, use, cnd);
5820 if (!cp)
5821 continue;
5822
5823 iv_ca_set_cp (data, ivs, use, cp);
5824 acost = iv_ca_cost (ivs);
5825
5826 if (compare_costs (acost, best_cost) < 0)
5827 {
5828 best_cost = acost;
5829 new_cp = cp;
5830 }
5831 }
5832 }
5833 else
5834 {
5835 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5836 {
5837 if (ci == cand->id || (start && ci == start->id))
5838 continue;
5839
5840 cnd = iv_cand (data, ci);
5841
5842 cp = get_use_iv_cost (data, use, cnd);
5843 if (!cp)
5844 continue;
5845
5846 iv_ca_set_cp (data, ivs, use, cp);
5847 acost = iv_ca_cost (ivs);
5848
5849 if (compare_costs (acost, best_cost) < 0)
5850 {
5851 best_cost = acost;
5852 new_cp = cp;
5853 }
5854 }
5855 }
5856 /* Restore the old cp for this use. */
5857 iv_ca_set_cp (data, ivs, use, old_cp);
5858
5859 if (!new_cp)
5860 {
5861 iv_ca_delta_free (delta);
5862 return infinite_cost;
5863 }
5864
5865 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5866 }
5867
5868 iv_ca_delta_commit (data, ivs, *delta, true);
5869 cost = iv_ca_cost (ivs);
5870 iv_ca_delta_commit (data, ivs, *delta, false);
5871
5872 return cost;
5873 }
5874
5875 /* Try optimizing the set of candidates IVS by removing candidates other
5876    than EXCEPT_CAND from it.  Return the cost of the new set, and store the
5877    differences in DELTA. */
5878
5879 static comp_cost
5880 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5881 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5882 {
5883 bitmap_iterator bi;
5884 struct iv_ca_delta *act_delta, *best_delta;
5885 unsigned i;
5886 comp_cost best_cost, acost;
5887 struct iv_cand *cand;
5888
5889 best_delta = NULL;
5890 best_cost = iv_ca_cost (ivs);
5891
5892 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5893 {
5894 cand = iv_cand (data, i);
5895
5896 if (cand == except_cand)
5897 continue;
5898
5899 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5900
5901 if (compare_costs (acost, best_cost) < 0)
5902 {
5903 best_cost = acost;
5904 iv_ca_delta_free (&best_delta);
5905 best_delta = act_delta;
5906 }
5907 else
5908 iv_ca_delta_free (&act_delta);
5909 }
5910
5911 if (!best_delta)
5912 {
5913 *delta = NULL;
5914 return best_cost;
5915 }
5916
5917 /* Recurse to possibly remove other unnecessary ivs. */
5918 iv_ca_delta_commit (data, ivs, best_delta, true);
5919 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5920 iv_ca_delta_commit (data, ivs, best_delta, false);
5921 *delta = iv_ca_delta_join (best_delta, *delta);
5922 return best_cost;
5923 }
5924
5925 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a cheaper
5926    local cost for USE than BEST_CP.  If so, return a pointer to the
5927    corresponding cost_pair; otherwise just return BEST_CP. */
5928
5929 static struct cost_pair*
5930 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5931 unsigned int cand_idx, struct iv_cand *old_cand,
5932 struct cost_pair *best_cp)
5933 {
5934 struct iv_cand *cand;
5935 struct cost_pair *cp;
5936
5937 gcc_assert (old_cand != NULL && best_cp != NULL);
5938 if (cand_idx == old_cand->id)
5939 return best_cp;
5940
5941 cand = iv_cand (data, cand_idx);
5942 cp = get_use_iv_cost (data, use, cand);
5943 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5944 return cp;
5945
5946 return best_cp;
5947 }
5948
5949 /* Try breaking the local optimal fixed-point for IVS by replacing
5950    candidates that are used by more than one iv use.  For each such
5951    candidate, this function tries to represent the iv uses under it using
5952    other candidates with lower local cost, then tries to prune the new set.
5953    If the new set has lower cost, it returns the new cost after recording
5954    the candidate replacements in the list DELTA. */
5955
5956 static comp_cost
5957 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5958 struct iv_ca_delta **delta)
5959 {
5960 bitmap_iterator bi, bj;
5961 unsigned int i, j, k;
5962 struct iv_use *use;
5963 struct iv_cand *cand;
5964 comp_cost orig_cost, acost;
5965 struct iv_ca_delta *act_delta, *tmp_delta;
5966 struct cost_pair *old_cp, *best_cp = NULL;
5967
5968 *delta = NULL;
5969 orig_cost = iv_ca_cost (ivs);
5970
5971 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5972 {
5973 if (ivs->n_cand_uses[i] == 1
5974 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5975 continue;
5976
5977 cand = iv_cand (data, i);
5978
5979 act_delta = NULL;
5980 /* Represent uses under current candidate using other ones with
5981 lower local cost. */
5982 for (j = 0; j < ivs->upto; j++)
5983 {
5984 use = iv_use (data, j);
5985 old_cp = iv_ca_cand_for_use (ivs, use);
5986
5987 if (old_cp->cand != cand)
5988 continue;
5989
5990 best_cp = old_cp;
5991 if (data->consider_all_candidates)
5992 for (k = 0; k < n_iv_cands (data); k++)
5993 best_cp = cheaper_cost_with_cand (data, use, k,
5994 old_cp->cand, best_cp);
5995 else
5996 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5997 best_cp = cheaper_cost_with_cand (data, use, k,
5998 old_cp->cand, best_cp);
5999
6000 if (best_cp == old_cp)
6001 continue;
6002
6003 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6004 }
6005       /* No need for further pruning. */
6006 if (!act_delta)
6007 continue;
6008
6009 /* Prune the new candidate set. */
6010 iv_ca_delta_commit (data, ivs, act_delta, true);
6011 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6012 iv_ca_delta_commit (data, ivs, act_delta, false);
6013 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6014
6015 if (compare_costs (acost, orig_cost) < 0)
6016 {
6017 *delta = act_delta;
6018 return acost;
6019 }
6020 else
6021 iv_ca_delta_free (&act_delta);
6022 }
6023
6024 return orig_cost;
6025 }
6026
6027 /* Tries to extend the set IVS in the best possible way in order
6028    to express the USE.  If ORIGINALP is true, prefer candidates from
6029    the original set of IVs; otherwise favor important candidates not
6030    based on any memory object. */
6031
6032 static bool
6033 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6034 struct iv_use *use, bool originalp)
6035 {
6036 comp_cost best_cost, act_cost;
6037 unsigned i;
6038 bitmap_iterator bi;
6039 struct iv_cand *cand;
6040 struct iv_ca_delta *best_delta = NULL, *act_delta;
6041 struct cost_pair *cp;
6042
6043 iv_ca_add_use (data, ivs, use);
6044 best_cost = iv_ca_cost (ivs);
6045 cp = iv_ca_cand_for_use (ivs, use);
6046 if (cp)
6047 {
6048 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6049 iv_ca_set_no_cp (data, ivs, use);
6050 }
6051
6052 /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
6053    first try important candidates not based on any memory object.  Only if
6054    this fails, try the specific ones.  Rationale -- in loops with many
6055    variables the best choice is often to use just one generic biv.  If we
6056    added many use-specific ivs here, the optimization algorithm would later
6057    be likely to get stuck in a local minimum, thus creating too many ivs.
6058    Going from few ivs to more seems more likely to be successful -- starting
6059    from few ivs, replacing an expensive use by a specific iv should always
6060    be a win (see the illustration after this function). */
6061 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6062 {
6063 cand = iv_cand (data, i);
6064
6065       if (originalp && cand->pos != IP_ORIGINAL)
6066 continue;
6067
6068 if (!originalp && cand->iv->base_object != NULL_TREE)
6069 continue;
6070
6071 if (iv_ca_cand_used_p (ivs, cand))
6072 continue;
6073
6074 cp = get_use_iv_cost (data, use, cand);
6075 if (!cp)
6076 continue;
6077
6078 iv_ca_set_cp (data, ivs, use, cp);
6079 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6080 true);
6081 iv_ca_set_no_cp (data, ivs, use);
6082 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6083
6084 if (compare_costs (act_cost, best_cost) < 0)
6085 {
6086 best_cost = act_cost;
6087
6088 iv_ca_delta_free (&best_delta);
6089 best_delta = act_delta;
6090 }
6091 else
6092 iv_ca_delta_free (&act_delta);
6093 }
6094
6095 if (infinite_cost_p (best_cost))
6096 {
6097 for (i = 0; i < use->n_map_members; i++)
6098 {
6099 cp = use->cost_map + i;
6100 cand = cp->cand;
6101 if (!cand)
6102 continue;
6103
6104 /* Already tried this. */
6105 if (cand->important)
6106 {
6107 if (originalp && cand->pos == IP_ORIGINAL)
6108 continue;
6109 if (!originalp && cand->iv->base_object == NULL_TREE)
6110 continue;
6111 }
6112
6113 if (iv_ca_cand_used_p (ivs, cand))
6114 continue;
6115
6116 act_delta = NULL;
6117 iv_ca_set_cp (data, ivs, use, cp);
6118 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6119 iv_ca_set_no_cp (data, ivs, use);
6120 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6121 cp, act_delta);
6122
6123 if (compare_costs (act_cost, best_cost) < 0)
6124 {
6125 best_cost = act_cost;
6126
6127 if (best_delta)
6128 iv_ca_delta_free (&best_delta);
6129 best_delta = act_delta;
6130 }
6131 else
6132 iv_ca_delta_free (&act_delta);
6133 }
6134 }
6135
6136 iv_ca_delta_commit (data, ivs, best_delta, true);
6137 iv_ca_delta_free (&best_delta);
6138
6139 return !infinite_cost_p (best_cost);
6140 }
6141
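/* Illustration of the rationale above (hypothetical source loop): in

     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];

   one generic biv i serving all three address uses is usually cheaper than
   three use-specific ivs (&a[i], &b[i] and &c[i]); seeding the search with
   important candidates first avoids getting stuck in such a local minimum.  */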
6142 /* Finds an initial assignment of candidates to uses. */
6143
6144 static struct iv_ca *
6145 get_initial_solution (struct ivopts_data *data, bool originalp)
6146 {
6147 struct iv_ca *ivs = iv_ca_new (data);
6148 unsigned i;
6149
6150 for (i = 0; i < n_iv_uses (data); i++)
6151 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6152 {
6153 iv_ca_free (&ivs);
6154 return NULL;
6155 }
6156
6157 return ivs;
6158 }
6159
6160 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6161    points to a bool variable; if it is true, this function tries to break
6162    the local optimal fixed-point by replacing candidates in IVS. */
6163
6164 static bool
6165 try_improve_iv_set (struct ivopts_data *data,
6166 struct iv_ca *ivs, bool *try_replace_p)
6167 {
6168 unsigned i, n_ivs;
6169 comp_cost acost, best_cost = iv_ca_cost (ivs);
6170 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6171 struct iv_cand *cand;
6172
6173 /* Try extending the set of induction variables by one. */
6174 for (i = 0; i < n_iv_cands (data); i++)
6175 {
6176 cand = iv_cand (data, i);
6177
6178 if (iv_ca_cand_used_p (ivs, cand))
6179 continue;
6180
6181 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6182 if (!act_delta)
6183 continue;
6184
6185 /* If we successfully added the candidate and the set is small enough,
6186 try optimizing it by removing other candidates. */
6187 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6188 {
6189 iv_ca_delta_commit (data, ivs, act_delta, true);
6190 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6191 iv_ca_delta_commit (data, ivs, act_delta, false);
6192 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6193 }
6194
6195 if (compare_costs (acost, best_cost) < 0)
6196 {
6197 best_cost = acost;
6198 iv_ca_delta_free (&best_delta);
6199 best_delta = act_delta;
6200 }
6201 else
6202 iv_ca_delta_free (&act_delta);
6203 }
6204
6205 if (!best_delta)
6206 {
6207 /* Try removing the candidates from the set instead. */
6208 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6209
6210 if (!best_delta && *try_replace_p)
6211 {
6212 *try_replace_p = false;
6213 	  /* So far the candidate selection algorithm tends to choose fewer
6214 	     IVs so that it can handle loops with many variables, for which
6215 	     the best choice is often to use only one general biv.  Its
6216 	     weakness is that it cannot handle the opposite case, in which
6217 	     different candidates should be chosen with respect to each use.
6218 	     To solve this, we replace candidates as described in the comment
6219 	     before iv_ca_replace, giving the general algorithm a chance to
6220 	     break the local optimal fixed-point in these cases. */
6221 best_cost = iv_ca_replace (data, ivs, &best_delta);
6222 }
6223
6224 if (!best_delta)
6225 return false;
6226 }
6227
6228 iv_ca_delta_commit (data, ivs, best_delta, true);
6229 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6230 iv_ca_delta_free (&best_delta);
6231 return true;
6232 }
6233
6234 /* Attempts to find the optimal set of induction variables.  We use a simple
6235    greedy heuristic -- we try to replace at most one candidate in the selected
6236 solution and remove the unused ivs while this improves the cost. */
6237
6238 static struct iv_ca *
6239 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6240 {
6241 struct iv_ca *set;
6242 bool try_replace_p = true;
6243
6244 /* Get the initial solution. */
6245 set = get_initial_solution (data, originalp);
6246 if (!set)
6247 {
6248 if (dump_file && (dump_flags & TDF_DETAILS))
6249 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6250 return NULL;
6251 }
6252
6253 if (dump_file && (dump_flags & TDF_DETAILS))
6254 {
6255 fprintf (dump_file, "Initial set of candidates:\n");
6256 iv_ca_dump (data, dump_file, set);
6257 }
6258
6259 while (try_improve_iv_set (data, set, &try_replace_p))
6260 {
6261 if (dump_file && (dump_flags & TDF_DETAILS))
6262 {
6263 fprintf (dump_file, "Improved to:\n");
6264 iv_ca_dump (data, dump_file, set);
6265 }
6266 }
6267
6268 return set;
6269 }
6270
6271 static struct iv_ca *
6272 find_optimal_iv_set (struct ivopts_data *data)
6273 {
6274 unsigned i;
6275 struct iv_ca *set, *origset;
6276 struct iv_use *use;
6277 comp_cost cost, origcost;
6278
6279 /* Determine the cost based on a strategy that starts with original IVs,
6280    then try again using a strategy that prefers candidates not based
6281 on any IVs. */
6282 origset = find_optimal_iv_set_1 (data, true);
6283 set = find_optimal_iv_set_1 (data, false);
6284
6285 if (!origset && !set)
6286 return NULL;
6287
6288 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6289 cost = set ? iv_ca_cost (set) : infinite_cost;
6290
6291 if (dump_file && (dump_flags & TDF_DETAILS))
6292 {
6293 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6294 origcost.cost, origcost.complexity);
6295 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6296 cost.cost, cost.complexity);
6297 }
6298
6299 /* Choose the one with the best cost. */
6300 if (compare_costs (origcost, cost) <= 0)
6301 {
6302 if (set)
6303 iv_ca_free (&set);
6304 set = origset;
6305 }
6306 else if (origset)
6307 iv_ca_free (&origset);
6308
6309 for (i = 0; i < n_iv_uses (data); i++)
6310 {
6311 use = iv_use (data, i);
6312 use->selected = iv_ca_cand_for_use (set, use)->cand;
6313 }
6314
6315 return set;
6316 }
6317
6318 /* Creates a new induction variable corresponding to CAND. */
6319
6320 static void
6321 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6322 {
6323 gimple_stmt_iterator incr_pos;
6324 tree base;
6325 bool after = false;
6326
6327 if (!cand->iv)
6328 return;
6329
6330 switch (cand->pos)
6331 {
6332 case IP_NORMAL:
6333 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6334 break;
6335
6336 case IP_END:
6337 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6338 after = true;
6339 break;
6340
6341 case IP_AFTER_USE:
6342 after = true;
6343 /* fall through */
6344 case IP_BEFORE_USE:
6345 incr_pos = gsi_for_stmt (cand->incremented_at);
6346 break;
6347
6348 case IP_ORIGINAL:
6349 /* Mark that the iv is preserved. */
6350 name_info (data, cand->var_before)->preserve_biv = true;
6351 name_info (data, cand->var_after)->preserve_biv = true;
6352
6353 /* Rewrite the increment so that it uses var_before directly. */
6354 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6355 return;
6356 }
6357
6358 gimple_add_tmp_var (cand->var_before);
6359
6360 base = unshare_expr (cand->iv->base);
6361
6362 create_iv (base, unshare_expr (cand->iv->step),
6363 cand->var_before, data->current_loop,
6364 &incr_pos, after, &cand->var_before, &cand->var_after);
6365 }
6366
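/* Illustration of the increment positions handled above (schematic, not an
   actual dump; the block layout depends on the loop):

     IP_NORMAL -- increment inserted just before the exit test:

       <bb>:
	 ... uses of i ...
	 i' = i + 1;
	 if (i' < n) goto <bb>; else goto <exit>;

     IP_END -- increment inserted at the end of the latch block:

       <header>:
	 ... uses of i ...
	 if (i < n) goto <latch>; else goto <exit>;
       <latch>:
	 i' = i + 1;
	 goto <header>;

   IP_BEFORE_USE and IP_AFTER_USE place the increment immediately around
   CAND->incremented_at, and IP_ORIGINAL preserves the existing biv.  */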
6367 /* Creates new induction variables described in SET. */
6368
6369 static void
6370 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6371 {
6372 unsigned i;
6373 struct iv_cand *cand;
6374 bitmap_iterator bi;
6375
6376 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6377 {
6378 cand = iv_cand (data, i);
6379 create_new_iv (data, cand);
6380 }
6381
6382 if (dump_file && (dump_flags & TDF_DETAILS))
6383 {
6384 fprintf (dump_file, "Selected IV set for loop %d",
6385 data->current_loop->num);
6386 if (data->loop_loc != UNKNOWN_LOCATION)
6387 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6388 LOCATION_LINE (data->loop_loc));
6389 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6390 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6391 {
6392 cand = iv_cand (data, i);
6393 dump_cand (dump_file, cand);
6394 }
6395 fprintf (dump_file, "\n");
6396 }
6397 }
6398
6399 /* Rewrites USE (definition of iv used in a nonlinear expression)
6400 using candidate CAND. */
6401
6402 static void
6403 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6404 struct iv_use *use, struct iv_cand *cand)
6405 {
6406 tree comp;
6407 tree op, tgt;
6408 gassign *ass;
6409 gimple_stmt_iterator bsi;
6410
6411   /* An important special case -- if we are asked to express the value of
6412 the original iv by itself, just exit; there is no need to
6413 introduce a new computation (that might also need casting the
6414 variable to unsigned and back). */
6415 if (cand->pos == IP_ORIGINAL
6416 && cand->incremented_at == use->stmt)
6417 {
6418 enum tree_code stmt_code;
6419
6420 gcc_assert (is_gimple_assign (use->stmt));
6421 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6422
6423 /* Check whether we may leave the computation unchanged.
6424 This is the case only if it does not rely on other
6425 computations in the loop -- otherwise, the computation
6426 we rely upon may be removed in remove_unused_ivs,
6427 thus leading to ICE. */
6428 stmt_code = gimple_assign_rhs_code (use->stmt);
6429 if (stmt_code == PLUS_EXPR
6430 || stmt_code == MINUS_EXPR
6431 || stmt_code == POINTER_PLUS_EXPR)
6432 {
6433 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6434 op = gimple_assign_rhs2 (use->stmt);
6435 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6436 op = gimple_assign_rhs1 (use->stmt);
6437 else
6438 op = NULL_TREE;
6439 }
6440 else
6441 op = NULL_TREE;
6442
6443 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6444 return;
6445 }
6446
6447 comp = get_computation (data->current_loop, use, cand);
6448 gcc_assert (comp != NULL_TREE);
6449
6450 switch (gimple_code (use->stmt))
6451 {
6452 case GIMPLE_PHI:
6453 tgt = PHI_RESULT (use->stmt);
6454
6455 /* If we should keep the biv, do not replace it. */
6456 if (name_info (data, tgt)->preserve_biv)
6457 return;
6458
6459 bsi = gsi_after_labels (gimple_bb (use->stmt));
6460 break;
6461
6462 case GIMPLE_ASSIGN:
6463 tgt = gimple_assign_lhs (use->stmt);
6464 bsi = gsi_for_stmt (use->stmt);
6465 break;
6466
6467 default:
6468 gcc_unreachable ();
6469 }
6470
6471 if (!valid_gimple_rhs_p (comp)
6472 || (gimple_code (use->stmt) != GIMPLE_PHI
6473 	  /* We can't allow re-allocating the stmt as it might still
6474 	     be pointed to. */
6475 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6476 >= gimple_num_ops (gsi_stmt (bsi)))))
6477 {
6478 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6479 true, GSI_SAME_STMT);
6480 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6481 {
6482 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6483 /* As this isn't a plain copy we have to reset alignment
6484 information. */
6485 if (SSA_NAME_PTR_INFO (comp))
6486 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6487 }
6488 }
6489
6490 if (gimple_code (use->stmt) == GIMPLE_PHI)
6491 {
6492 ass = gimple_build_assign (tgt, comp);
6493 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6494
6495 bsi = gsi_for_stmt (use->stmt);
6496 remove_phi_node (&bsi, false);
6497 }
6498 else
6499 {
6500 gimple_assign_set_rhs_from_tree (&bsi, comp);
6501 use->stmt = gsi_stmt (bsi);
6502 }
6503 }
6504
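/* Illustration of the special case above (hypothetical SSA names): for the
   original biv increment

     i_7 = i_4 + 1;

   where CAND is the original iv (IP_ORIGINAL) incremented exactly at this
   statement and the other operand is loop-invariant, the statement is kept
   as is; rewriting it via get_computation would only introduce a redundant
   computation with extra casts to unsigned and back.  */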
6505 /* Performs a peephole optimization to reorder the iv update statement with
6506 a mem ref to enable instruction combining in later phases. The mem ref uses
6507 the iv value before the update, so the reordering transformation requires
6508 adjustment of the offset. CAND is the selected IV_CAND.
6509
6510 Example:
6511
6512 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6513 iv2 = iv1 + 1;
6514
6515 if (t < val) (1)
6516 goto L;
6517 goto Head;
6518
6519
6520 directly propagating t over to (1) would introduce an overlapping live
6521 range and thus increase register pressure.  This peephole transforms it into:
6522
6523
6524 iv2 = iv1 + 1;
6525 t = MEM_REF (base, iv2, 8, 8);
6526 if (t < val)
6527 goto L;
6528 goto Head;
6529 */
6530
6531 static void
6532 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6533 {
6534 tree var_after;
6535 gimple iv_update, stmt;
6536 basic_block bb;
6537 gimple_stmt_iterator gsi, gsi_iv;
6538
6539 if (cand->pos != IP_NORMAL)
6540 return;
6541
6542 var_after = cand->var_after;
6543 iv_update = SSA_NAME_DEF_STMT (var_after);
6544
6545 bb = gimple_bb (iv_update);
6546 gsi = gsi_last_nondebug_bb (bb);
6547 stmt = gsi_stmt (gsi);
6548
6549   /* Only handle conditional statements for now. */
6550 if (gimple_code (stmt) != GIMPLE_COND)
6551 return;
6552
6553 gsi_prev_nondebug (&gsi);
6554 stmt = gsi_stmt (gsi);
6555 if (stmt != iv_update)
6556 return;
6557
6558 gsi_prev_nondebug (&gsi);
6559 if (gsi_end_p (gsi))
6560 return;
6561
6562 stmt = gsi_stmt (gsi);
6563 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6564 return;
6565
6566 if (stmt != use->stmt)
6567 return;
6568
6569 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6570 return;
6571
6572 if (dump_file && (dump_flags & TDF_DETAILS))
6573 {
6574 fprintf (dump_file, "Reordering \n");
6575 print_gimple_stmt (dump_file, iv_update, 0, 0);
6576 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6577 fprintf (dump_file, "\n");
6578 }
6579
6580 gsi = gsi_for_stmt (use->stmt);
6581 gsi_iv = gsi_for_stmt (iv_update);
6582 gsi_move_before (&gsi_iv, &gsi);
6583
6584 cand->pos = IP_BEFORE_USE;
6585 cand->incremented_at = use->stmt;
6586 }
6587
6588 /* Rewrites USE (address that is an iv) using candidate CAND. */
6589
6590 static void
6591 rewrite_use_address (struct ivopts_data *data,
6592 struct iv_use *use, struct iv_cand *cand)
6593 {
6594 aff_tree aff;
6595 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6596 tree base_hint = NULL_TREE;
6597 tree ref, iv;
6598 bool ok;
6599
6600 adjust_iv_update_pos (cand, use);
6601 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6602 gcc_assert (ok);
6603 unshare_aff_combination (&aff);
6604
6605 /* To avoid undefined overflow problems, all IV candidates use unsigned
6606 integer types. The drawback is that this makes it impossible for
6607 create_mem_ref to distinguish an IV that is based on a memory object
6608 from one that represents simply an offset.
6609
6610 To work around this problem, we pass a hint to create_mem_ref that
6611 indicates which variable (if any) in aff is an IV based on a memory
6612 object. Note that we only consider the candidate. If this is not
6613 based on an object, the base of the reference is in some subexpression
6614 of the use -- but these will use pointer types, so they are recognized
6615 by the create_mem_ref heuristics anyway. */
6616 if (cand->iv->base_object)
6617 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6618
6619 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6620 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6621 reference_alias_ptr_type (*use->op_p),
6622 iv, base_hint, data->speed);
6623 copy_ref_info (ref, *use->op_p);
6624 *use->op_p = ref;
6625 }
6626
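/* Illustration (hypothetical dump; ivtmp.13 is a made-up name): an address
   use such as a[i_5] rewritten against a candidate stepping by 4 typically
   becomes a TARGET_MEM_REF along the lines of

     t_6 = MEM[symbol: a, index: ivtmp.13_8, step: 4, offset: 0B];

   where the base hint tells create_mem_ref which part of AFF, if any, is
   the iv based on a memory object.  */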
6627 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6628 candidate CAND. */
6629
6630 static void
6631 rewrite_use_compare (struct ivopts_data *data,
6632 struct iv_use *use, struct iv_cand *cand)
6633 {
6634 tree comp, *var_p, op, bound;
6635 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6636 enum tree_code compare;
6637 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6638 bool ok;
6639
6640 bound = cp->value;
6641 if (bound)
6642 {
6643 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6644 tree var_type = TREE_TYPE (var);
6645 gimple_seq stmts;
6646
6647 if (dump_file && (dump_flags & TDF_DETAILS))
6648 {
6649 fprintf (dump_file, "Replacing exit test: ");
6650 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6651 }
6652 compare = cp->comp;
6653 bound = unshare_expr (fold_convert (var_type, bound));
6654 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6655 if (stmts)
6656 gsi_insert_seq_on_edge_immediate (
6657 loop_preheader_edge (data->current_loop),
6658 stmts);
6659
6660 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6661 gimple_cond_set_lhs (cond_stmt, var);
6662 gimple_cond_set_code (cond_stmt, compare);
6663 gimple_cond_set_rhs (cond_stmt, op);
6664 return;
6665 }
6666
6667 /* The induction variable elimination failed; just express the original
6668 giv. */
6669 comp = get_computation (data->current_loop, use, cand);
6670 gcc_assert (comp != NULL_TREE);
6671
6672 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6673 gcc_assert (ok);
6674
6675 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6676 true, GSI_SAME_STMT);
6677 }
6678
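/* Illustration (hypothetical): when induction variable elimination computed
   a bound for the exit test, a comparison such as

     if (i_4 < 100)

   may be rewritten against a pointer candidate p as

     if (p_7 != &a[100])

   with the bound (CP->value) forced to a gimple operand once on the
   preheader edge.  If no bound is available, the original value is simply
   re-expressed in terms of the candidate.  */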
6679 /* Rewrites USE using candidate CAND. */
6680
6681 static void
6682 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6683 {
6684 switch (use->type)
6685 {
6686 case USE_NONLINEAR_EXPR:
6687 rewrite_use_nonlinear_expr (data, use, cand);
6688 break;
6689
6690 case USE_ADDRESS:
6691 rewrite_use_address (data, use, cand);
6692 break;
6693
6694 case USE_COMPARE:
6695 rewrite_use_compare (data, use, cand);
6696 break;
6697
6698 default:
6699 gcc_unreachable ();
6700 }
6701
6702 update_stmt (use->stmt);
6703 }
6704
6705 /* Rewrite the uses using the selected induction variables. */
6706
6707 static void
6708 rewrite_uses (struct ivopts_data *data)
6709 {
6710 unsigned i;
6711 struct iv_cand *cand;
6712 struct iv_use *use;
6713
6714 for (i = 0; i < n_iv_uses (data); i++)
6715 {
6716 use = iv_use (data, i);
6717 cand = use->selected;
6718 gcc_assert (cand);
6719
6720 rewrite_use (data, use, cand);
6721 }
6722 }
6723
6724 /* Removes the ivs that are not used after rewriting. */
6725
6726 static void
6727 remove_unused_ivs (struct ivopts_data *data)
6728 {
6729 unsigned j;
6730 bitmap_iterator bi;
6731 bitmap toremove = BITMAP_ALLOC (NULL);
6732
6733 /* Figure out an order in which to release SSA DEFs so that we don't
6734 release something that we'd have to propagate into a debug stmt
6735 afterwards. */
6736 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6737 {
6738 struct version_info *info;
6739
6740 info = ver_info (data, j);
6741 if (info->iv
6742 && !integer_zerop (info->iv->step)
6743 && !info->inv_id
6744 && !info->iv->have_use_for
6745 && !info->preserve_biv)
6746 {
6747 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6748
6749 tree def = info->iv->ssa_name;
6750
6751 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6752 {
6753 imm_use_iterator imm_iter;
6754 use_operand_p use_p;
6755 gimple stmt;
6756 int count = 0;
6757
6758 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6759 {
6760 if (!gimple_debug_bind_p (stmt))
6761 continue;
6762
6763 /* We just want to determine whether to do nothing
6764 (count == 0), to substitute the computed
6765 expression into a single use of the SSA DEF by
6766 itself (count == 1), or to use a debug temp
6767 because the SSA DEF is used multiple times or as
6768 part of a larger expression (count > 1). */
6769 count++;
6770 if (gimple_debug_bind_get_value (stmt) != def)
6771 count++;
6772
6773 if (count > 1)
6774 BREAK_FROM_IMM_USE_STMT (imm_iter);
6775 }
6776
6777 if (!count)
6778 continue;
6779
6780 struct iv_use dummy_use;
6781 struct iv_cand *best_cand = NULL, *cand;
6782 unsigned i, best_pref = 0, cand_pref;
6783
6784 memset (&dummy_use, 0, sizeof (dummy_use));
6785 dummy_use.iv = info->iv;
6786 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6787 {
6788 cand = iv_use (data, i)->selected;
6789 if (cand == best_cand)
6790 continue;
6791 cand_pref = operand_equal_p (cand->iv->step,
6792 info->iv->step, 0)
6793 ? 4 : 0;
6794 cand_pref
6795 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6796 == TYPE_MODE (TREE_TYPE (info->iv->base))
6797 ? 2 : 0;
6798 cand_pref
6799 += TREE_CODE (cand->iv->base) == INTEGER_CST
6800 ? 1 : 0;
6801 if (best_cand == NULL || best_pref < cand_pref)
6802 {
6803 best_cand = cand;
6804 best_pref = cand_pref;
6805 }
6806 }
6807
6808 if (!best_cand)
6809 continue;
6810
6811 tree comp = get_computation_at (data->current_loop,
6812 &dummy_use, best_cand,
6813 SSA_NAME_DEF_STMT (def));
6814 if (!comp)
6815 continue;
6816
6817 if (count > 1)
6818 {
6819 tree vexpr = make_node (DEBUG_EXPR_DECL);
6820 DECL_ARTIFICIAL (vexpr) = 1;
6821 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6822 if (SSA_NAME_VAR (def))
6823 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6824 else
6825 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6826 gdebug *def_temp
6827 = gimple_build_debug_bind (vexpr, comp, NULL);
6828 gimple_stmt_iterator gsi;
6829
6830 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6831 gsi = gsi_after_labels (gimple_bb
6832 (SSA_NAME_DEF_STMT (def)));
6833 else
6834 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6835
6836 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6837 comp = vexpr;
6838 }
6839
6840 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6841 {
6842 if (!gimple_debug_bind_p (stmt))
6843 continue;
6844
6845 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6846 SET_USE (use_p, comp);
6847
6848 update_stmt (stmt);
6849 }
6850 }
6851 }
6852 }
6853
6854 release_defs_bitset (toremove);
6855
6856 BITMAP_FREE (toremove);
6857 }
6858
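/* Illustration (hypothetical dump): if the user's biv i is removed while
   candidate ivtmp.13 survives, debug info for i is preserved by binding it
   to a value computed from the surviving candidate; with more than one
   debug use, a debug temp is introduced first:

     # DEBUG D#1 => (int) ivtmp.13_8
     # DEBUG i => D#1  */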
6859 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6860 for hash_map::traverse. */
6861
6862 bool
6863 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6864 {
6865 free (value);
6866 return true;
6867 }
6868
6869 /* Frees data allocated by the optimization of a single loop. */
6870
6871 static void
6872 free_loop_data (struct ivopts_data *data)
6873 {
6874 unsigned i, j;
6875 bitmap_iterator bi;
6876 tree obj;
6877
6878 if (data->niters)
6879 {
6880 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6881 delete data->niters;
6882 data->niters = NULL;
6883 }
6884
6885 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6886 {
6887 struct version_info *info;
6888
6889 info = ver_info (data, i);
6890 free (info->iv);
6891 info->iv = NULL;
6892 info->has_nonlin_use = false;
6893 info->preserve_biv = false;
6894 info->inv_id = 0;
6895 }
6896 bitmap_clear (data->relevant);
6897 bitmap_clear (data->important_candidates);
6898
6899 for (i = 0; i < n_iv_uses (data); i++)
6900 {
6901 struct iv_use *use = iv_use (data, i);
6902
6903 free (use->iv);
6904 BITMAP_FREE (use->related_cands);
6905 for (j = 0; j < use->n_map_members; j++)
6906 if (use->cost_map[j].depends_on)
6907 BITMAP_FREE (use->cost_map[j].depends_on);
6908 free (use->cost_map);
6909 free (use);
6910 }
6911 data->iv_uses.truncate (0);
6912
6913 for (i = 0; i < n_iv_cands (data); i++)
6914 {
6915 struct iv_cand *cand = iv_cand (data, i);
6916
6917 free (cand->iv);
6918 if (cand->depends_on)
6919 BITMAP_FREE (cand->depends_on);
6920 free (cand);
6921 }
6922 data->iv_candidates.truncate (0);
6923
6924 if (data->version_info_size < num_ssa_names)
6925 {
6926 data->version_info_size = 2 * num_ssa_names;
6927 free (data->version_info);
6928 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6929 }
6930
6931 data->max_inv_id = 0;
6932
6933 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6934 SET_DECL_RTL (obj, NULL_RTX);
6935
6936 decl_rtl_to_reset.truncate (0);
6937
6938 data->inv_expr_tab->empty ();
6939 data->inv_expr_id = 0;
6940 }
6941
6942 /* Finalizes the data structures used by the iv optimization pass
6943    stored in DATA. */
6944
6945 static void
6946 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6947 {
6948 free_loop_data (data);
6949 free (data->version_info);
6950 BITMAP_FREE (data->relevant);
6951 BITMAP_FREE (data->important_candidates);
6952
6953 decl_rtl_to_reset.release ();
6954 data->iv_uses.release ();
6955 data->iv_candidates.release ();
6956 delete data->inv_expr_tab;
6957 data->inv_expr_tab = NULL;
6958 free_affine_expand_cache (&data->name_expansion_cache);
6959 }
6960
6961 /* Returns true if the loop body BODY includes any function calls. */
6962
6963 static bool
6964 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6965 {
6966 gimple_stmt_iterator gsi;
6967 unsigned i;
6968
6969 for (i = 0; i < num_nodes; i++)
6970 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6971 {
6972 gimple stmt = gsi_stmt (gsi);
6973 if (is_gimple_call (stmt)
6974 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6975 return true;
6976 }
6977 return false;
6978 }
6979
6980 /* Optimizes the LOOP. Returns true if anything changed. */
6981
6982 static bool
6983 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6984 {
6985 bool changed = false;
6986 struct iv_ca *iv_ca;
6987 edge exit = single_dom_exit (loop);
6988 basic_block *body;
6989
6990 gcc_assert (!data->niters);
6991 data->current_loop = loop;
6992 data->loop_loc = find_loop_location (loop);
6993 data->speed = optimize_loop_for_speed_p (loop);
6994
6995 if (dump_file && (dump_flags & TDF_DETAILS))
6996 {
6997 fprintf (dump_file, "Processing loop %d", loop->num);
6998 if (data->loop_loc != UNKNOWN_LOCATION)
6999 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7000 LOCATION_LINE (data->loop_loc));
7001 fprintf (dump_file, "\n");
7002
7003 if (exit)
7004 {
7005 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7006 exit->src->index, exit->dest->index);
7007 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7008 fprintf (dump_file, "\n");
7009 }
7010
7011 fprintf (dump_file, "\n");
7012 }
7013
7014 body = get_loop_body (loop);
7015 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7016 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7017 free (body);
7018
7019 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7020
7021 /* For each ssa name determines whether it behaves as an induction variable
7022 in some loop. */
7023 if (!find_induction_variables (data))
7024 goto finish;
7025
7026 /* Finds interesting uses (item 1). */
7027 find_interesting_uses (data);
7028 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7029 goto finish;
7030
7031 /* Finds candidates for the induction variables (item 2). */
7032 find_iv_candidates (data);
7033
7034 /* Calculates the costs (item 3, part 1). */
7035 determine_iv_costs (data);
7036 determine_use_iv_costs (data);
7037 determine_set_costs (data);
7038
7039 /* Find the optimal set of induction variables (item 3, part 2). */
7040 iv_ca = find_optimal_iv_set (data);
7041 if (!iv_ca)
7042 goto finish;
7043 changed = true;
7044
7045 /* Create the new induction variables (item 4, part 1). */
7046 create_new_ivs (data, iv_ca);
7047 iv_ca_free (&iv_ca);
7048
7049 /* Rewrite the uses (item 4, part 2). */
7050 rewrite_uses (data);
7051
7052 /* Remove the ivs that are unused after rewriting. */
7053 remove_unused_ivs (data);
7054
7055 /* We have changed the structure of induction variables; it might happen
7056 that definitions in the scev database refer to some of them that were
7057 eliminated. */
7058 scev_reset ();
7059
7060 finish:
7061 free_loop_data (data);
7062
7063 return changed;
7064 }
7065
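/* Illustration of the overall effect on a loop (hypothetical): the pass may
   rewrite

     for (i = 0; i < n; i++)
       sum += a[i];

   to use a single pointer iv,

     for (p = a; p != a + n; p++)
       sum += *p;

   strength-reducing the address computation and eliminating the exit test
   on i in one step.  */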
7066 /* Main entry point. Optimizes induction variables in loops. */
7067
7068 void
7069 tree_ssa_iv_optimize (void)
7070 {
7071 struct loop *loop;
7072 struct ivopts_data data;
7073
7074 tree_ssa_iv_optimize_init (&data);
7075
7076 /* Optimize the loops starting with the innermost ones. */
7077 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7078 {
7079 if (dump_file && (dump_flags & TDF_DETAILS))
7080 flow_loop_dump (loop, dump_file, NULL, 1);
7081
7082 tree_ssa_iv_optimize_loop (&data, loop);
7083 }
7084
7085 tree_ssa_iv_optimize_finalize (&data);
7086 }