]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-ssa-loop-ivopts.c
e1ab00c4827f8b7d8884d30c2822d235f11056f9
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2014 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "tree.h"
69 #include "stor-layout.h"
70 #include "tm_p.h"
71 #include "basic-block.h"
72 #include "gimple-pretty-print.h"
73 #include "hash-map.h"
74 #include "hash-table.h"
75 #include "tree-ssa-alias.h"
76 #include "internal-fn.h"
77 #include "tree-eh.h"
78 #include "gimple-expr.h"
79 #include "is-a.h"
80 #include "gimple.h"
81 #include "gimplify.h"
82 #include "gimple-iterator.h"
83 #include "gimplify-me.h"
84 #include "gimple-ssa.h"
85 #include "cgraph.h"
86 #include "tree-cfg.h"
87 #include "tree-phinodes.h"
88 #include "ssa-iterators.h"
89 #include "stringpool.h"
90 #include "tree-ssanames.h"
91 #include "tree-ssa-loop-ivopts.h"
92 #include "tree-ssa-loop-manip.h"
93 #include "tree-ssa-loop-niter.h"
94 #include "tree-ssa-loop.h"
95 #include "expr.h"
96 #include "tree-dfa.h"
97 #include "tree-ssa.h"
98 #include "cfgloop.h"
99 #include "tree-pass.h"
100 #include "insn-config.h"
101 #include "tree-chrec.h"
102 #include "tree-scalar-evolution.h"
103 #include "cfgloop.h"
104 #include "params.h"
105 #include "langhooks.h"
106 #include "tree-affine.h"
107 #include "target.h"
108 #include "tree-inline.h"
109 #include "tree-ssa-propagate.h"
110 #include "expmed.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113
114 /* FIXME: Expressions are expanded to RTL in this pass to determine the
115 cost of different addressing modes. This should be moved to a TBD
116 interface between the GIMPLE and RTL worlds. */
117 #include "expr.h"
118 #include "recog.h"
119
120 /* The infinite cost. */
121 #define INFTY 10000000
122
123 #define AVG_LOOP_NITER(LOOP) 5
124
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
127 exists. */
128
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop *loop)
131 {
132 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
133 if (niter == -1)
134 return AVG_LOOP_NITER (loop);
135
136 return niter;
137 }
138
139 /* Representation of the induction variable. */
140 struct iv
141 {
142 tree base; /* Initial value of the iv. */
143 tree base_object; /* A memory object to that the induction variable points. */
144 tree step; /* Step of the iv (constant only). */
145 tree ssa_name; /* The ssa name with the value. */
146 bool biv_p; /* Is it a biv? */
147 bool have_use_for; /* Do we already have a use for it? */
148 unsigned use_id; /* The identifier in the use if it is the case. */
149 };
150
151 /* Per-ssa version information (induction variable descriptions, etc.). */
152 struct version_info
153 {
154 tree name; /* The ssa name. */
155 struct iv *iv; /* Induction variable description. */
156 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
157 an expression that is not an induction variable. */
158 bool preserve_biv; /* For the original biv, whether to preserve it. */
159 unsigned inv_id; /* Id of an invariant. */
160 };
161
162 /* Types of uses. */
163 enum use_type
164 {
165 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
166 USE_ADDRESS, /* Use in an address. */
167 USE_COMPARE /* Use is a compare. */
168 };
169
170 /* Cost of a computation. */
171 typedef struct
172 {
173 int cost; /* The runtime cost. */
174 unsigned complexity; /* The estimate of the complexity of the code for
175 the computation (in no concrete units --
176 complexity field should be larger for more
177 complex expressions and addressing modes). */
178 } comp_cost;
179
180 static const comp_cost no_cost = {0, 0};
181 static const comp_cost infinite_cost = {INFTY, INFTY};
182
183 /* The candidate - cost pair. */
184 struct cost_pair
185 {
186 struct iv_cand *cand; /* The candidate. */
187 comp_cost cost; /* The cost. */
188 bitmap depends_on; /* The list of invariants that have to be
189 preserved. */
190 tree value; /* For final value elimination, the expression for
191 the final value of the iv. For iv elimination,
192 the new bound to compare with. */
193 enum tree_code comp; /* For iv elimination, the comparison. */
194 int inv_expr_id; /* Loop invariant expression id. */
195 };
196
197 /* Use. */
198 struct iv_use
199 {
200 unsigned id; /* The id of the use. */
201 enum use_type type; /* Type of the use. */
202 struct iv *iv; /* The induction variable it is based on. */
203 gimple stmt; /* Statement in that it occurs. */
204 tree *op_p; /* The place where it occurs. */
205 bitmap related_cands; /* The set of "related" iv candidates, plus the common
206 important ones. */
207
208 unsigned n_map_members; /* Number of candidates in the cost_map list. */
209 struct cost_pair *cost_map;
210 /* The costs wrto the iv candidates. */
211
212 struct iv_cand *selected;
213 /* The selected candidate. */
214 };
215
216 /* The position where the iv is computed. */
217 enum iv_position
218 {
219 IP_NORMAL, /* At the end, just before the exit condition. */
220 IP_END, /* At the end of the latch block. */
221 IP_BEFORE_USE, /* Immediately before a specific use. */
222 IP_AFTER_USE, /* Immediately after a specific use. */
223 IP_ORIGINAL /* The original biv. */
224 };
225
226 /* The induction variable candidate. */
227 struct iv_cand
228 {
229 unsigned id; /* The number of the candidate. */
230 bool important; /* Whether this is an "important" candidate, i.e. such
231 that it should be considered by all uses. */
232 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
233 gimple incremented_at;/* For original biv, the statement where it is
234 incremented. */
235 tree var_before; /* The variable used for it before increment. */
236 tree var_after; /* The variable used for it after increment. */
237 struct iv *iv; /* The value of the candidate. NULL for
238 "pseudocandidate" used to indicate the possibility
239 to replace the final value of an iv by direct
240 computation of the value. */
241 unsigned cost; /* Cost of the candidate. */
242 unsigned cost_step; /* Cost of the candidate's increment operation. */
243 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
244 where it is incremented. */
245 bitmap depends_on; /* The list of invariants that are used in step of the
246 biv. */
247 };
248
249 /* Loop invariant expression hashtable entry. */
250 struct iv_inv_expr_ent
251 {
252 tree expr;
253 int id;
254 hashval_t hash;
255 };
256
257 /* The data used by the induction variable optimizations. */
258
259 typedef struct iv_use *iv_use_p;
260
261 typedef struct iv_cand *iv_cand_p;
262
263 /* Hashtable helpers. */
264
265 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
266 {
267 typedef iv_inv_expr_ent value_type;
268 typedef iv_inv_expr_ent compare_type;
269 static inline hashval_t hash (const value_type *);
270 static inline bool equal (const value_type *, const compare_type *);
271 };
272
273 /* Hash function for loop invariant expressions. */
274
275 inline hashval_t
276 iv_inv_expr_hasher::hash (const value_type *expr)
277 {
278 return expr->hash;
279 }
280
281 /* Hash table equality function for expressions. */
282
283 inline bool
284 iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
285 {
286 return expr1->hash == expr2->hash
287 && operand_equal_p (expr1->expr, expr2->expr, 0);
288 }
289
290 struct ivopts_data
291 {
292 /* The currently optimized loop. */
293 struct loop *current_loop;
294
295 /* Numbers of iterations for all exits of the current loop. */
296 hash_map<edge, tree_niter_desc *> *niters;
297
298 /* Number of registers used in it. */
299 unsigned regs_used;
300
301 /* The size of version_info array allocated. */
302 unsigned version_info_size;
303
304 /* The array of information for the ssa names. */
305 struct version_info *version_info;
306
307 /* The hashtable of loop invariant expressions created
308 by ivopt. */
309 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
310
311 /* Loop invariant expression id. */
312 int inv_expr_id;
313
314 /* The bitmap of indices in version_info whose value was changed. */
315 bitmap relevant;
316
317 /* The uses of induction variables. */
318 vec<iv_use_p> iv_uses;
319
320 /* The candidates. */
321 vec<iv_cand_p> iv_candidates;
322
323 /* A bitmap of important candidates. */
324 bitmap important_candidates;
325
326 /* Cache used by tree_to_aff_combination_expand. */
327 hash_map<tree, name_expansion *> *name_expansion_cache;
328
329 /* The maximum invariant id. */
330 unsigned max_inv_id;
331
332 /* Whether to consider just related and important candidates when replacing a
333 use. */
334 bool consider_all_candidates;
335
336 /* Are we optimizing for speed? */
337 bool speed;
338
339 /* Whether the loop body includes any function calls. */
340 bool body_includes_call;
341
342 /* Whether the loop body can only be exited via single exit. */
343 bool loop_single_exit_p;
344 };
345
346 /* An assignment of iv candidates to uses. */
347
348 struct iv_ca
349 {
350 /* The number of uses covered by the assignment. */
351 unsigned upto;
352
353 /* Number of uses that cannot be expressed by the candidates in the set. */
354 unsigned bad_uses;
355
356 /* Candidate assigned to a use, together with the related costs. */
357 struct cost_pair **cand_for_use;
358
359 /* Number of times each candidate is used. */
360 unsigned *n_cand_uses;
361
362 /* The candidates used. */
363 bitmap cands;
364
365 /* The number of candidates in the set. */
366 unsigned n_cands;
367
368 /* Total number of registers needed. */
369 unsigned n_regs;
370
371 /* Total cost of expressing uses. */
372 comp_cost cand_use_cost;
373
374 /* Total cost of candidates. */
375 unsigned cand_cost;
376
377 /* Number of times each invariant is used. */
378 unsigned *n_invariant_uses;
379
380 /* The array holding the number of uses of each loop
381 invariant expressions created by ivopt. */
382 unsigned *used_inv_expr;
383
384 /* The number of created loop invariants. */
385 unsigned num_used_inv_expr;
386
387 /* Total cost of the assignment. */
388 comp_cost cost;
389 };
390
391 /* Difference of two iv candidate assignments. */
392
393 struct iv_ca_delta
394 {
395 /* Changed use. */
396 struct iv_use *use;
397
398 /* An old assignment (for rollback purposes). */
399 struct cost_pair *old_cp;
400
401 /* A new assignment. */
402 struct cost_pair *new_cp;
403
404 /* Next change in the list. */
405 struct iv_ca_delta *next_change;
406 };
407
408 /* Bound on number of candidates below that all candidates are considered. */
409
410 #define CONSIDER_ALL_CANDIDATES_BOUND \
411 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
412
413 /* If there are more iv occurrences, we just give up (it is quite unlikely that
414 optimizing such a loop would help, and it would take ages). */
415
416 #define MAX_CONSIDERED_USES \
417 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
418
419 /* If there are at most this number of ivs in the set, try removing unnecessary
420 ivs from the set always. */
421
422 #define ALWAYS_PRUNE_CAND_SET_BOUND \
423 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
424
425 /* The list of trees for that the decl_rtl field must be reset is stored
426 here. */
427
428 static vec<tree> decl_rtl_to_reset;
429
430 static comp_cost force_expr_to_var_cost (tree, bool);
431
432 /* Number of uses recorded in DATA. */
433
434 static inline unsigned
435 n_iv_uses (struct ivopts_data *data)
436 {
437 return data->iv_uses.length ();
438 }
439
440 /* Ith use recorded in DATA. */
441
442 static inline struct iv_use *
443 iv_use (struct ivopts_data *data, unsigned i)
444 {
445 return data->iv_uses[i];
446 }
447
448 /* Number of candidates recorded in DATA. */
449
450 static inline unsigned
451 n_iv_cands (struct ivopts_data *data)
452 {
453 return data->iv_candidates.length ();
454 }
455
456 /* Ith candidate recorded in DATA. */
457
458 static inline struct iv_cand *
459 iv_cand (struct ivopts_data *data, unsigned i)
460 {
461 return data->iv_candidates[i];
462 }
463
464 /* The single loop exit if it dominates the latch, NULL otherwise. */
465
466 edge
467 single_dom_exit (struct loop *loop)
468 {
469 edge exit = single_exit (loop);
470
471 if (!exit)
472 return NULL;
473
474 if (!just_once_each_iteration_p (loop, exit->src))
475 return NULL;
476
477 return exit;
478 }
479
480 /* Dumps information about the induction variable IV to FILE. */
481
482 void
483 dump_iv (FILE *file, struct iv *iv)
484 {
485 if (iv->ssa_name)
486 {
487 fprintf (file, "ssa name ");
488 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
489 fprintf (file, "\n");
490 }
491
492 fprintf (file, " type ");
493 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
494 fprintf (file, "\n");
495
496 if (iv->step)
497 {
498 fprintf (file, " base ");
499 print_generic_expr (file, iv->base, TDF_SLIM);
500 fprintf (file, "\n");
501
502 fprintf (file, " step ");
503 print_generic_expr (file, iv->step, TDF_SLIM);
504 fprintf (file, "\n");
505 }
506 else
507 {
508 fprintf (file, " invariant ");
509 print_generic_expr (file, iv->base, TDF_SLIM);
510 fprintf (file, "\n");
511 }
512
513 if (iv->base_object)
514 {
515 fprintf (file, " base object ");
516 print_generic_expr (file, iv->base_object, TDF_SLIM);
517 fprintf (file, "\n");
518 }
519
520 if (iv->biv_p)
521 fprintf (file, " is a biv\n");
522 }
523
524 /* Dumps information about the USE to FILE. */
525
526 void
527 dump_use (FILE *file, struct iv_use *use)
528 {
529 fprintf (file, "use %d\n", use->id);
530
531 switch (use->type)
532 {
533 case USE_NONLINEAR_EXPR:
534 fprintf (file, " generic\n");
535 break;
536
537 case USE_ADDRESS:
538 fprintf (file, " address\n");
539 break;
540
541 case USE_COMPARE:
542 fprintf (file, " compare\n");
543 break;
544
545 default:
546 gcc_unreachable ();
547 }
548
549 fprintf (file, " in statement ");
550 print_gimple_stmt (file, use->stmt, 0, 0);
551 fprintf (file, "\n");
552
553 fprintf (file, " at position ");
554 if (use->op_p)
555 print_generic_expr (file, *use->op_p, TDF_SLIM);
556 fprintf (file, "\n");
557
558 dump_iv (file, use->iv);
559
560 if (use->related_cands)
561 {
562 fprintf (file, " related candidates ");
563 dump_bitmap (file, use->related_cands);
564 }
565 }
566
567 /* Dumps information about the uses to FILE. */
568
569 void
570 dump_uses (FILE *file, struct ivopts_data *data)
571 {
572 unsigned i;
573 struct iv_use *use;
574
575 for (i = 0; i < n_iv_uses (data); i++)
576 {
577 use = iv_use (data, i);
578
579 dump_use (file, use);
580 fprintf (file, "\n");
581 }
582 }
583
584 /* Dumps information about induction variable candidate CAND to FILE. */
585
586 void
587 dump_cand (FILE *file, struct iv_cand *cand)
588 {
589 struct iv *iv = cand->iv;
590
591 fprintf (file, "candidate %d%s\n",
592 cand->id, cand->important ? " (important)" : "");
593
594 if (cand->depends_on)
595 {
596 fprintf (file, " depends on ");
597 dump_bitmap (file, cand->depends_on);
598 }
599
600 if (!iv)
601 {
602 fprintf (file, " final value replacement\n");
603 return;
604 }
605
606 if (cand->var_before)
607 {
608 fprintf (file, " var_before ");
609 print_generic_expr (file, cand->var_before, TDF_SLIM);
610 fprintf (file, "\n");
611 }
612 if (cand->var_after)
613 {
614 fprintf (file, " var_after ");
615 print_generic_expr (file, cand->var_after, TDF_SLIM);
616 fprintf (file, "\n");
617 }
618
619 switch (cand->pos)
620 {
621 case IP_NORMAL:
622 fprintf (file, " incremented before exit test\n");
623 break;
624
625 case IP_BEFORE_USE:
626 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
627 break;
628
629 case IP_AFTER_USE:
630 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
631 break;
632
633 case IP_END:
634 fprintf (file, " incremented at end\n");
635 break;
636
637 case IP_ORIGINAL:
638 fprintf (file, " original biv\n");
639 break;
640 }
641
642 dump_iv (file, iv);
643 }
644
645 /* Returns the info for ssa version VER. */
646
647 static inline struct version_info *
648 ver_info (struct ivopts_data *data, unsigned ver)
649 {
650 return data->version_info + ver;
651 }
652
653 /* Returns the info for ssa name NAME. */
654
655 static inline struct version_info *
656 name_info (struct ivopts_data *data, tree name)
657 {
658 return ver_info (data, SSA_NAME_VERSION (name));
659 }
660
661 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
662 emitted in LOOP. */
663
664 static bool
665 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
666 {
667 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
668
669 gcc_assert (bb);
670
671 if (sbb == loop->latch)
672 return true;
673
674 if (sbb != bb)
675 return false;
676
677 return stmt == last_stmt (bb);
678 }
679
680 /* Returns true if STMT if after the place where the original induction
681 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
682 if the positions are identical. */
683
684 static bool
685 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
686 {
687 basic_block cand_bb = gimple_bb (cand->incremented_at);
688 basic_block stmt_bb = gimple_bb (stmt);
689
690 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
691 return false;
692
693 if (stmt_bb != cand_bb)
694 return true;
695
696 if (true_if_equal
697 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
698 return true;
699 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
700 }
701
702 /* Returns true if STMT if after the place where the induction variable
703 CAND is incremented in LOOP. */
704
705 static bool
706 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
707 {
708 switch (cand->pos)
709 {
710 case IP_END:
711 return false;
712
713 case IP_NORMAL:
714 return stmt_after_ip_normal_pos (loop, stmt);
715
716 case IP_ORIGINAL:
717 case IP_AFTER_USE:
718 return stmt_after_inc_pos (cand, stmt, false);
719
720 case IP_BEFORE_USE:
721 return stmt_after_inc_pos (cand, stmt, true);
722
723 default:
724 gcc_unreachable ();
725 }
726 }
727
728 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
729
730 static bool
731 abnormal_ssa_name_p (tree exp)
732 {
733 if (!exp)
734 return false;
735
736 if (TREE_CODE (exp) != SSA_NAME)
737 return false;
738
739 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
740 }
741
742 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
743 abnormal phi node. Callback for for_each_index. */
744
745 static bool
746 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
747 void *data ATTRIBUTE_UNUSED)
748 {
749 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
750 {
751 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
752 return false;
753 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
754 return false;
755 }
756
757 return !abnormal_ssa_name_p (*index);
758 }
759
760 /* Returns true if EXPR contains a ssa name that occurs in an
761 abnormal phi node. */
762
763 bool
764 contains_abnormal_ssa_name_p (tree expr)
765 {
766 enum tree_code code;
767 enum tree_code_class codeclass;
768
769 if (!expr)
770 return false;
771
772 code = TREE_CODE (expr);
773 codeclass = TREE_CODE_CLASS (code);
774
775 if (code == SSA_NAME)
776 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
777
778 if (code == INTEGER_CST
779 || is_gimple_min_invariant (expr))
780 return false;
781
782 if (code == ADDR_EXPR)
783 return !for_each_index (&TREE_OPERAND (expr, 0),
784 idx_contains_abnormal_ssa_name_p,
785 NULL);
786
787 if (code == COND_EXPR)
788 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
789 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
790 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
791
792 switch (codeclass)
793 {
794 case tcc_binary:
795 case tcc_comparison:
796 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
797 return true;
798
799 /* Fallthru. */
800 case tcc_unary:
801 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
802 return true;
803
804 break;
805
806 default:
807 gcc_unreachable ();
808 }
809
810 return false;
811 }
812
813 /* Returns the structure describing number of iterations determined from
814 EXIT of DATA->current_loop, or NULL if something goes wrong. */
815
816 static struct tree_niter_desc *
817 niter_for_exit (struct ivopts_data *data, edge exit)
818 {
819 struct tree_niter_desc *desc;
820 tree_niter_desc **slot;
821
822 if (!data->niters)
823 {
824 data->niters = new hash_map<edge, tree_niter_desc *>;
825 slot = NULL;
826 }
827 else
828 slot = data->niters->get (exit);
829
830 if (!slot)
831 {
832 /* Try to determine number of iterations. We cannot safely work with ssa
833 names that appear in phi nodes on abnormal edges, so that we do not
834 create overlapping life ranges for them (PR 27283). */
835 desc = XNEW (struct tree_niter_desc);
836 if (!number_of_iterations_exit (data->current_loop,
837 exit, desc, true)
838 || contains_abnormal_ssa_name_p (desc->niter))
839 {
840 XDELETE (desc);
841 desc = NULL;
842 }
843 data->niters->put (exit, desc);
844 }
845 else
846 desc = *slot;
847
848 return desc;
849 }
850
851 /* Returns the structure describing number of iterations determined from
852 single dominating exit of DATA->current_loop, or NULL if something
853 goes wrong. */
854
855 static struct tree_niter_desc *
856 niter_for_single_dom_exit (struct ivopts_data *data)
857 {
858 edge exit = single_dom_exit (data->current_loop);
859
860 if (!exit)
861 return NULL;
862
863 return niter_for_exit (data, exit);
864 }
865
866 /* Initializes data structures used by the iv optimization pass, stored
867 in DATA. */
868
869 static void
870 tree_ssa_iv_optimize_init (struct ivopts_data *data)
871 {
872 data->version_info_size = 2 * num_ssa_names;
873 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
874 data->relevant = BITMAP_ALLOC (NULL);
875 data->important_candidates = BITMAP_ALLOC (NULL);
876 data->max_inv_id = 0;
877 data->niters = NULL;
878 data->iv_uses.create (20);
879 data->iv_candidates.create (20);
880 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
881 data->inv_expr_id = 0;
882 data->name_expansion_cache = NULL;
883 decl_rtl_to_reset.create (20);
884 }
885
886 /* Returns a memory object to that EXPR points. In case we are able to
887 determine that it does not point to any such object, NULL is returned. */
888
889 static tree
890 determine_base_object (tree expr)
891 {
892 enum tree_code code = TREE_CODE (expr);
893 tree base, obj;
894
895 /* If this is a pointer casted to any type, we need to determine
896 the base object for the pointer; so handle conversions before
897 throwing away non-pointer expressions. */
898 if (CONVERT_EXPR_P (expr))
899 return determine_base_object (TREE_OPERAND (expr, 0));
900
901 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
902 return NULL_TREE;
903
904 switch (code)
905 {
906 case INTEGER_CST:
907 return NULL_TREE;
908
909 case ADDR_EXPR:
910 obj = TREE_OPERAND (expr, 0);
911 base = get_base_address (obj);
912
913 if (!base)
914 return expr;
915
916 if (TREE_CODE (base) == MEM_REF)
917 return determine_base_object (TREE_OPERAND (base, 0));
918
919 return fold_convert (ptr_type_node,
920 build_fold_addr_expr (base));
921
922 case POINTER_PLUS_EXPR:
923 return determine_base_object (TREE_OPERAND (expr, 0));
924
925 case PLUS_EXPR:
926 case MINUS_EXPR:
927 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
928 gcc_unreachable ();
929
930 default:
931 return fold_convert (ptr_type_node, expr);
932 }
933 }
934
935 /* Return true if address expression with non-DECL_P operand appears
936 in EXPR. */
937
938 static bool
939 contain_complex_addr_expr (tree expr)
940 {
941 bool res = false;
942
943 STRIP_NOPS (expr);
944 switch (TREE_CODE (expr))
945 {
946 case POINTER_PLUS_EXPR:
947 case PLUS_EXPR:
948 case MINUS_EXPR:
949 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
950 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
951 break;
952
953 case ADDR_EXPR:
954 return (!DECL_P (TREE_OPERAND (expr, 0)));
955
956 default:
957 return false;
958 }
959
960 return res;
961 }
962
963 /* Allocates an induction variable with given initial value BASE and step STEP
964 for loop LOOP. */
965
966 static struct iv *
967 alloc_iv (tree base, tree step)
968 {
969 tree expr = base;
970 struct iv *iv = XCNEW (struct iv);
971 gcc_assert (step != NULL_TREE);
972
973 /* Lower address expression in base except ones with DECL_P as operand.
974 By doing this:
975 1) More accurate cost can be computed for address expressions;
976 2) Duplicate candidates won't be created for bases in different
977 forms, like &a[0] and &a. */
978 STRIP_NOPS (expr);
979 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
980 || contain_complex_addr_expr (expr))
981 {
982 aff_tree comb;
983 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
984 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
985 }
986
987 iv->base = base;
988 iv->base_object = determine_base_object (base);
989 iv->step = step;
990 iv->biv_p = false;
991 iv->have_use_for = false;
992 iv->use_id = 0;
993 iv->ssa_name = NULL_TREE;
994
995 return iv;
996 }
997
998 /* Sets STEP and BASE for induction variable IV. */
999
1000 static void
1001 set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
1002 {
1003 struct version_info *info = name_info (data, iv);
1004
1005 gcc_assert (!info->iv);
1006
1007 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1008 info->iv = alloc_iv (base, step);
1009 info->iv->ssa_name = iv;
1010 }
1011
1012 /* Finds induction variable declaration for VAR. */
1013
1014 static struct iv *
1015 get_iv (struct ivopts_data *data, tree var)
1016 {
1017 basic_block bb;
1018 tree type = TREE_TYPE (var);
1019
1020 if (!POINTER_TYPE_P (type)
1021 && !INTEGRAL_TYPE_P (type))
1022 return NULL;
1023
1024 if (!name_info (data, var)->iv)
1025 {
1026 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1027
1028 if (!bb
1029 || !flow_bb_inside_loop_p (data->current_loop, bb))
1030 set_iv (data, var, var, build_int_cst (type, 0));
1031 }
1032
1033 return name_info (data, var)->iv;
1034 }
1035
1036 /* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1037 not define a simple affine biv with nonzero step. */
1038
1039 static tree
1040 determine_biv_step (gimple phi)
1041 {
1042 struct loop *loop = gimple_bb (phi)->loop_father;
1043 tree name = PHI_RESULT (phi);
1044 affine_iv iv;
1045
1046 if (virtual_operand_p (name))
1047 return NULL_TREE;
1048
1049 if (!simple_iv (loop, loop, name, &iv, true))
1050 return NULL_TREE;
1051
1052 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
1053 }
1054
1055 /* Finds basic ivs. */
1056
1057 static bool
1058 find_bivs (struct ivopts_data *data)
1059 {
1060 gimple phi;
1061 tree step, type, base;
1062 bool found = false;
1063 struct loop *loop = data->current_loop;
1064 gimple_stmt_iterator psi;
1065
1066 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1067 {
1068 phi = gsi_stmt (psi);
1069
1070 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1071 continue;
1072
1073 step = determine_biv_step (phi);
1074 if (!step)
1075 continue;
1076
1077 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1078 base = expand_simple_operations (base);
1079 if (contains_abnormal_ssa_name_p (base)
1080 || contains_abnormal_ssa_name_p (step))
1081 continue;
1082
1083 type = TREE_TYPE (PHI_RESULT (phi));
1084 base = fold_convert (type, base);
1085 if (step)
1086 {
1087 if (POINTER_TYPE_P (type))
1088 step = convert_to_ptrofftype (step);
1089 else
1090 step = fold_convert (type, step);
1091 }
1092
1093 set_iv (data, PHI_RESULT (phi), base, step);
1094 found = true;
1095 }
1096
1097 return found;
1098 }
1099
1100 /* Marks basic ivs. */
1101
1102 static void
1103 mark_bivs (struct ivopts_data *data)
1104 {
1105 gimple phi, def;
1106 tree var;
1107 struct iv *iv, *incr_iv;
1108 struct loop *loop = data->current_loop;
1109 basic_block incr_bb;
1110 gimple_stmt_iterator psi;
1111
1112 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1113 {
1114 phi = gsi_stmt (psi);
1115
1116 iv = get_iv (data, PHI_RESULT (phi));
1117 if (!iv)
1118 continue;
1119
1120 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1121 def = SSA_NAME_DEF_STMT (var);
1122 /* Don't mark iv peeled from other one as biv. */
1123 if (def
1124 && gimple_code (def) == GIMPLE_PHI
1125 && gimple_bb (def) == loop->header)
1126 continue;
1127
1128 incr_iv = get_iv (data, var);
1129 if (!incr_iv)
1130 continue;
1131
1132 /* If the increment is in the subloop, ignore it. */
1133 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1134 if (incr_bb->loop_father != data->current_loop
1135 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1136 continue;
1137
1138 iv->biv_p = true;
1139 incr_iv->biv_p = true;
1140 }
1141 }
1142
1143 /* Checks whether STMT defines a linear induction variable and stores its
1144 parameters to IV. */
1145
1146 static bool
1147 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1148 {
1149 tree lhs;
1150 struct loop *loop = data->current_loop;
1151
1152 iv->base = NULL_TREE;
1153 iv->step = NULL_TREE;
1154
1155 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1156 return false;
1157
1158 lhs = gimple_assign_lhs (stmt);
1159 if (TREE_CODE (lhs) != SSA_NAME)
1160 return false;
1161
1162 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1163 return false;
1164 iv->base = expand_simple_operations (iv->base);
1165
1166 if (contains_abnormal_ssa_name_p (iv->base)
1167 || contains_abnormal_ssa_name_p (iv->step))
1168 return false;
1169
1170 /* If STMT could throw, then do not consider STMT as defining a GIV.
1171 While this will suppress optimizations, we can not safely delete this
1172 GIV and associated statements, even if it appears it is not used. */
1173 if (stmt_could_throw_p (stmt))
1174 return false;
1175
1176 return true;
1177 }
1178
1179 /* Finds general ivs in statement STMT. */
1180
1181 static void
1182 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1183 {
1184 affine_iv iv;
1185
1186 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1187 return;
1188
1189 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
1190 }
1191
1192 /* Finds general ivs in basic block BB. */
1193
1194 static void
1195 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1196 {
1197 gimple_stmt_iterator bsi;
1198
1199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1200 find_givs_in_stmt (data, gsi_stmt (bsi));
1201 }
1202
1203 /* Finds general ivs. */
1204
1205 static void
1206 find_givs (struct ivopts_data *data)
1207 {
1208 struct loop *loop = data->current_loop;
1209 basic_block *body = get_loop_body_in_dom_order (loop);
1210 unsigned i;
1211
1212 for (i = 0; i < loop->num_nodes; i++)
1213 find_givs_in_bb (data, body[i]);
1214 free (body);
1215 }
1216
1217 /* For each ssa name defined in LOOP determines whether it is an induction
1218 variable and if so, its initial value and step. */
1219
1220 static bool
1221 find_induction_variables (struct ivopts_data *data)
1222 {
1223 unsigned i;
1224 bitmap_iterator bi;
1225
1226 if (!find_bivs (data))
1227 return false;
1228
1229 find_givs (data);
1230 mark_bivs (data);
1231
1232 if (dump_file && (dump_flags & TDF_DETAILS))
1233 {
1234 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1235
1236 if (niter)
1237 {
1238 fprintf (dump_file, " number of iterations ");
1239 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1240 if (!integer_zerop (niter->may_be_zero))
1241 {
1242 fprintf (dump_file, "; zero if ");
1243 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1244 }
1245 fprintf (dump_file, "\n\n");
1246 };
1247
1248 fprintf (dump_file, "Induction variables:\n\n");
1249
1250 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1251 {
1252 if (ver_info (data, i)->iv)
1253 dump_iv (dump_file, ver_info (data, i)->iv);
1254 }
1255 }
1256
1257 return true;
1258 }
1259
1260 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1261
1262 static struct iv_use *
1263 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1264 gimple stmt, enum use_type use_type)
1265 {
1266 struct iv_use *use = XCNEW (struct iv_use);
1267
1268 use->id = n_iv_uses (data);
1269 use->type = use_type;
1270 use->iv = iv;
1271 use->stmt = stmt;
1272 use->op_p = use_p;
1273 use->related_cands = BITMAP_ALLOC (NULL);
1274
1275 /* To avoid showing ssa name in the dumps, if it was not reset by the
1276 caller. */
1277 iv->ssa_name = NULL_TREE;
1278
1279 if (dump_file && (dump_flags & TDF_DETAILS))
1280 dump_use (dump_file, use);
1281
1282 data->iv_uses.safe_push (use);
1283
1284 return use;
1285 }
1286
1287 /* Checks whether OP is a loop-level invariant and if so, records it.
1288 NONLINEAR_USE is true if the invariant is used in a way we do not
1289 handle specially. */
1290
1291 static void
1292 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1293 {
1294 basic_block bb;
1295 struct version_info *info;
1296
1297 if (TREE_CODE (op) != SSA_NAME
1298 || virtual_operand_p (op))
1299 return;
1300
1301 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1302 if (bb
1303 && flow_bb_inside_loop_p (data->current_loop, bb))
1304 return;
1305
1306 info = name_info (data, op);
1307 info->name = op;
1308 info->has_nonlin_use |= nonlinear_use;
1309 if (!info->inv_id)
1310 info->inv_id = ++data->max_inv_id;
1311 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1312 }
1313
1314 /* Checks whether the use OP is interesting and if so, records it. */
1315
1316 static struct iv_use *
1317 find_interesting_uses_op (struct ivopts_data *data, tree op)
1318 {
1319 struct iv *iv;
1320 struct iv *civ;
1321 gimple stmt;
1322 struct iv_use *use;
1323
1324 if (TREE_CODE (op) != SSA_NAME)
1325 return NULL;
1326
1327 iv = get_iv (data, op);
1328 if (!iv)
1329 return NULL;
1330
1331 if (iv->have_use_for)
1332 {
1333 use = iv_use (data, iv->use_id);
1334
1335 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1336 return use;
1337 }
1338
1339 if (integer_zerop (iv->step))
1340 {
1341 record_invariant (data, op, true);
1342 return NULL;
1343 }
1344 iv->have_use_for = true;
1345
1346 civ = XNEW (struct iv);
1347 *civ = *iv;
1348
1349 stmt = SSA_NAME_DEF_STMT (op);
1350 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1351 || is_gimple_assign (stmt));
1352
1353 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1354 iv->use_id = use->id;
1355
1356 return use;
1357 }
1358
1359 /* Given a condition in statement STMT, checks whether it is a compare
1360 of an induction variable and an invariant. If this is the case,
1361 CONTROL_VAR is set to location of the iv, BOUND to the location of
1362 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1363 induction variable descriptions, and true is returned. If this is not
1364 the case, CONTROL_VAR and BOUND are set to the arguments of the
1365 condition and false is returned. */
1366
1367 static bool
1368 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1369 tree **control_var, tree **bound,
1370 struct iv **iv_var, struct iv **iv_bound)
1371 {
1372 /* The objects returned when COND has constant operands. */
1373 static struct iv const_iv;
1374 static tree zero;
1375 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1376 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1377 bool ret = false;
1378
1379 if (gimple_code (stmt) == GIMPLE_COND)
1380 {
1381 op0 = gimple_cond_lhs_ptr (stmt);
1382 op1 = gimple_cond_rhs_ptr (stmt);
1383 }
1384 else
1385 {
1386 op0 = gimple_assign_rhs1_ptr (stmt);
1387 op1 = gimple_assign_rhs2_ptr (stmt);
1388 }
1389
1390 zero = integer_zero_node;
1391 const_iv.step = integer_zero_node;
1392
1393 if (TREE_CODE (*op0) == SSA_NAME)
1394 iv0 = get_iv (data, *op0);
1395 if (TREE_CODE (*op1) == SSA_NAME)
1396 iv1 = get_iv (data, *op1);
1397
1398 /* Exactly one of the compared values must be an iv, and the other one must
1399 be an invariant. */
1400 if (!iv0 || !iv1)
1401 goto end;
1402
1403 if (integer_zerop (iv0->step))
1404 {
1405 /* Control variable may be on the other side. */
1406 tmp_op = op0; op0 = op1; op1 = tmp_op;
1407 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1408 }
1409 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1410
1411 end:
1412 if (control_var)
1413 *control_var = op0;;
1414 if (iv_var)
1415 *iv_var = iv0;;
1416 if (bound)
1417 *bound = op1;
1418 if (iv_bound)
1419 *iv_bound = iv1;
1420
1421 return ret;
1422 }
1423
1424 /* Checks whether the condition in STMT is interesting and if so,
1425 records it. */
1426
1427 static void
1428 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1429 {
1430 tree *var_p, *bound_p;
1431 struct iv *var_iv, *civ;
1432
1433 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1434 {
1435 find_interesting_uses_op (data, *var_p);
1436 find_interesting_uses_op (data, *bound_p);
1437 return;
1438 }
1439
1440 civ = XNEW (struct iv);
1441 *civ = *var_iv;
1442 record_use (data, NULL, civ, stmt, USE_COMPARE);
1443 }
1444
1445 /* Returns the outermost loop EXPR is obviously invariant in
1446 relative to the loop LOOP, i.e. if all its operands are defined
1447 outside of the returned loop. Returns NULL if EXPR is not
1448 even obviously invariant in LOOP. */
1449
1450 struct loop *
1451 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1452 {
1453 basic_block def_bb;
1454 unsigned i, len;
1455
1456 if (is_gimple_min_invariant (expr))
1457 return current_loops->tree_root;
1458
1459 if (TREE_CODE (expr) == SSA_NAME)
1460 {
1461 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1462 if (def_bb)
1463 {
1464 if (flow_bb_inside_loop_p (loop, def_bb))
1465 return NULL;
1466 return superloop_at_depth (loop,
1467 loop_depth (def_bb->loop_father) + 1);
1468 }
1469
1470 return current_loops->tree_root;
1471 }
1472
1473 if (!EXPR_P (expr))
1474 return NULL;
1475
1476 unsigned maxdepth = 0;
1477 len = TREE_OPERAND_LENGTH (expr);
1478 for (i = 0; i < len; i++)
1479 {
1480 struct loop *ivloop;
1481 if (!TREE_OPERAND (expr, i))
1482 continue;
1483
1484 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1485 if (!ivloop)
1486 return NULL;
1487 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1488 }
1489
1490 return superloop_at_depth (loop, maxdepth);
1491 }
1492
1493 /* Returns true if expression EXPR is obviously invariant in LOOP,
1494 i.e. if all its operands are defined outside of the LOOP. LOOP
1495 should not be the function body. */
1496
1497 bool
1498 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1499 {
1500 basic_block def_bb;
1501 unsigned i, len;
1502
1503 gcc_assert (loop_depth (loop) > 0);
1504
1505 if (is_gimple_min_invariant (expr))
1506 return true;
1507
1508 if (TREE_CODE (expr) == SSA_NAME)
1509 {
1510 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1511 if (def_bb
1512 && flow_bb_inside_loop_p (loop, def_bb))
1513 return false;
1514
1515 return true;
1516 }
1517
1518 if (!EXPR_P (expr))
1519 return false;
1520
1521 len = TREE_OPERAND_LENGTH (expr);
1522 for (i = 0; i < len; i++)
1523 if (TREE_OPERAND (expr, i)
1524 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1525 return false;
1526
1527 return true;
1528 }
1529
1530 /* Cumulates the steps of indices into DATA and replaces their values with the
1531 initial ones. Returns false when the value of the index cannot be determined.
1532 Callback for for_each_index. */
1533
1534 struct ifs_ivopts_data
1535 {
1536 struct ivopts_data *ivopts_data;
1537 gimple stmt;
1538 tree step;
1539 };
1540
1541 static bool
1542 idx_find_step (tree base, tree *idx, void *data)
1543 {
1544 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1545 struct iv *iv;
1546 tree step, iv_base, iv_step, lbound, off;
1547 struct loop *loop = dta->ivopts_data->current_loop;
1548
1549 /* If base is a component ref, require that the offset of the reference
1550 be invariant. */
1551 if (TREE_CODE (base) == COMPONENT_REF)
1552 {
1553 off = component_ref_field_offset (base);
1554 return expr_invariant_in_loop_p (loop, off);
1555 }
1556
1557 /* If base is array, first check whether we will be able to move the
1558 reference out of the loop (in order to take its address in strength
1559 reduction). In order for this to work we need both lower bound
1560 and step to be loop invariants. */
1561 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1562 {
1563 /* Moreover, for a range, the size needs to be invariant as well. */
1564 if (TREE_CODE (base) == ARRAY_RANGE_REF
1565 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1566 return false;
1567
1568 step = array_ref_element_size (base);
1569 lbound = array_ref_low_bound (base);
1570
1571 if (!expr_invariant_in_loop_p (loop, step)
1572 || !expr_invariant_in_loop_p (loop, lbound))
1573 return false;
1574 }
1575
1576 if (TREE_CODE (*idx) != SSA_NAME)
1577 return true;
1578
1579 iv = get_iv (dta->ivopts_data, *idx);
1580 if (!iv)
1581 return false;
1582
1583 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1584 *&x[0], which is not folded and does not trigger the
1585 ARRAY_REF path below. */
1586 *idx = iv->base;
1587
1588 if (integer_zerop (iv->step))
1589 return true;
1590
1591 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1592 {
1593 step = array_ref_element_size (base);
1594
1595 /* We only handle addresses whose step is an integer constant. */
1596 if (TREE_CODE (step) != INTEGER_CST)
1597 return false;
1598 }
1599 else
1600 /* The step for pointer arithmetics already is 1 byte. */
1601 step = size_one_node;
1602
1603 iv_base = iv->base;
1604 iv_step = iv->step;
1605 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1606 sizetype, &iv_base, &iv_step, dta->stmt,
1607 false))
1608 {
1609 /* The index might wrap. */
1610 return false;
1611 }
1612
1613 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1614 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1615
1616 return true;
1617 }
1618
1619 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1620 object is passed to it in DATA. */
1621
1622 static bool
1623 idx_record_use (tree base, tree *idx,
1624 void *vdata)
1625 {
1626 struct ivopts_data *data = (struct ivopts_data *) vdata;
1627 find_interesting_uses_op (data, *idx);
1628 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1629 {
1630 find_interesting_uses_op (data, array_ref_element_size (base));
1631 find_interesting_uses_op (data, array_ref_low_bound (base));
1632 }
1633 return true;
1634 }
1635
1636 /* If we can prove that TOP = cst * BOT for some constant cst,
1637 store cst to MUL and return true. Otherwise return false.
1638 The returned value is always sign-extended, regardless of the
1639 signedness of TOP and BOT. */
1640
1641 static bool
1642 constant_multiple_of (tree top, tree bot, widest_int *mul)
1643 {
1644 tree mby;
1645 enum tree_code code;
1646 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1647 widest_int res, p0, p1;
1648
1649 STRIP_NOPS (top);
1650 STRIP_NOPS (bot);
1651
1652 if (operand_equal_p (top, bot, 0))
1653 {
1654 *mul = 1;
1655 return true;
1656 }
1657
1658 code = TREE_CODE (top);
1659 switch (code)
1660 {
1661 case MULT_EXPR:
1662 mby = TREE_OPERAND (top, 1);
1663 if (TREE_CODE (mby) != INTEGER_CST)
1664 return false;
1665
1666 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1667 return false;
1668
1669 *mul = wi::sext (res * wi::to_widest (mby), precision);
1670 return true;
1671
1672 case PLUS_EXPR:
1673 case MINUS_EXPR:
1674 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1675 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1676 return false;
1677
1678 if (code == MINUS_EXPR)
1679 p1 = -p1;
1680 *mul = wi::sext (p0 + p1, precision);
1681 return true;
1682
1683 case INTEGER_CST:
1684 if (TREE_CODE (bot) != INTEGER_CST)
1685 return false;
1686
1687 p0 = widest_int::from (top, SIGNED);
1688 p1 = widest_int::from (bot, SIGNED);
1689 if (p1 == 0)
1690 return false;
1691 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1692 return res == 0;
1693
1694 default:
1695 return false;
1696 }
1697 }
1698
1699 /* Return true if memory reference REF with step STEP may be unaligned. */
1700
1701 static bool
1702 may_be_unaligned_p (tree ref, tree step)
1703 {
1704 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1705 thus they are not misaligned. */
1706 if (TREE_CODE (ref) == TARGET_MEM_REF)
1707 return false;
1708
1709 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1710 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1711 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1712
1713 unsigned HOST_WIDE_INT bitpos;
1714 unsigned int ref_align;
1715 get_object_alignment_1 (ref, &ref_align, &bitpos);
1716 if (ref_align < align
1717 || (bitpos % align) != 0
1718 || (bitpos % BITS_PER_UNIT) != 0)
1719 return true;
1720
1721 unsigned int trailing_zeros = tree_ctz (step);
1722 if (trailing_zeros < HOST_BITS_PER_INT
1723 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1724 return true;
1725
1726 return false;
1727 }
1728
1729 /* Return true if EXPR may be non-addressable. */
1730
1731 bool
1732 may_be_nonaddressable_p (tree expr)
1733 {
1734 switch (TREE_CODE (expr))
1735 {
1736 case TARGET_MEM_REF:
1737 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1738 target, thus they are always addressable. */
1739 return false;
1740
1741 case COMPONENT_REF:
1742 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1743 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1744
1745 case VIEW_CONVERT_EXPR:
1746 /* This kind of view-conversions may wrap non-addressable objects
1747 and make them look addressable. After some processing the
1748 non-addressability may be uncovered again, causing ADDR_EXPRs
1749 of inappropriate objects to be built. */
1750 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1751 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1752 return true;
1753
1754 /* ... fall through ... */
1755
1756 case ARRAY_REF:
1757 case ARRAY_RANGE_REF:
1758 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1759
1760 CASE_CONVERT:
1761 return true;
1762
1763 default:
1764 break;
1765 }
1766
1767 return false;
1768 }
1769
1770 /* Finds addresses in *OP_P inside STMT. */
1771
1772 static void
1773 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1774 {
1775 tree base = *op_p, step = size_zero_node;
1776 struct iv *civ;
1777 struct ifs_ivopts_data ifs_ivopts_data;
1778
1779 /* Do not play with volatile memory references. A bit too conservative,
1780 perhaps, but safe. */
1781 if (gimple_has_volatile_ops (stmt))
1782 goto fail;
1783
1784 /* Ignore bitfields for now. Not really something terribly complicated
1785 to handle. TODO. */
1786 if (TREE_CODE (base) == BIT_FIELD_REF)
1787 goto fail;
1788
1789 base = unshare_expr (base);
1790
1791 if (TREE_CODE (base) == TARGET_MEM_REF)
1792 {
1793 tree type = build_pointer_type (TREE_TYPE (base));
1794 tree astep;
1795
1796 if (TMR_BASE (base)
1797 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1798 {
1799 civ = get_iv (data, TMR_BASE (base));
1800 if (!civ)
1801 goto fail;
1802
1803 TMR_BASE (base) = civ->base;
1804 step = civ->step;
1805 }
1806 if (TMR_INDEX2 (base)
1807 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1808 {
1809 civ = get_iv (data, TMR_INDEX2 (base));
1810 if (!civ)
1811 goto fail;
1812
1813 TMR_INDEX2 (base) = civ->base;
1814 step = civ->step;
1815 }
1816 if (TMR_INDEX (base)
1817 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1818 {
1819 civ = get_iv (data, TMR_INDEX (base));
1820 if (!civ)
1821 goto fail;
1822
1823 TMR_INDEX (base) = civ->base;
1824 astep = civ->step;
1825
1826 if (astep)
1827 {
1828 if (TMR_STEP (base))
1829 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1830
1831 step = fold_build2 (PLUS_EXPR, type, step, astep);
1832 }
1833 }
1834
1835 if (integer_zerop (step))
1836 goto fail;
1837 base = tree_mem_ref_addr (type, base);
1838 }
1839 else
1840 {
1841 ifs_ivopts_data.ivopts_data = data;
1842 ifs_ivopts_data.stmt = stmt;
1843 ifs_ivopts_data.step = size_zero_node;
1844 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1845 || integer_zerop (ifs_ivopts_data.step))
1846 goto fail;
1847 step = ifs_ivopts_data.step;
1848
1849 /* Check that the base expression is addressable. This needs
1850 to be done after substituting bases of IVs into it. */
1851 if (may_be_nonaddressable_p (base))
1852 goto fail;
1853
1854 /* Moreover, on strict alignment platforms, check that it is
1855 sufficiently aligned. */
1856 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1857 goto fail;
1858
1859 base = build_fold_addr_expr (base);
1860
1861 /* Substituting bases of IVs into the base expression might
1862 have caused folding opportunities. */
1863 if (TREE_CODE (base) == ADDR_EXPR)
1864 {
1865 tree *ref = &TREE_OPERAND (base, 0);
1866 while (handled_component_p (*ref))
1867 ref = &TREE_OPERAND (*ref, 0);
1868 if (TREE_CODE (*ref) == MEM_REF)
1869 {
1870 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1871 TREE_OPERAND (*ref, 0),
1872 TREE_OPERAND (*ref, 1));
1873 if (tem)
1874 *ref = tem;
1875 }
1876 }
1877 }
1878
1879 civ = alloc_iv (base, step);
1880 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1881 return;
1882
1883 fail:
1884 for_each_index (op_p, idx_record_use, data);
1885 }
1886
1887 /* Finds and records invariants used in STMT. */
1888
1889 static void
1890 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1891 {
1892 ssa_op_iter iter;
1893 use_operand_p use_p;
1894 tree op;
1895
1896 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1897 {
1898 op = USE_FROM_PTR (use_p);
1899 record_invariant (data, op, false);
1900 }
1901 }
1902
1903 /* Finds interesting uses of induction variables in the statement STMT. */
1904
1905 static void
1906 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1907 {
1908 struct iv *iv;
1909 tree op, *lhs, *rhs;
1910 ssa_op_iter iter;
1911 use_operand_p use_p;
1912 enum tree_code code;
1913
1914 find_invariants_stmt (data, stmt);
1915
1916 if (gimple_code (stmt) == GIMPLE_COND)
1917 {
1918 find_interesting_uses_cond (data, stmt);
1919 return;
1920 }
1921
1922 if (is_gimple_assign (stmt))
1923 {
1924 lhs = gimple_assign_lhs_ptr (stmt);
1925 rhs = gimple_assign_rhs1_ptr (stmt);
1926
1927 if (TREE_CODE (*lhs) == SSA_NAME)
1928 {
1929 /* If the statement defines an induction variable, the uses are not
1930 interesting by themselves. */
1931
1932 iv = get_iv (data, *lhs);
1933
1934 if (iv && !integer_zerop (iv->step))
1935 return;
1936 }
1937
1938 code = gimple_assign_rhs_code (stmt);
1939 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1940 && (REFERENCE_CLASS_P (*rhs)
1941 || is_gimple_val (*rhs)))
1942 {
1943 if (REFERENCE_CLASS_P (*rhs))
1944 find_interesting_uses_address (data, stmt, rhs);
1945 else
1946 find_interesting_uses_op (data, *rhs);
1947
1948 if (REFERENCE_CLASS_P (*lhs))
1949 find_interesting_uses_address (data, stmt, lhs);
1950 return;
1951 }
1952 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1953 {
1954 find_interesting_uses_cond (data, stmt);
1955 return;
1956 }
1957
1958 /* TODO -- we should also handle address uses of type
1959
1960 memory = call (whatever);
1961
1962 and
1963
1964 call (memory). */
1965 }
1966
1967 if (gimple_code (stmt) == GIMPLE_PHI
1968 && gimple_bb (stmt) == data->current_loop->header)
1969 {
1970 iv = get_iv (data, PHI_RESULT (stmt));
1971
1972 if (iv && !integer_zerop (iv->step))
1973 return;
1974 }
1975
1976 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1977 {
1978 op = USE_FROM_PTR (use_p);
1979
1980 if (TREE_CODE (op) != SSA_NAME)
1981 continue;
1982
1983 iv = get_iv (data, op);
1984 if (!iv)
1985 continue;
1986
1987 find_interesting_uses_op (data, op);
1988 }
1989 }
1990
1991 /* Finds interesting uses of induction variables outside of loops
1992 on loop exit edge EXIT. */
1993
1994 static void
1995 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1996 {
1997 gimple phi;
1998 gimple_stmt_iterator psi;
1999 tree def;
2000
2001 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2002 {
2003 phi = gsi_stmt (psi);
2004 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2005 if (!virtual_operand_p (def))
2006 find_interesting_uses_op (data, def);
2007 }
2008 }
2009
2010 /* Finds uses of the induction variables that are interesting. */
2011
2012 static void
2013 find_interesting_uses (struct ivopts_data *data)
2014 {
2015 basic_block bb;
2016 gimple_stmt_iterator bsi;
2017 basic_block *body = get_loop_body (data->current_loop);
2018 unsigned i;
2019 struct version_info *info;
2020 edge e;
2021
2022 if (dump_file && (dump_flags & TDF_DETAILS))
2023 fprintf (dump_file, "Uses:\n\n");
2024
2025 for (i = 0; i < data->current_loop->num_nodes; i++)
2026 {
2027 edge_iterator ei;
2028 bb = body[i];
2029
2030 FOR_EACH_EDGE (e, ei, bb->succs)
2031 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2032 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2033 find_interesting_uses_outside (data, e);
2034
2035 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2036 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2037 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2038 if (!is_gimple_debug (gsi_stmt (bsi)))
2039 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2040 }
2041
2042 if (dump_file && (dump_flags & TDF_DETAILS))
2043 {
2044 bitmap_iterator bi;
2045
2046 fprintf (dump_file, "\n");
2047
2048 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2049 {
2050 info = ver_info (data, i);
2051 if (info->inv_id)
2052 {
2053 fprintf (dump_file, " ");
2054 print_generic_expr (dump_file, info->name, TDF_SLIM);
2055 fprintf (dump_file, " is invariant (%d)%s\n",
2056 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2057 }
2058 }
2059
2060 fprintf (dump_file, "\n");
2061 }
2062
2063 free (body);
2064 }
2065
2066 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2067 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2068 we are at the top-level of the processed address. */
2069
2070 static tree
2071 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2072 HOST_WIDE_INT *offset)
2073 {
2074 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2075 enum tree_code code;
2076 tree type, orig_type = TREE_TYPE (expr);
2077 HOST_WIDE_INT off0, off1, st;
2078 tree orig_expr = expr;
2079
2080 STRIP_NOPS (expr);
2081
2082 type = TREE_TYPE (expr);
2083 code = TREE_CODE (expr);
2084 *offset = 0;
2085
2086 switch (code)
2087 {
2088 case INTEGER_CST:
2089 if (!cst_and_fits_in_hwi (expr)
2090 || integer_zerop (expr))
2091 return orig_expr;
2092
2093 *offset = int_cst_value (expr);
2094 return build_int_cst (orig_type, 0);
2095
2096 case POINTER_PLUS_EXPR:
2097 case PLUS_EXPR:
2098 case MINUS_EXPR:
2099 op0 = TREE_OPERAND (expr, 0);
2100 op1 = TREE_OPERAND (expr, 1);
2101
2102 op0 = strip_offset_1 (op0, false, false, &off0);
2103 op1 = strip_offset_1 (op1, false, false, &off1);
2104
2105 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2106 if (op0 == TREE_OPERAND (expr, 0)
2107 && op1 == TREE_OPERAND (expr, 1))
2108 return orig_expr;
2109
2110 if (integer_zerop (op1))
2111 expr = op0;
2112 else if (integer_zerop (op0))
2113 {
2114 if (code == MINUS_EXPR)
2115 expr = fold_build1 (NEGATE_EXPR, type, op1);
2116 else
2117 expr = op1;
2118 }
2119 else
2120 expr = fold_build2 (code, type, op0, op1);
2121
2122 return fold_convert (orig_type, expr);
2123
2124 case MULT_EXPR:
2125 op1 = TREE_OPERAND (expr, 1);
2126 if (!cst_and_fits_in_hwi (op1))
2127 return orig_expr;
2128
2129 op0 = TREE_OPERAND (expr, 0);
2130 op0 = strip_offset_1 (op0, false, false, &off0);
2131 if (op0 == TREE_OPERAND (expr, 0))
2132 return orig_expr;
2133
2134 *offset = off0 * int_cst_value (op1);
2135 if (integer_zerop (op0))
2136 expr = op0;
2137 else
2138 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2139
2140 return fold_convert (orig_type, expr);
2141
2142 case ARRAY_REF:
2143 case ARRAY_RANGE_REF:
2144 if (!inside_addr)
2145 return orig_expr;
2146
2147 step = array_ref_element_size (expr);
2148 if (!cst_and_fits_in_hwi (step))
2149 break;
2150
2151 st = int_cst_value (step);
2152 op1 = TREE_OPERAND (expr, 1);
2153 op1 = strip_offset_1 (op1, false, false, &off1);
2154 *offset = off1 * st;
2155
2156 if (top_compref
2157 && integer_zerop (op1))
2158 {
2159 /* Strip the component reference completely. */
2160 op0 = TREE_OPERAND (expr, 0);
2161 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2162 *offset += off0;
2163 return op0;
2164 }
2165 break;
2166
2167 case COMPONENT_REF:
2168 {
2169 tree field;
2170
2171 if (!inside_addr)
2172 return orig_expr;
2173
2174 tmp = component_ref_field_offset (expr);
2175 field = TREE_OPERAND (expr, 1);
2176 if (top_compref
2177 && cst_and_fits_in_hwi (tmp)
2178 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2179 {
2180 HOST_WIDE_INT boffset, abs_off;
2181
2182 /* Strip the component reference completely. */
2183 op0 = TREE_OPERAND (expr, 0);
2184 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2185 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2186 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2187 if (boffset < 0)
2188 abs_off = -abs_off;
2189
2190 *offset = off0 + int_cst_value (tmp) + abs_off;
2191 return op0;
2192 }
2193 }
2194 break;
2195
2196 case ADDR_EXPR:
2197 op0 = TREE_OPERAND (expr, 0);
2198 op0 = strip_offset_1 (op0, true, true, &off0);
2199 *offset += off0;
2200
2201 if (op0 == TREE_OPERAND (expr, 0))
2202 return orig_expr;
2203
2204 expr = build_fold_addr_expr (op0);
2205 return fold_convert (orig_type, expr);
2206
2207 case MEM_REF:
2208 /* ??? Offset operand? */
2209 inside_addr = false;
2210 break;
2211
2212 default:
2213 return orig_expr;
2214 }
2215
2216 /* Default handling of expressions for that we want to recurse into
2217 the first operand. */
2218 op0 = TREE_OPERAND (expr, 0);
2219 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2220 *offset += off0;
2221
2222 if (op0 == TREE_OPERAND (expr, 0)
2223 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2224 return orig_expr;
2225
2226 expr = copy_node (expr);
2227 TREE_OPERAND (expr, 0) = op0;
2228 if (op1)
2229 TREE_OPERAND (expr, 1) = op1;
2230
2231 /* Inside address, we might strip the top level component references,
2232 thus changing type of the expression. Handling of ADDR_EXPR
2233 will fix that. */
2234 expr = fold_convert (orig_type, expr);
2235
2236 return expr;
2237 }
2238
2239 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2240
2241 static tree
2242 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2243 {
2244 HOST_WIDE_INT off;
2245 tree core = strip_offset_1 (expr, false, false, &off);
2246 *offset = off;
2247 return core;
2248 }
2249
2250 /* Returns variant of TYPE that can be used as base for different uses.
2251 We return unsigned type with the same precision, which avoids problems
2252 with overflows. */
2253
2254 static tree
2255 generic_type_for (tree type)
2256 {
2257 if (POINTER_TYPE_P (type))
2258 return unsigned_type_for (type);
2259
2260 if (TYPE_UNSIGNED (type))
2261 return type;
2262
2263 return unsigned_type_for (type);
2264 }
2265
2266 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2267 the bitmap to that we should store it. */
2268
2269 static struct ivopts_data *fd_ivopts_data;
2270 static tree
2271 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2272 {
2273 bitmap *depends_on = (bitmap *) data;
2274 struct version_info *info;
2275
2276 if (TREE_CODE (*expr_p) != SSA_NAME)
2277 return NULL_TREE;
2278 info = name_info (fd_ivopts_data, *expr_p);
2279
2280 if (!info->inv_id || info->has_nonlin_use)
2281 return NULL_TREE;
2282
2283 if (!*depends_on)
2284 *depends_on = BITMAP_ALLOC (NULL);
2285 bitmap_set_bit (*depends_on, info->inv_id);
2286
2287 return NULL_TREE;
2288 }
2289
2290 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2291 position to POS. If USE is not NULL, the candidate is set as related to
2292 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2293 replacement of the final value of the iv by a direct computation. */
2294
2295 static struct iv_cand *
2296 add_candidate_1 (struct ivopts_data *data,
2297 tree base, tree step, bool important, enum iv_position pos,
2298 struct iv_use *use, gimple incremented_at)
2299 {
2300 unsigned i;
2301 struct iv_cand *cand = NULL;
2302 tree type, orig_type;
2303
2304 /* For non-original variables, make sure their values are computed in a type
2305 that does not invoke undefined behavior on overflows (since in general,
2306 we cannot prove that these induction variables are non-wrapping). */
2307 if (pos != IP_ORIGINAL)
2308 {
2309 orig_type = TREE_TYPE (base);
2310 type = generic_type_for (orig_type);
2311 if (type != orig_type)
2312 {
2313 base = fold_convert (type, base);
2314 step = fold_convert (type, step);
2315 }
2316 }
2317
2318 for (i = 0; i < n_iv_cands (data); i++)
2319 {
2320 cand = iv_cand (data, i);
2321
2322 if (cand->pos != pos)
2323 continue;
2324
2325 if (cand->incremented_at != incremented_at
2326 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2327 && cand->ainc_use != use))
2328 continue;
2329
2330 if (!cand->iv)
2331 {
2332 if (!base && !step)
2333 break;
2334
2335 continue;
2336 }
2337
2338 if (!base && !step)
2339 continue;
2340
2341 if (operand_equal_p (base, cand->iv->base, 0)
2342 && operand_equal_p (step, cand->iv->step, 0)
2343 && (TYPE_PRECISION (TREE_TYPE (base))
2344 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2345 break;
2346 }
2347
2348 if (i == n_iv_cands (data))
2349 {
2350 cand = XCNEW (struct iv_cand);
2351 cand->id = i;
2352
2353 if (!base && !step)
2354 cand->iv = NULL;
2355 else
2356 cand->iv = alloc_iv (base, step);
2357
2358 cand->pos = pos;
2359 if (pos != IP_ORIGINAL && cand->iv)
2360 {
2361 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2362 cand->var_after = cand->var_before;
2363 }
2364 cand->important = important;
2365 cand->incremented_at = incremented_at;
2366 data->iv_candidates.safe_push (cand);
2367
2368 if (step
2369 && TREE_CODE (step) != INTEGER_CST)
2370 {
2371 fd_ivopts_data = data;
2372 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2373 }
2374
2375 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2376 cand->ainc_use = use;
2377 else
2378 cand->ainc_use = NULL;
2379
2380 if (dump_file && (dump_flags & TDF_DETAILS))
2381 dump_cand (dump_file, cand);
2382 }
2383
2384 if (important && !cand->important)
2385 {
2386 cand->important = true;
2387 if (dump_file && (dump_flags & TDF_DETAILS))
2388 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2389 }
2390
2391 if (use)
2392 {
2393 bitmap_set_bit (use->related_cands, i);
2394 if (dump_file && (dump_flags & TDF_DETAILS))
2395 fprintf (dump_file, "Candidate %d is related to use %d\n",
2396 cand->id, use->id);
2397 }
2398
2399 return cand;
2400 }
2401
2402 /* Returns true if incrementing the induction variable at the end of the LOOP
2403 is allowed.
2404
2405 The purpose is to avoid splitting latch edge with a biv increment, thus
2406 creating a jump, possibly confusing other optimization passes and leaving
2407 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2408 is not available (so we do not have a better alternative), or if the latch
2409 edge is already nonempty. */
2410
2411 static bool
2412 allow_ip_end_pos_p (struct loop *loop)
2413 {
2414 if (!ip_normal_pos (loop))
2415 return true;
2416
2417 if (!empty_block_p (ip_end_pos (loop)))
2418 return true;
2419
2420 return false;
2421 }
2422
2423 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2424 Important field is set to IMPORTANT. */
2425
2426 static void
2427 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2428 bool important, struct iv_use *use)
2429 {
2430 basic_block use_bb = gimple_bb (use->stmt);
2431 enum machine_mode mem_mode;
2432 unsigned HOST_WIDE_INT cstepi;
2433
2434 /* If we insert the increment in any position other than the standard
2435 ones, we must ensure that it is incremented once per iteration.
2436 It must not be in an inner nested loop, or one side of an if
2437 statement. */
2438 if (use_bb->loop_father != data->current_loop
2439 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2440 || stmt_could_throw_p (use->stmt)
2441 || !cst_and_fits_in_hwi (step))
2442 return;
2443
2444 cstepi = int_cst_value (step);
2445
2446 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2447 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2448 || USE_STORE_PRE_INCREMENT (mem_mode))
2449 && GET_MODE_SIZE (mem_mode) == cstepi)
2450 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2451 || USE_STORE_PRE_DECREMENT (mem_mode))
2452 && GET_MODE_SIZE (mem_mode) == -cstepi))
2453 {
2454 enum tree_code code = MINUS_EXPR;
2455 tree new_base;
2456 tree new_step = step;
2457
2458 if (POINTER_TYPE_P (TREE_TYPE (base)))
2459 {
2460 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2461 code = POINTER_PLUS_EXPR;
2462 }
2463 else
2464 new_step = fold_convert (TREE_TYPE (base), new_step);
2465 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2466 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2467 use->stmt);
2468 }
2469 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2470 || USE_STORE_POST_INCREMENT (mem_mode))
2471 && GET_MODE_SIZE (mem_mode) == cstepi)
2472 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2473 || USE_STORE_POST_DECREMENT (mem_mode))
2474 && GET_MODE_SIZE (mem_mode) == -cstepi))
2475 {
2476 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2477 use->stmt);
2478 }
2479 }
2480
2481 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2482 position to POS. If USE is not NULL, the candidate is set as related to
2483 it. The candidate computation is scheduled on all available positions. */
2484
2485 static void
2486 add_candidate (struct ivopts_data *data,
2487 tree base, tree step, bool important, struct iv_use *use)
2488 {
2489 if (ip_normal_pos (data->current_loop))
2490 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2491 if (ip_end_pos (data->current_loop)
2492 && allow_ip_end_pos_p (data->current_loop))
2493 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2494
2495 if (use != NULL && use->type == USE_ADDRESS)
2496 add_autoinc_candidates (data, base, step, important, use);
2497 }
2498
2499 /* Adds standard iv candidates. */
2500
2501 static void
2502 add_standard_iv_candidates (struct ivopts_data *data)
2503 {
2504 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2505
2506 /* The same for a double-integer type if it is still fast enough. */
2507 if (TYPE_PRECISION
2508 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2509 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2510 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2511 build_int_cst (long_integer_type_node, 1), true, NULL);
2512
2513 /* The same for a double-integer type if it is still fast enough. */
2514 if (TYPE_PRECISION
2515 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2516 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2517 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2518 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2519 }
2520
2521
2522 /* Adds candidates bases on the old induction variable IV. */
2523
2524 static void
2525 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2526 {
2527 gimple phi;
2528 tree def;
2529 struct iv_cand *cand;
2530
2531 add_candidate (data, iv->base, iv->step, true, NULL);
2532
2533 /* The same, but with initial value zero. */
2534 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2535 add_candidate (data, size_int (0), iv->step, true, NULL);
2536 else
2537 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2538 iv->step, true, NULL);
2539
2540 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2541 if (gimple_code (phi) == GIMPLE_PHI)
2542 {
2543 /* Additionally record the possibility of leaving the original iv
2544 untouched. */
2545 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2546 /* Don't add candidate if it's from another PHI node because
2547 it's an affine iv appearing in the form of PEELED_CHREC. */
2548 phi = SSA_NAME_DEF_STMT (def);
2549 if (gimple_code (phi) != GIMPLE_PHI)
2550 {
2551 cand = add_candidate_1 (data,
2552 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2553 SSA_NAME_DEF_STMT (def));
2554 cand->var_before = iv->ssa_name;
2555 cand->var_after = def;
2556 }
2557 else
2558 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2559 }
2560 }
2561
2562 /* Adds candidates based on the old induction variables. */
2563
2564 static void
2565 add_old_ivs_candidates (struct ivopts_data *data)
2566 {
2567 unsigned i;
2568 struct iv *iv;
2569 bitmap_iterator bi;
2570
2571 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2572 {
2573 iv = ver_info (data, i)->iv;
2574 if (iv && iv->biv_p && !integer_zerop (iv->step))
2575 add_old_iv_candidates (data, iv);
2576 }
2577 }
2578
2579 /* Adds candidates based on the value of the induction variable IV and USE. */
2580
2581 static void
2582 add_iv_value_candidates (struct ivopts_data *data,
2583 struct iv *iv, struct iv_use *use)
2584 {
2585 unsigned HOST_WIDE_INT offset;
2586 tree base;
2587 tree basetype;
2588
2589 add_candidate (data, iv->base, iv->step, false, use);
2590
2591 /* The same, but with initial value zero. Make such variable important,
2592 since it is generic enough so that possibly many uses may be based
2593 on it. */
2594 basetype = TREE_TYPE (iv->base);
2595 if (POINTER_TYPE_P (basetype))
2596 basetype = sizetype;
2597 add_candidate (data, build_int_cst (basetype, 0),
2598 iv->step, true, use);
2599
2600 /* Third, try removing the constant offset. Make sure to even
2601 add a candidate for &a[0] vs. (T *)&a. */
2602 base = strip_offset (iv->base, &offset);
2603 if (offset
2604 || base != iv->base)
2605 add_candidate (data, base, iv->step, false, use);
2606 }
2607
2608 /* Adds candidates based on the uses. */
2609
2610 static void
2611 add_derived_ivs_candidates (struct ivopts_data *data)
2612 {
2613 unsigned i;
2614
2615 for (i = 0; i < n_iv_uses (data); i++)
2616 {
2617 struct iv_use *use = iv_use (data, i);
2618
2619 if (!use)
2620 continue;
2621
2622 switch (use->type)
2623 {
2624 case USE_NONLINEAR_EXPR:
2625 case USE_COMPARE:
2626 case USE_ADDRESS:
2627 /* Just add the ivs based on the value of the iv used here. */
2628 add_iv_value_candidates (data, use->iv, use);
2629 break;
2630
2631 default:
2632 gcc_unreachable ();
2633 }
2634 }
2635 }
2636
2637 /* Record important candidates and add them to related_cands bitmaps
2638 if needed. */
2639
2640 static void
2641 record_important_candidates (struct ivopts_data *data)
2642 {
2643 unsigned i;
2644 struct iv_use *use;
2645
2646 for (i = 0; i < n_iv_cands (data); i++)
2647 {
2648 struct iv_cand *cand = iv_cand (data, i);
2649
2650 if (cand->important)
2651 bitmap_set_bit (data->important_candidates, i);
2652 }
2653
2654 data->consider_all_candidates = (n_iv_cands (data)
2655 <= CONSIDER_ALL_CANDIDATES_BOUND);
2656
2657 if (data->consider_all_candidates)
2658 {
2659 /* We will not need "related_cands" bitmaps in this case,
2660 so release them to decrease peak memory consumption. */
2661 for (i = 0; i < n_iv_uses (data); i++)
2662 {
2663 use = iv_use (data, i);
2664 BITMAP_FREE (use->related_cands);
2665 }
2666 }
2667 else
2668 {
2669 /* Add important candidates to the related_cands bitmaps. */
2670 for (i = 0; i < n_iv_uses (data); i++)
2671 bitmap_ior_into (iv_use (data, i)->related_cands,
2672 data->important_candidates);
2673 }
2674 }
2675
2676 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2677 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2678 we allocate a simple list to every use. */
2679
2680 static void
2681 alloc_use_cost_map (struct ivopts_data *data)
2682 {
2683 unsigned i, size, s;
2684
2685 for (i = 0; i < n_iv_uses (data); i++)
2686 {
2687 struct iv_use *use = iv_use (data, i);
2688
2689 if (data->consider_all_candidates)
2690 size = n_iv_cands (data);
2691 else
2692 {
2693 s = bitmap_count_bits (use->related_cands);
2694
2695 /* Round up to the power of two, so that moduling by it is fast. */
2696 size = s ? (1 << ceil_log2 (s)) : 1;
2697 }
2698
2699 use->n_map_members = size;
2700 use->cost_map = XCNEWVEC (struct cost_pair, size);
2701 }
2702 }
2703
2704 /* Returns description of computation cost of expression whose runtime
2705 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2706
2707 static comp_cost
2708 new_cost (unsigned runtime, unsigned complexity)
2709 {
2710 comp_cost cost;
2711
2712 cost.cost = runtime;
2713 cost.complexity = complexity;
2714
2715 return cost;
2716 }
2717
2718 /* Adds costs COST1 and COST2. */
2719
2720 static comp_cost
2721 add_costs (comp_cost cost1, comp_cost cost2)
2722 {
2723 cost1.cost += cost2.cost;
2724 cost1.complexity += cost2.complexity;
2725
2726 return cost1;
2727 }
2728 /* Subtracts costs COST1 and COST2. */
2729
2730 static comp_cost
2731 sub_costs (comp_cost cost1, comp_cost cost2)
2732 {
2733 cost1.cost -= cost2.cost;
2734 cost1.complexity -= cost2.complexity;
2735
2736 return cost1;
2737 }
2738
2739 /* Returns a negative number if COST1 < COST2, a positive number if
2740 COST1 > COST2, and 0 if COST1 = COST2. */
2741
2742 static int
2743 compare_costs (comp_cost cost1, comp_cost cost2)
2744 {
2745 if (cost1.cost == cost2.cost)
2746 return cost1.complexity - cost2.complexity;
2747
2748 return cost1.cost - cost2.cost;
2749 }
2750
2751 /* Returns true if COST is infinite. */
2752
2753 static bool
2754 infinite_cost_p (comp_cost cost)
2755 {
2756 return cost.cost == INFTY;
2757 }
2758
2759 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2760 on invariants DEPENDS_ON and that the value used in expressing it
2761 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2762
2763 static void
2764 set_use_iv_cost (struct ivopts_data *data,
2765 struct iv_use *use, struct iv_cand *cand,
2766 comp_cost cost, bitmap depends_on, tree value,
2767 enum tree_code comp, int inv_expr_id)
2768 {
2769 unsigned i, s;
2770
2771 if (infinite_cost_p (cost))
2772 {
2773 BITMAP_FREE (depends_on);
2774 return;
2775 }
2776
2777 if (data->consider_all_candidates)
2778 {
2779 use->cost_map[cand->id].cand = cand;
2780 use->cost_map[cand->id].cost = cost;
2781 use->cost_map[cand->id].depends_on = depends_on;
2782 use->cost_map[cand->id].value = value;
2783 use->cost_map[cand->id].comp = comp;
2784 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2785 return;
2786 }
2787
2788 /* n_map_members is a power of two, so this computes modulo. */
2789 s = cand->id & (use->n_map_members - 1);
2790 for (i = s; i < use->n_map_members; i++)
2791 if (!use->cost_map[i].cand)
2792 goto found;
2793 for (i = 0; i < s; i++)
2794 if (!use->cost_map[i].cand)
2795 goto found;
2796
2797 gcc_unreachable ();
2798
2799 found:
2800 use->cost_map[i].cand = cand;
2801 use->cost_map[i].cost = cost;
2802 use->cost_map[i].depends_on = depends_on;
2803 use->cost_map[i].value = value;
2804 use->cost_map[i].comp = comp;
2805 use->cost_map[i].inv_expr_id = inv_expr_id;
2806 }
2807
2808 /* Gets cost of (USE, CANDIDATE) pair. */
2809
2810 static struct cost_pair *
2811 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2812 struct iv_cand *cand)
2813 {
2814 unsigned i, s;
2815 struct cost_pair *ret;
2816
2817 if (!cand)
2818 return NULL;
2819
2820 if (data->consider_all_candidates)
2821 {
2822 ret = use->cost_map + cand->id;
2823 if (!ret->cand)
2824 return NULL;
2825
2826 return ret;
2827 }
2828
2829 /* n_map_members is a power of two, so this computes modulo. */
2830 s = cand->id & (use->n_map_members - 1);
2831 for (i = s; i < use->n_map_members; i++)
2832 if (use->cost_map[i].cand == cand)
2833 return use->cost_map + i;
2834 else if (use->cost_map[i].cand == NULL)
2835 return NULL;
2836 for (i = 0; i < s; i++)
2837 if (use->cost_map[i].cand == cand)
2838 return use->cost_map + i;
2839 else if (use->cost_map[i].cand == NULL)
2840 return NULL;
2841
2842 return NULL;
2843 }
2844
2845 /* Returns estimate on cost of computing SEQ. */
2846
2847 static unsigned
2848 seq_cost (rtx seq, bool speed)
2849 {
2850 unsigned cost = 0;
2851 rtx set;
2852
2853 for (; seq; seq = NEXT_INSN (seq))
2854 {
2855 set = single_set (seq);
2856 if (set)
2857 cost += set_src_cost (SET_SRC (set), speed);
2858 else
2859 cost++;
2860 }
2861
2862 return cost;
2863 }
2864
2865 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2866 static rtx
2867 produce_memory_decl_rtl (tree obj, int *regno)
2868 {
2869 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2870 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2871 rtx x;
2872
2873 gcc_assert (obj);
2874 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2875 {
2876 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2877 x = gen_rtx_SYMBOL_REF (address_mode, name);
2878 SET_SYMBOL_REF_DECL (x, obj);
2879 x = gen_rtx_MEM (DECL_MODE (obj), x);
2880 set_mem_addr_space (x, as);
2881 targetm.encode_section_info (obj, x, true);
2882 }
2883 else
2884 {
2885 x = gen_raw_REG (address_mode, (*regno)++);
2886 x = gen_rtx_MEM (DECL_MODE (obj), x);
2887 set_mem_addr_space (x, as);
2888 }
2889
2890 return x;
2891 }
2892
2893 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2894 walk_tree. DATA contains the actual fake register number. */
2895
2896 static tree
2897 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2898 {
2899 tree obj = NULL_TREE;
2900 rtx x = NULL_RTX;
2901 int *regno = (int *) data;
2902
2903 switch (TREE_CODE (*expr_p))
2904 {
2905 case ADDR_EXPR:
2906 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2907 handled_component_p (*expr_p);
2908 expr_p = &TREE_OPERAND (*expr_p, 0))
2909 continue;
2910 obj = *expr_p;
2911 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2912 x = produce_memory_decl_rtl (obj, regno);
2913 break;
2914
2915 case SSA_NAME:
2916 *ws = 0;
2917 obj = SSA_NAME_VAR (*expr_p);
2918 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2919 if (!obj)
2920 return NULL_TREE;
2921 if (!DECL_RTL_SET_P (obj))
2922 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2923 break;
2924
2925 case VAR_DECL:
2926 case PARM_DECL:
2927 case RESULT_DECL:
2928 *ws = 0;
2929 obj = *expr_p;
2930
2931 if (DECL_RTL_SET_P (obj))
2932 break;
2933
2934 if (DECL_MODE (obj) == BLKmode)
2935 x = produce_memory_decl_rtl (obj, regno);
2936 else
2937 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2938
2939 break;
2940
2941 default:
2942 break;
2943 }
2944
2945 if (x)
2946 {
2947 decl_rtl_to_reset.safe_push (obj);
2948 SET_DECL_RTL (obj, x);
2949 }
2950
2951 return NULL_TREE;
2952 }
2953
2954 /* Determines cost of the computation of EXPR. */
2955
2956 static unsigned
2957 computation_cost (tree expr, bool speed)
2958 {
2959 rtx seq, rslt;
2960 tree type = TREE_TYPE (expr);
2961 unsigned cost;
2962 /* Avoid using hard regs in ways which may be unsupported. */
2963 int regno = LAST_VIRTUAL_REGISTER + 1;
2964 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2965 enum node_frequency real_frequency = node->frequency;
2966
2967 node->frequency = NODE_FREQUENCY_NORMAL;
2968 crtl->maybe_hot_insn_p = speed;
2969 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2970 start_sequence ();
2971 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2972 seq = get_insns ();
2973 end_sequence ();
2974 default_rtl_profile ();
2975 node->frequency = real_frequency;
2976
2977 cost = seq_cost (seq, speed);
2978 if (MEM_P (rslt))
2979 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2980 TYPE_ADDR_SPACE (type), speed);
2981 else if (!REG_P (rslt))
2982 cost += set_src_cost (rslt, speed);
2983
2984 return cost;
2985 }
2986
2987 /* Returns variable containing the value of candidate CAND at statement AT. */
2988
2989 static tree
2990 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2991 {
2992 if (stmt_after_increment (loop, cand, stmt))
2993 return cand->var_after;
2994 else
2995 return cand->var_before;
2996 }
2997
2998 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2999 same precision that is at least as wide as the precision of TYPE, stores
3000 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3001 type of A and B. */
3002
3003 static tree
3004 determine_common_wider_type (tree *a, tree *b)
3005 {
3006 tree wider_type = NULL;
3007 tree suba, subb;
3008 tree atype = TREE_TYPE (*a);
3009
3010 if (CONVERT_EXPR_P (*a))
3011 {
3012 suba = TREE_OPERAND (*a, 0);
3013 wider_type = TREE_TYPE (suba);
3014 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3015 return atype;
3016 }
3017 else
3018 return atype;
3019
3020 if (CONVERT_EXPR_P (*b))
3021 {
3022 subb = TREE_OPERAND (*b, 0);
3023 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3024 return atype;
3025 }
3026 else
3027 return atype;
3028
3029 *a = suba;
3030 *b = subb;
3031 return wider_type;
3032 }
3033
3034 /* Determines the expression by that USE is expressed from induction variable
3035 CAND at statement AT in LOOP. The expression is stored in a decomposed
3036 form into AFF. Returns false if USE cannot be expressed using CAND. */
3037
3038 static bool
3039 get_computation_aff (struct loop *loop,
3040 struct iv_use *use, struct iv_cand *cand, gimple at,
3041 struct aff_tree *aff)
3042 {
3043 tree ubase = use->iv->base;
3044 tree ustep = use->iv->step;
3045 tree cbase = cand->iv->base;
3046 tree cstep = cand->iv->step, cstep_common;
3047 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3048 tree common_type, var;
3049 tree uutype;
3050 aff_tree cbase_aff, var_aff;
3051 widest_int rat;
3052
3053 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3054 {
3055 /* We do not have a precision to express the values of use. */
3056 return false;
3057 }
3058
3059 var = var_at_stmt (loop, cand, at);
3060 uutype = unsigned_type_for (utype);
3061
3062 /* If the conversion is not noop, perform it. */
3063 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3064 {
3065 cstep = fold_convert (uutype, cstep);
3066 cbase = fold_convert (uutype, cbase);
3067 var = fold_convert (uutype, var);
3068 }
3069
3070 if (!constant_multiple_of (ustep, cstep, &rat))
3071 return false;
3072
3073 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3074 type, we achieve better folding by computing their difference in this
3075 wider type, and cast the result to UUTYPE. We do not need to worry about
3076 overflows, as all the arithmetics will in the end be performed in UUTYPE
3077 anyway. */
3078 common_type = determine_common_wider_type (&ubase, &cbase);
3079
3080 /* use = ubase - ratio * cbase + ratio * var. */
3081 tree_to_aff_combination (ubase, common_type, aff);
3082 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3083 tree_to_aff_combination (var, uutype, &var_aff);
3084
3085 /* We need to shift the value if we are after the increment. */
3086 if (stmt_after_increment (loop, cand, at))
3087 {
3088 aff_tree cstep_aff;
3089
3090 if (common_type != uutype)
3091 cstep_common = fold_convert (common_type, cstep);
3092 else
3093 cstep_common = cstep;
3094
3095 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3096 aff_combination_add (&cbase_aff, &cstep_aff);
3097 }
3098
3099 aff_combination_scale (&cbase_aff, -rat);
3100 aff_combination_add (aff, &cbase_aff);
3101 if (common_type != uutype)
3102 aff_combination_convert (aff, uutype);
3103
3104 aff_combination_scale (&var_aff, rat);
3105 aff_combination_add (aff, &var_aff);
3106
3107 return true;
3108 }
3109
3110 /* Return the type of USE. */
3111
3112 static tree
3113 get_use_type (struct iv_use *use)
3114 {
3115 tree base_type = TREE_TYPE (use->iv->base);
3116 tree type;
3117
3118 if (use->type == USE_ADDRESS)
3119 {
3120 /* The base_type may be a void pointer. Create a pointer type based on
3121 the mem_ref instead. */
3122 type = build_pointer_type (TREE_TYPE (*use->op_p));
3123 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3124 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3125 }
3126 else
3127 type = base_type;
3128
3129 return type;
3130 }
3131
3132 /* Determines the expression by that USE is expressed from induction variable
3133 CAND at statement AT in LOOP. The computation is unshared. */
3134
3135 static tree
3136 get_computation_at (struct loop *loop,
3137 struct iv_use *use, struct iv_cand *cand, gimple at)
3138 {
3139 aff_tree aff;
3140 tree type = get_use_type (use);
3141
3142 if (!get_computation_aff (loop, use, cand, at, &aff))
3143 return NULL_TREE;
3144 unshare_aff_combination (&aff);
3145 return fold_convert (type, aff_combination_to_tree (&aff));
3146 }
3147
3148 /* Determines the expression by that USE is expressed from induction variable
3149 CAND in LOOP. The computation is unshared. */
3150
3151 static tree
3152 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3153 {
3154 return get_computation_at (loop, use, cand, use->stmt);
3155 }
3156
3157 /* Adjust the cost COST for being in loop setup rather than loop body.
3158 If we're optimizing for space, the loop setup overhead is constant;
3159 if we're optimizing for speed, amortize it over the per-iteration cost. */
3160 static unsigned
3161 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3162 {
3163 if (cost == INFTY)
3164 return cost;
3165 else if (optimize_loop_for_speed_p (data->current_loop))
3166 return cost / avg_loop_niter (data->current_loop);
3167 else
3168 return cost;
3169 }
3170
3171 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3172 validity for a memory reference accessing memory of mode MODE in
3173 address space AS. */
3174
3175
3176 bool
3177 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3178 addr_space_t as)
3179 {
3180 #define MAX_RATIO 128
3181 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3182 static vec<sbitmap> valid_mult_list;
3183 sbitmap valid_mult;
3184
3185 if (data_index >= valid_mult_list.length ())
3186 valid_mult_list.safe_grow_cleared (data_index + 1);
3187
3188 valid_mult = valid_mult_list[data_index];
3189 if (!valid_mult)
3190 {
3191 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3192 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3193 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3194 rtx addr, scaled;
3195 HOST_WIDE_INT i;
3196
3197 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3198 bitmap_clear (valid_mult);
3199 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3200 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3201 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3202 {
3203 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3204 if (memory_address_addr_space_p (mode, addr, as)
3205 || memory_address_addr_space_p (mode, scaled, as))
3206 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3207 }
3208
3209 if (dump_file && (dump_flags & TDF_DETAILS))
3210 {
3211 fprintf (dump_file, " allowed multipliers:");
3212 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3213 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3214 fprintf (dump_file, " %d", (int) i);
3215 fprintf (dump_file, "\n");
3216 fprintf (dump_file, "\n");
3217 }
3218
3219 valid_mult_list[data_index] = valid_mult;
3220 }
3221
3222 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3223 return false;
3224
3225 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3226 }
3227
3228 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3229 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3230 variable is omitted. Compute the cost for a memory reference that accesses
3231 a memory location of mode MEM_MODE in address space AS.
3232
3233 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3234 size of MEM_MODE / RATIO) is available. To make this determination, we
3235 look at the size of the increment to be made, which is given in CSTEP.
3236 CSTEP may be zero if the step is unknown.
3237 STMT_AFTER_INC is true iff the statement we're looking at is after the
3238 increment of the original biv.
3239
3240 TODO -- there must be some better way. This all is quite crude. */
3241
3242 enum ainc_type
3243 {
3244 AINC_PRE_INC, /* Pre increment. */
3245 AINC_PRE_DEC, /* Pre decrement. */
3246 AINC_POST_INC, /* Post increment. */
3247 AINC_POST_DEC, /* Post decrement. */
3248 AINC_NONE /* Also the number of auto increment types. */
3249 };
3250
3251 typedef struct address_cost_data_s
3252 {
3253 HOST_WIDE_INT min_offset, max_offset;
3254 unsigned costs[2][2][2][2];
3255 unsigned ainc_costs[AINC_NONE];
3256 } *address_cost_data;
3257
3258
3259 static comp_cost
3260 get_address_cost (bool symbol_present, bool var_present,
3261 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3262 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3263 addr_space_t as, bool speed,
3264 bool stmt_after_inc, bool *may_autoinc)
3265 {
3266 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3267 static vec<address_cost_data> address_cost_data_list;
3268 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3269 address_cost_data data;
3270 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3271 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3272 unsigned cost, acost, complexity;
3273 enum ainc_type autoinc_type;
3274 bool offset_p, ratio_p, autoinc;
3275 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3276 unsigned HOST_WIDE_INT mask;
3277 unsigned bits;
3278
3279 if (data_index >= address_cost_data_list.length ())
3280 address_cost_data_list.safe_grow_cleared (data_index + 1);
3281
3282 data = address_cost_data_list[data_index];
3283 if (!data)
3284 {
3285 HOST_WIDE_INT i;
3286 HOST_WIDE_INT rat, off = 0;
3287 int old_cse_not_expected, width;
3288 unsigned sym_p, var_p, off_p, rat_p, add_c;
3289 rtx seq, addr, base;
3290 rtx reg0, reg1;
3291
3292 data = (address_cost_data) xcalloc (1, sizeof (*data));
3293
3294 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3295
3296 width = GET_MODE_BITSIZE (address_mode) - 1;
3297 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3298 width = HOST_BITS_PER_WIDE_INT - 1;
3299 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3300
3301 for (i = width; i >= 0; i--)
3302 {
3303 off = -((unsigned HOST_WIDE_INT) 1 << i);
3304 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3305 if (memory_address_addr_space_p (mem_mode, addr, as))
3306 break;
3307 }
3308 data->min_offset = (i == -1? 0 : off);
3309
3310 for (i = width; i >= 0; i--)
3311 {
3312 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3313 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3314 if (memory_address_addr_space_p (mem_mode, addr, as))
3315 break;
3316 /* For some TARGET, like ARM THUMB1, the offset should be nature
3317 aligned. Try an aligned offset if address_mode is not QImode. */
3318 off = (address_mode == QImode)
3319 ? 0
3320 : ((unsigned HOST_WIDE_INT) 1 << i)
3321 - GET_MODE_SIZE (address_mode);
3322 if (off > 0)
3323 {
3324 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3325 if (memory_address_addr_space_p (mem_mode, addr, as))
3326 break;
3327 }
3328 }
3329 if (i == -1)
3330 off = 0;
3331 data->max_offset = off;
3332
3333 if (dump_file && (dump_flags & TDF_DETAILS))
3334 {
3335 fprintf (dump_file, "get_address_cost:\n");
3336 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3337 GET_MODE_NAME (mem_mode),
3338 data->min_offset);
3339 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3340 GET_MODE_NAME (mem_mode),
3341 data->max_offset);
3342 }
3343
3344 rat = 1;
3345 for (i = 2; i <= MAX_RATIO; i++)
3346 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3347 {
3348 rat = i;
3349 break;
3350 }
3351
3352 /* Compute the cost of various addressing modes. */
3353 acost = 0;
3354 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3355 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3356
3357 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3358 || USE_STORE_PRE_DECREMENT (mem_mode))
3359 {
3360 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3361 has_predec[mem_mode]
3362 = memory_address_addr_space_p (mem_mode, addr, as);
3363
3364 if (has_predec[mem_mode])
3365 data->ainc_costs[AINC_PRE_DEC]
3366 = address_cost (addr, mem_mode, as, speed);
3367 }
3368 if (USE_LOAD_POST_DECREMENT (mem_mode)
3369 || USE_STORE_POST_DECREMENT (mem_mode))
3370 {
3371 addr = gen_rtx_POST_DEC (address_mode, reg0);
3372 has_postdec[mem_mode]
3373 = memory_address_addr_space_p (mem_mode, addr, as);
3374
3375 if (has_postdec[mem_mode])
3376 data->ainc_costs[AINC_POST_DEC]
3377 = address_cost (addr, mem_mode, as, speed);
3378 }
3379 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3380 || USE_STORE_PRE_DECREMENT (mem_mode))
3381 {
3382 addr = gen_rtx_PRE_INC (address_mode, reg0);
3383 has_preinc[mem_mode]
3384 = memory_address_addr_space_p (mem_mode, addr, as);
3385
3386 if (has_preinc[mem_mode])
3387 data->ainc_costs[AINC_PRE_INC]
3388 = address_cost (addr, mem_mode, as, speed);
3389 }
3390 if (USE_LOAD_POST_INCREMENT (mem_mode)
3391 || USE_STORE_POST_INCREMENT (mem_mode))
3392 {
3393 addr = gen_rtx_POST_INC (address_mode, reg0);
3394 has_postinc[mem_mode]
3395 = memory_address_addr_space_p (mem_mode, addr, as);
3396
3397 if (has_postinc[mem_mode])
3398 data->ainc_costs[AINC_POST_INC]
3399 = address_cost (addr, mem_mode, as, speed);
3400 }
3401 for (i = 0; i < 16; i++)
3402 {
3403 sym_p = i & 1;
3404 var_p = (i >> 1) & 1;
3405 off_p = (i >> 2) & 1;
3406 rat_p = (i >> 3) & 1;
3407
3408 addr = reg0;
3409 if (rat_p)
3410 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3411 gen_int_mode (rat, address_mode));
3412
3413 if (var_p)
3414 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3415
3416 if (sym_p)
3417 {
3418 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3419 /* ??? We can run into trouble with some backends by presenting
3420 it with symbols which haven't been properly passed through
3421 targetm.encode_section_info. By setting the local bit, we
3422 enhance the probability of things working. */
3423 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3424
3425 if (off_p)
3426 base = gen_rtx_fmt_e (CONST, address_mode,
3427 gen_rtx_fmt_ee
3428 (PLUS, address_mode, base,
3429 gen_int_mode (off, address_mode)));
3430 }
3431 else if (off_p)
3432 base = gen_int_mode (off, address_mode);
3433 else
3434 base = NULL_RTX;
3435
3436 if (base)
3437 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3438
3439 start_sequence ();
3440 /* To avoid splitting addressing modes, pretend that no cse will
3441 follow. */
3442 old_cse_not_expected = cse_not_expected;
3443 cse_not_expected = true;
3444 addr = memory_address_addr_space (mem_mode, addr, as);
3445 cse_not_expected = old_cse_not_expected;
3446 seq = get_insns ();
3447 end_sequence ();
3448
3449 acost = seq_cost (seq, speed);
3450 acost += address_cost (addr, mem_mode, as, speed);
3451
3452 if (!acost)
3453 acost = 1;
3454 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3455 }
3456
3457 /* On some targets, it is quite expensive to load symbol to a register,
3458 which makes addresses that contain symbols look much more expensive.
3459 However, the symbol will have to be loaded in any case before the
3460 loop (and quite likely we have it in register already), so it does not
3461 make much sense to penalize them too heavily. So make some final
3462 tweaks for the SYMBOL_PRESENT modes:
3463
3464 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3465 var is cheaper, use this mode with small penalty.
3466 If VAR_PRESENT is true, try whether the mode with
3467 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3468 if this is the case, use it. */
3469 add_c = add_cost (speed, address_mode);
3470 for (i = 0; i < 8; i++)
3471 {
3472 var_p = i & 1;
3473 off_p = (i >> 1) & 1;
3474 rat_p = (i >> 2) & 1;
3475
3476 acost = data->costs[0][1][off_p][rat_p] + 1;
3477 if (var_p)
3478 acost += add_c;
3479
3480 if (acost < data->costs[1][var_p][off_p][rat_p])
3481 data->costs[1][var_p][off_p][rat_p] = acost;
3482 }
3483
3484 if (dump_file && (dump_flags & TDF_DETAILS))
3485 {
3486 fprintf (dump_file, "Address costs:\n");
3487
3488 for (i = 0; i < 16; i++)
3489 {
3490 sym_p = i & 1;
3491 var_p = (i >> 1) & 1;
3492 off_p = (i >> 2) & 1;
3493 rat_p = (i >> 3) & 1;
3494
3495 fprintf (dump_file, " ");
3496 if (sym_p)
3497 fprintf (dump_file, "sym + ");
3498 if (var_p)
3499 fprintf (dump_file, "var + ");
3500 if (off_p)
3501 fprintf (dump_file, "cst + ");
3502 if (rat_p)
3503 fprintf (dump_file, "rat * ");
3504
3505 acost = data->costs[sym_p][var_p][off_p][rat_p];
3506 fprintf (dump_file, "index costs %d\n", acost);
3507 }
3508 if (has_predec[mem_mode] || has_postdec[mem_mode]
3509 || has_preinc[mem_mode] || has_postinc[mem_mode])
3510 fprintf (dump_file, " May include autoinc/dec\n");
3511 fprintf (dump_file, "\n");
3512 }
3513
3514 address_cost_data_list[data_index] = data;
3515 }
3516
3517 bits = GET_MODE_BITSIZE (address_mode);
3518 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3519 offset &= mask;
3520 if ((offset >> (bits - 1) & 1))
3521 offset |= ~mask;
3522 s_offset = offset;
3523
3524 autoinc = false;
3525 autoinc_type = AINC_NONE;
3526 msize = GET_MODE_SIZE (mem_mode);
3527 autoinc_offset = offset;
3528 if (stmt_after_inc)
3529 autoinc_offset += ratio * cstep;
3530 if (symbol_present || var_present || ratio != 1)
3531 autoinc = false;
3532 else
3533 {
3534 if (has_postinc[mem_mode] && autoinc_offset == 0
3535 && msize == cstep)
3536 autoinc_type = AINC_POST_INC;
3537 else if (has_postdec[mem_mode] && autoinc_offset == 0
3538 && msize == -cstep)
3539 autoinc_type = AINC_POST_DEC;
3540 else if (has_preinc[mem_mode] && autoinc_offset == msize
3541 && msize == cstep)
3542 autoinc_type = AINC_PRE_INC;
3543 else if (has_predec[mem_mode] && autoinc_offset == -msize
3544 && msize == -cstep)
3545 autoinc_type = AINC_PRE_DEC;
3546
3547 if (autoinc_type != AINC_NONE)
3548 autoinc = true;
3549 }
3550
3551 cost = 0;
3552 offset_p = (s_offset != 0
3553 && data->min_offset <= s_offset
3554 && s_offset <= data->max_offset);
3555 ratio_p = (ratio != 1
3556 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3557
3558 if (ratio != 1 && !ratio_p)
3559 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3560
3561 if (s_offset && !offset_p && !symbol_present)
3562 cost += add_cost (speed, address_mode);
3563
3564 if (may_autoinc)
3565 *may_autoinc = autoinc;
3566 if (autoinc)
3567 acost = data->ainc_costs[autoinc_type];
3568 else
3569 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3570 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3571 return new_cost (cost + acost, complexity);
3572 }
3573
3574 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3575 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3576 calculating the operands of EXPR. Returns true if successful, and returns
3577 the cost in COST. */
3578
3579 static bool
3580 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3581 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3582 {
3583 comp_cost res;
3584 tree op1 = TREE_OPERAND (expr, 1);
3585 tree cst = TREE_OPERAND (mult, 1);
3586 tree multop = TREE_OPERAND (mult, 0);
3587 int m = exact_log2 (int_cst_value (cst));
3588 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3589 int sa_cost;
3590 bool equal_p = false;
3591
3592 if (!(m >= 0 && m < maxm))
3593 return false;
3594
3595 if (operand_equal_p (op1, mult, 0))
3596 equal_p = true;
3597
3598 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3599 ? shiftadd_cost (speed, mode, m)
3600 : (equal_p
3601 ? shiftsub1_cost (speed, mode, m)
3602 : shiftsub0_cost (speed, mode, m)));
3603 res = new_cost (sa_cost, 0);
3604 res = add_costs (res, equal_p ? cost0 : cost1);
3605
3606 STRIP_NOPS (multop);
3607 if (!is_gimple_val (multop))
3608 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3609
3610 *cost = res;
3611 return true;
3612 }
3613
3614 /* Estimates cost of forcing expression EXPR into a variable. */
3615
3616 static comp_cost
3617 force_expr_to_var_cost (tree expr, bool speed)
3618 {
3619 static bool costs_initialized = false;
3620 static unsigned integer_cost [2];
3621 static unsigned symbol_cost [2];
3622 static unsigned address_cost [2];
3623 tree op0, op1;
3624 comp_cost cost0, cost1, cost;
3625 enum machine_mode mode;
3626
3627 if (!costs_initialized)
3628 {
3629 tree type = build_pointer_type (integer_type_node);
3630 tree var, addr;
3631 rtx x;
3632 int i;
3633
3634 var = create_tmp_var_raw (integer_type_node, "test_var");
3635 TREE_STATIC (var) = 1;
3636 x = produce_memory_decl_rtl (var, NULL);
3637 SET_DECL_RTL (var, x);
3638
3639 addr = build1 (ADDR_EXPR, type, var);
3640
3641
3642 for (i = 0; i < 2; i++)
3643 {
3644 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3645 2000), i);
3646
3647 symbol_cost[i] = computation_cost (addr, i) + 1;
3648
3649 address_cost[i]
3650 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3651 if (dump_file && (dump_flags & TDF_DETAILS))
3652 {
3653 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3654 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3655 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3656 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3657 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3658 fprintf (dump_file, "\n");
3659 }
3660 }
3661
3662 costs_initialized = true;
3663 }
3664
3665 STRIP_NOPS (expr);
3666
3667 if (SSA_VAR_P (expr))
3668 return no_cost;
3669
3670 if (is_gimple_min_invariant (expr))
3671 {
3672 if (TREE_CODE (expr) == INTEGER_CST)
3673 return new_cost (integer_cost [speed], 0);
3674
3675 if (TREE_CODE (expr) == ADDR_EXPR)
3676 {
3677 tree obj = TREE_OPERAND (expr, 0);
3678
3679 if (TREE_CODE (obj) == VAR_DECL
3680 || TREE_CODE (obj) == PARM_DECL
3681 || TREE_CODE (obj) == RESULT_DECL)
3682 return new_cost (symbol_cost [speed], 0);
3683 }
3684
3685 return new_cost (address_cost [speed], 0);
3686 }
3687
3688 switch (TREE_CODE (expr))
3689 {
3690 case POINTER_PLUS_EXPR:
3691 case PLUS_EXPR:
3692 case MINUS_EXPR:
3693 case MULT_EXPR:
3694 op0 = TREE_OPERAND (expr, 0);
3695 op1 = TREE_OPERAND (expr, 1);
3696 STRIP_NOPS (op0);
3697 STRIP_NOPS (op1);
3698 break;
3699
3700 CASE_CONVERT:
3701 case NEGATE_EXPR:
3702 op0 = TREE_OPERAND (expr, 0);
3703 STRIP_NOPS (op0);
3704 op1 = NULL_TREE;
3705 break;
3706
3707 default:
3708 /* Just an arbitrary value, FIXME. */
3709 return new_cost (target_spill_cost[speed], 0);
3710 }
3711
3712 if (op0 == NULL_TREE
3713 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3714 cost0 = no_cost;
3715 else
3716 cost0 = force_expr_to_var_cost (op0, speed);
3717
3718 if (op1 == NULL_TREE
3719 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3720 cost1 = no_cost;
3721 else
3722 cost1 = force_expr_to_var_cost (op1, speed);
3723
3724 mode = TYPE_MODE (TREE_TYPE (expr));
3725 switch (TREE_CODE (expr))
3726 {
3727 case POINTER_PLUS_EXPR:
3728 case PLUS_EXPR:
3729 case MINUS_EXPR:
3730 case NEGATE_EXPR:
3731 cost = new_cost (add_cost (speed, mode), 0);
3732 if (TREE_CODE (expr) != NEGATE_EXPR)
3733 {
3734 tree mult = NULL_TREE;
3735 comp_cost sa_cost;
3736 if (TREE_CODE (op1) == MULT_EXPR)
3737 mult = op1;
3738 else if (TREE_CODE (op0) == MULT_EXPR)
3739 mult = op0;
3740
3741 if (mult != NULL_TREE
3742 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3743 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3744 speed, &sa_cost))
3745 return sa_cost;
3746 }
3747 break;
3748
3749 CASE_CONVERT:
3750 {
3751 tree inner_mode, outer_mode;
3752 outer_mode = TREE_TYPE (expr);
3753 inner_mode = TREE_TYPE (op0);
3754 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3755 TYPE_MODE (inner_mode), speed), 0);
3756 }
3757 break;
3758
3759 case MULT_EXPR:
3760 if (cst_and_fits_in_hwi (op0))
3761 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3762 mode, speed), 0);
3763 else if (cst_and_fits_in_hwi (op1))
3764 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3765 mode, speed), 0);
3766 else
3767 return new_cost (target_spill_cost [speed], 0);
3768 break;
3769
3770 default:
3771 gcc_unreachable ();
3772 }
3773
3774 cost = add_costs (cost, cost0);
3775 cost = add_costs (cost, cost1);
3776
3777 /* Bound the cost by target_spill_cost. The parts of complicated
3778 computations often are either loop invariant or at least can
3779 be shared between several iv uses, so letting this grow without
3780 limits would not give reasonable results. */
3781 if (cost.cost > (int) target_spill_cost [speed])
3782 cost.cost = target_spill_cost [speed];
3783
3784 return cost;
3785 }
3786
3787 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3788 invariants the computation depends on. */
3789
3790 static comp_cost
3791 force_var_cost (struct ivopts_data *data,
3792 tree expr, bitmap *depends_on)
3793 {
3794 if (depends_on)
3795 {
3796 fd_ivopts_data = data;
3797 walk_tree (&expr, find_depends, depends_on, NULL);
3798 }
3799
3800 return force_expr_to_var_cost (expr, data->speed);
3801 }
3802
3803 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3804 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3805 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3806 invariants the computation depends on. */
3807
3808 static comp_cost
3809 split_address_cost (struct ivopts_data *data,
3810 tree addr, bool *symbol_present, bool *var_present,
3811 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3812 {
3813 tree core;
3814 HOST_WIDE_INT bitsize;
3815 HOST_WIDE_INT bitpos;
3816 tree toffset;
3817 enum machine_mode mode;
3818 int unsignedp, volatilep;
3819
3820 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3821 &unsignedp, &volatilep, false);
3822
3823 if (toffset != 0
3824 || bitpos % BITS_PER_UNIT != 0
3825 || TREE_CODE (core) != VAR_DECL)
3826 {
3827 *symbol_present = false;
3828 *var_present = true;
3829 fd_ivopts_data = data;
3830 walk_tree (&addr, find_depends, depends_on, NULL);
3831 return new_cost (target_spill_cost[data->speed], 0);
3832 }
3833
3834 *offset += bitpos / BITS_PER_UNIT;
3835 if (TREE_STATIC (core)
3836 || DECL_EXTERNAL (core))
3837 {
3838 *symbol_present = true;
3839 *var_present = false;
3840 return no_cost;
3841 }
3842
3843 *symbol_present = false;
3844 *var_present = true;
3845 return no_cost;
3846 }
3847
3848 /* Estimates cost of expressing difference of addresses E1 - E2 as
3849 var + symbol + offset. The value of offset is added to OFFSET,
3850 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3851 part is missing. DEPENDS_ON is a set of the invariants the computation
3852 depends on. */
3853
3854 static comp_cost
3855 ptr_difference_cost (struct ivopts_data *data,
3856 tree e1, tree e2, bool *symbol_present, bool *var_present,
3857 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3858 {
3859 HOST_WIDE_INT diff = 0;
3860 aff_tree aff_e1, aff_e2;
3861 tree type;
3862
3863 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3864
3865 if (ptr_difference_const (e1, e2, &diff))
3866 {
3867 *offset += diff;
3868 *symbol_present = false;
3869 *var_present = false;
3870 return no_cost;
3871 }
3872
3873 if (integer_zerop (e2))
3874 return split_address_cost (data, TREE_OPERAND (e1, 0),
3875 symbol_present, var_present, offset, depends_on);
3876
3877 *symbol_present = false;
3878 *var_present = true;
3879
3880 type = signed_type_for (TREE_TYPE (e1));
3881 tree_to_aff_combination (e1, type, &aff_e1);
3882 tree_to_aff_combination (e2, type, &aff_e2);
3883 aff_combination_scale (&aff_e2, -1);
3884 aff_combination_add (&aff_e1, &aff_e2);
3885
3886 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3887 }
3888
3889 /* Estimates cost of expressing difference E1 - E2 as
3890 var + symbol + offset. The value of offset is added to OFFSET,
3891 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3892 part is missing. DEPENDS_ON is a set of the invariants the computation
3893 depends on. */
3894
3895 static comp_cost
3896 difference_cost (struct ivopts_data *data,
3897 tree e1, tree e2, bool *symbol_present, bool *var_present,
3898 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3899 {
3900 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3901 unsigned HOST_WIDE_INT off1, off2;
3902 aff_tree aff_e1, aff_e2;
3903 tree type;
3904
3905 e1 = strip_offset (e1, &off1);
3906 e2 = strip_offset (e2, &off2);
3907 *offset += off1 - off2;
3908
3909 STRIP_NOPS (e1);
3910 STRIP_NOPS (e2);
3911
3912 if (TREE_CODE (e1) == ADDR_EXPR)
3913 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3914 offset, depends_on);
3915 *symbol_present = false;
3916
3917 if (operand_equal_p (e1, e2, 0))
3918 {
3919 *var_present = false;
3920 return no_cost;
3921 }
3922
3923 *var_present = true;
3924
3925 if (integer_zerop (e2))
3926 return force_var_cost (data, e1, depends_on);
3927
3928 if (integer_zerop (e1))
3929 {
3930 comp_cost cost = force_var_cost (data, e2, depends_on);
3931 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3932 return cost;
3933 }
3934
3935 type = signed_type_for (TREE_TYPE (e1));
3936 tree_to_aff_combination (e1, type, &aff_e1);
3937 tree_to_aff_combination (e2, type, &aff_e2);
3938 aff_combination_scale (&aff_e2, -1);
3939 aff_combination_add (&aff_e1, &aff_e2);
3940
3941 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3942 }
3943
3944 /* Returns true if AFF1 and AFF2 are identical. */
3945
3946 static bool
3947 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3948 {
3949 unsigned i;
3950
3951 if (aff1->n != aff2->n)
3952 return false;
3953
3954 for (i = 0; i < aff1->n; i++)
3955 {
3956 if (aff1->elts[i].coef != aff2->elts[i].coef)
3957 return false;
3958
3959 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3960 return false;
3961 }
3962 return true;
3963 }
3964
3965 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3966
3967 static int
3968 get_expr_id (struct ivopts_data *data, tree expr)
3969 {
3970 struct iv_inv_expr_ent ent;
3971 struct iv_inv_expr_ent **slot;
3972
3973 ent.expr = expr;
3974 ent.hash = iterative_hash_expr (expr, 0);
3975 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3976 if (*slot)
3977 return (*slot)->id;
3978
3979 *slot = XNEW (struct iv_inv_expr_ent);
3980 (*slot)->expr = expr;
3981 (*slot)->hash = ent.hash;
3982 (*slot)->id = data->inv_expr_id++;
3983 return (*slot)->id;
3984 }
3985
3986 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3987 requires a new compiler generated temporary. Returns -1 otherwise.
3988 ADDRESS_P is a flag indicating if the expression is for address
3989 computation. */
3990
3991 static int
3992 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3993 tree cbase, HOST_WIDE_INT ratio,
3994 bool address_p)
3995 {
3996 aff_tree ubase_aff, cbase_aff;
3997 tree expr, ub, cb;
3998
3999 STRIP_NOPS (ubase);
4000 STRIP_NOPS (cbase);
4001 ub = ubase;
4002 cb = cbase;
4003
4004 if ((TREE_CODE (ubase) == INTEGER_CST)
4005 && (TREE_CODE (cbase) == INTEGER_CST))
4006 return -1;
4007
4008 /* Strips the constant part. */
4009 if (TREE_CODE (ubase) == PLUS_EXPR
4010 || TREE_CODE (ubase) == MINUS_EXPR
4011 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4012 {
4013 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4014 ubase = TREE_OPERAND (ubase, 0);
4015 }
4016
4017 /* Strips the constant part. */
4018 if (TREE_CODE (cbase) == PLUS_EXPR
4019 || TREE_CODE (cbase) == MINUS_EXPR
4020 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4021 {
4022 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4023 cbase = TREE_OPERAND (cbase, 0);
4024 }
4025
4026 if (address_p)
4027 {
4028 if (((TREE_CODE (ubase) == SSA_NAME)
4029 || (TREE_CODE (ubase) == ADDR_EXPR
4030 && is_gimple_min_invariant (ubase)))
4031 && (TREE_CODE (cbase) == INTEGER_CST))
4032 return -1;
4033
4034 if (((TREE_CODE (cbase) == SSA_NAME)
4035 || (TREE_CODE (cbase) == ADDR_EXPR
4036 && is_gimple_min_invariant (cbase)))
4037 && (TREE_CODE (ubase) == INTEGER_CST))
4038 return -1;
4039 }
4040
4041 if (ratio == 1)
4042 {
4043 if (operand_equal_p (ubase, cbase, 0))
4044 return -1;
4045
4046 if (TREE_CODE (ubase) == ADDR_EXPR
4047 && TREE_CODE (cbase) == ADDR_EXPR)
4048 {
4049 tree usym, csym;
4050
4051 usym = TREE_OPERAND (ubase, 0);
4052 csym = TREE_OPERAND (cbase, 0);
4053 if (TREE_CODE (usym) == ARRAY_REF)
4054 {
4055 tree ind = TREE_OPERAND (usym, 1);
4056 if (TREE_CODE (ind) == INTEGER_CST
4057 && tree_fits_shwi_p (ind)
4058 && tree_to_shwi (ind) == 0)
4059 usym = TREE_OPERAND (usym, 0);
4060 }
4061 if (TREE_CODE (csym) == ARRAY_REF)
4062 {
4063 tree ind = TREE_OPERAND (csym, 1);
4064 if (TREE_CODE (ind) == INTEGER_CST
4065 && tree_fits_shwi_p (ind)
4066 && tree_to_shwi (ind) == 0)
4067 csym = TREE_OPERAND (csym, 0);
4068 }
4069 if (operand_equal_p (usym, csym, 0))
4070 return -1;
4071 }
4072 /* Now do more complex comparison */
4073 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4074 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4075 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4076 return -1;
4077 }
4078
4079 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4080 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4081
4082 aff_combination_scale (&cbase_aff, -1 * ratio);
4083 aff_combination_add (&ubase_aff, &cbase_aff);
4084 expr = aff_combination_to_tree (&ubase_aff);
4085 return get_expr_id (data, expr);
4086 }
4087
4088
4089
4090 /* Determines the cost of the computation by that USE is expressed
4091 from induction variable CAND. If ADDRESS_P is true, we just need
4092 to create an address from it, otherwise we want to get it into
4093 register. A set of invariants we depend on is stored in
4094 DEPENDS_ON. AT is the statement at that the value is computed.
4095 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4096 addressing is likely. */
4097
4098 static comp_cost
4099 get_computation_cost_at (struct ivopts_data *data,
4100 struct iv_use *use, struct iv_cand *cand,
4101 bool address_p, bitmap *depends_on, gimple at,
4102 bool *can_autoinc,
4103 int *inv_expr_id)
4104 {
4105 tree ubase = use->iv->base, ustep = use->iv->step;
4106 tree cbase, cstep;
4107 tree utype = TREE_TYPE (ubase), ctype;
4108 unsigned HOST_WIDE_INT cstepi, offset = 0;
4109 HOST_WIDE_INT ratio, aratio;
4110 bool var_present, symbol_present, stmt_is_after_inc;
4111 comp_cost cost;
4112 widest_int rat;
4113 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4114 enum machine_mode mem_mode = (address_p
4115 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4116 : VOIDmode);
4117
4118 *depends_on = NULL;
4119
4120 /* Only consider real candidates. */
4121 if (!cand->iv)
4122 return infinite_cost;
4123
4124 cbase = cand->iv->base;
4125 cstep = cand->iv->step;
4126 ctype = TREE_TYPE (cbase);
4127
4128 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4129 {
4130 /* We do not have a precision to express the values of use. */
4131 return infinite_cost;
4132 }
4133
4134 if (address_p
4135 || (use->iv->base_object
4136 && cand->iv->base_object
4137 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4138 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4139 {
4140 /* Do not try to express address of an object with computation based
4141 on address of a different object. This may cause problems in rtl
4142 level alias analysis (that does not expect this to be happening,
4143 as this is illegal in C), and would be unlikely to be useful
4144 anyway. */
4145 if (use->iv->base_object
4146 && cand->iv->base_object
4147 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4148 return infinite_cost;
4149 }
4150
4151 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4152 {
4153 /* TODO -- add direct handling of this case. */
4154 goto fallback;
4155 }
4156
4157 /* CSTEPI is removed from the offset in case statement is after the
4158 increment. If the step is not constant, we use zero instead.
4159 This is a bit imprecise (there is the extra addition), but
4160 redundancy elimination is likely to transform the code so that
4161 it uses value of the variable before increment anyway,
4162 so it is not that much unrealistic. */
4163 if (cst_and_fits_in_hwi (cstep))
4164 cstepi = int_cst_value (cstep);
4165 else
4166 cstepi = 0;
4167
4168 if (!constant_multiple_of (ustep, cstep, &rat))
4169 return infinite_cost;
4170
4171 if (wi::fits_shwi_p (rat))
4172 ratio = rat.to_shwi ();
4173 else
4174 return infinite_cost;
4175
4176 STRIP_NOPS (cbase);
4177 ctype = TREE_TYPE (cbase);
4178
4179 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4180
4181 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4182 or ratio == 1, it is better to handle this like
4183
4184 ubase - ratio * cbase + ratio * var
4185
4186 (also holds in the case ratio == -1, TODO. */
4187
4188 if (cst_and_fits_in_hwi (cbase))
4189 {
4190 offset = - ratio * int_cst_value (cbase);
4191 cost = difference_cost (data,
4192 ubase, build_int_cst (utype, 0),
4193 &symbol_present, &var_present, &offset,
4194 depends_on);
4195 cost.cost /= avg_loop_niter (data->current_loop);
4196 }
4197 else if (ratio == 1)
4198 {
4199 tree real_cbase = cbase;
4200
4201 /* Check to see if any adjustment is needed. */
4202 if (cstepi == 0 && stmt_is_after_inc)
4203 {
4204 aff_tree real_cbase_aff;
4205 aff_tree cstep_aff;
4206
4207 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4208 &real_cbase_aff);
4209 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4210
4211 aff_combination_add (&real_cbase_aff, &cstep_aff);
4212 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4213 }
4214
4215 cost = difference_cost (data,
4216 ubase, real_cbase,
4217 &symbol_present, &var_present, &offset,
4218 depends_on);
4219 cost.cost /= avg_loop_niter (data->current_loop);
4220 }
4221 else if (address_p
4222 && !POINTER_TYPE_P (ctype)
4223 && multiplier_allowed_in_address_p
4224 (ratio, mem_mode,
4225 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4226 {
4227 cbase
4228 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4229 cost = difference_cost (data,
4230 ubase, cbase,
4231 &symbol_present, &var_present, &offset,
4232 depends_on);
4233 cost.cost /= avg_loop_niter (data->current_loop);
4234 }
4235 else
4236 {
4237 cost = force_var_cost (data, cbase, depends_on);
4238 cost = add_costs (cost,
4239 difference_cost (data,
4240 ubase, build_int_cst (utype, 0),
4241 &symbol_present, &var_present,
4242 &offset, depends_on));
4243 cost.cost /= avg_loop_niter (data->current_loop);
4244 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4245 }
4246
4247 if (inv_expr_id)
4248 {
4249 *inv_expr_id =
4250 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4251 /* Clear depends on. */
4252 if (*inv_expr_id != -1 && depends_on && *depends_on)
4253 bitmap_clear (*depends_on);
4254 }
4255
4256 /* If we are after the increment, the value of the candidate is higher by
4257 one iteration. */
4258 if (stmt_is_after_inc)
4259 offset -= ratio * cstepi;
4260
4261 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4262 (symbol/var1/const parts may be omitted). If we are looking for an
4263 address, find the cost of addressing this. */
4264 if (address_p)
4265 return add_costs (cost,
4266 get_address_cost (symbol_present, var_present,
4267 offset, ratio, cstepi,
4268 mem_mode,
4269 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4270 speed, stmt_is_after_inc,
4271 can_autoinc));
4272
4273 /* Otherwise estimate the costs for computing the expression. */
4274 if (!symbol_present && !var_present && !offset)
4275 {
4276 if (ratio != 1)
4277 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4278 return cost;
4279 }
4280
4281 /* Symbol + offset should be compile-time computable so consider that they
4282 are added once to the variable, if present. */
4283 if (var_present && (symbol_present || offset))
4284 cost.cost += adjust_setup_cost (data,
4285 add_cost (speed, TYPE_MODE (ctype)));
4286
4287 /* Having offset does not affect runtime cost in case it is added to
4288 symbol, but it increases complexity. */
4289 if (offset)
4290 cost.complexity++;
4291
4292 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4293
4294 aratio = ratio > 0 ? ratio : -ratio;
4295 if (aratio != 1)
4296 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4297 return cost;
4298
4299 fallback:
4300 if (can_autoinc)
4301 *can_autoinc = false;
4302
4303 {
4304 /* Just get the expression, expand it and measure the cost. */
4305 tree comp = get_computation_at (data->current_loop, use, cand, at);
4306
4307 if (!comp)
4308 return infinite_cost;
4309
4310 if (address_p)
4311 comp = build_simple_mem_ref (comp);
4312
4313 return new_cost (computation_cost (comp, speed), 0);
4314 }
4315 }
4316
4317 /* Determines the cost of the computation by that USE is expressed
4318 from induction variable CAND. If ADDRESS_P is true, we just need
4319 to create an address from it, otherwise we want to get it into
4320 register. A set of invariants we depend on is stored in
4321 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4322 autoinc addressing is likely. */
4323
4324 static comp_cost
4325 get_computation_cost (struct ivopts_data *data,
4326 struct iv_use *use, struct iv_cand *cand,
4327 bool address_p, bitmap *depends_on,
4328 bool *can_autoinc, int *inv_expr_id)
4329 {
4330 return get_computation_cost_at (data,
4331 use, cand, address_p, depends_on, use->stmt,
4332 can_autoinc, inv_expr_id);
4333 }
4334
4335 /* Determines cost of basing replacement of USE on CAND in a generic
4336 expression. */
4337
4338 static bool
4339 determine_use_iv_cost_generic (struct ivopts_data *data,
4340 struct iv_use *use, struct iv_cand *cand)
4341 {
4342 bitmap depends_on;
4343 comp_cost cost;
4344 int inv_expr_id = -1;
4345
4346 /* The simple case first -- if we need to express value of the preserved
4347 original biv, the cost is 0. This also prevents us from counting the
4348 cost of increment twice -- once at this use and once in the cost of
4349 the candidate. */
4350 if (cand->pos == IP_ORIGINAL
4351 && cand->incremented_at == use->stmt)
4352 {
4353 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4354 ERROR_MARK, -1);
4355 return true;
4356 }
4357
4358 cost = get_computation_cost (data, use, cand, false, &depends_on,
4359 NULL, &inv_expr_id);
4360
4361 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4362 inv_expr_id);
4363
4364 return !infinite_cost_p (cost);
4365 }
4366
4367 /* Determines cost of basing replacement of USE on CAND in an address. */
4368
4369 static bool
4370 determine_use_iv_cost_address (struct ivopts_data *data,
4371 struct iv_use *use, struct iv_cand *cand)
4372 {
4373 bitmap depends_on;
4374 bool can_autoinc;
4375 int inv_expr_id = -1;
4376 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4377 &can_autoinc, &inv_expr_id);
4378
4379 if (cand->ainc_use == use)
4380 {
4381 if (can_autoinc)
4382 cost.cost -= cand->cost_step;
4383 /* If we generated the candidate solely for exploiting autoincrement
4384 opportunities, and it turns out it can't be used, set the cost to
4385 infinity to make sure we ignore it. */
4386 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4387 cost = infinite_cost;
4388 }
4389 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4390 inv_expr_id);
4391
4392 return !infinite_cost_p (cost);
4393 }
4394
4395 /* Computes value of candidate CAND at position AT in iteration NITER, and
4396 stores it to VAL. */
4397
4398 static void
4399 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4400 aff_tree *val)
4401 {
4402 aff_tree step, delta, nit;
4403 struct iv *iv = cand->iv;
4404 tree type = TREE_TYPE (iv->base);
4405 tree steptype = type;
4406 if (POINTER_TYPE_P (type))
4407 steptype = sizetype;
4408 steptype = unsigned_type_for (type);
4409
4410 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4411 aff_combination_convert (&step, steptype);
4412 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4413 aff_combination_convert (&nit, steptype);
4414 aff_combination_mult (&nit, &step, &delta);
4415 if (stmt_after_increment (loop, cand, at))
4416 aff_combination_add (&delta, &step);
4417
4418 tree_to_aff_combination (iv->base, type, val);
4419 if (!POINTER_TYPE_P (type))
4420 aff_combination_convert (val, steptype);
4421 aff_combination_add (val, &delta);
4422 }
4423
4424 /* Returns period of induction variable iv. */
4425
4426 static tree
4427 iv_period (struct iv *iv)
4428 {
4429 tree step = iv->step, period, type;
4430 tree pow2div;
4431
4432 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4433
4434 type = unsigned_type_for (TREE_TYPE (step));
4435 /* Period of the iv is lcm (step, type_range)/step -1,
4436 i.e., N*type_range/step - 1. Since type range is power
4437 of two, N == (step >> num_of_ending_zeros_binary (step),
4438 so the final result is
4439
4440 (type_range >> num_of_ending_zeros_binary (step)) - 1
4441
4442 */
4443 pow2div = num_ending_zeros (step);
4444
4445 period = build_low_bits_mask (type,
4446 (TYPE_PRECISION (type)
4447 - tree_to_uhwi (pow2div)));
4448
4449 return period;
4450 }
4451
4452 /* Returns the comparison operator used when eliminating the iv USE. */
4453
4454 static enum tree_code
4455 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4456 {
4457 struct loop *loop = data->current_loop;
4458 basic_block ex_bb;
4459 edge exit;
4460
4461 ex_bb = gimple_bb (use->stmt);
4462 exit = EDGE_SUCC (ex_bb, 0);
4463 if (flow_bb_inside_loop_p (loop, exit->dest))
4464 exit = EDGE_SUCC (ex_bb, 1);
4465
4466 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4467 }
4468
4469 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4470 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4471 calculation is performed in non-wrapping type.
4472
4473 TODO: More generally, we could test for the situation that
4474 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4475 This would require knowing the sign of OFFSET. */
4476
4477 static bool
4478 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4479 {
4480 enum tree_code code;
4481 tree e1, e2;
4482 aff_tree aff_e1, aff_e2, aff_offset;
4483
4484 if (!nowrap_type_p (TREE_TYPE (base)))
4485 return false;
4486
4487 base = expand_simple_operations (base);
4488
4489 if (TREE_CODE (base) == SSA_NAME)
4490 {
4491 gimple stmt = SSA_NAME_DEF_STMT (base);
4492
4493 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4494 return false;
4495
4496 code = gimple_assign_rhs_code (stmt);
4497 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4498 return false;
4499
4500 e1 = gimple_assign_rhs1 (stmt);
4501 e2 = gimple_assign_rhs2 (stmt);
4502 }
4503 else
4504 {
4505 code = TREE_CODE (base);
4506 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4507 return false;
4508 e1 = TREE_OPERAND (base, 0);
4509 e2 = TREE_OPERAND (base, 1);
4510 }
4511
4512 /* Use affine expansion as deeper inspection to prove the equality. */
4513 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4514 &aff_e2, &data->name_expansion_cache);
4515 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4516 &aff_offset, &data->name_expansion_cache);
4517 aff_combination_scale (&aff_offset, -1);
4518 switch (code)
4519 {
4520 case PLUS_EXPR:
4521 aff_combination_add (&aff_e2, &aff_offset);
4522 if (aff_combination_zero_p (&aff_e2))
4523 return true;
4524
4525 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4526 &aff_e1, &data->name_expansion_cache);
4527 aff_combination_add (&aff_e1, &aff_offset);
4528 return aff_combination_zero_p (&aff_e1);
4529
4530 case POINTER_PLUS_EXPR:
4531 aff_combination_add (&aff_e2, &aff_offset);
4532 return aff_combination_zero_p (&aff_e2);
4533
4534 default:
4535 return false;
4536 }
4537 }
4538
4539 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4540 comparison with CAND. NITER describes the number of iterations of
4541 the loops. If successful, the comparison in COMP_P is altered accordingly.
4542
4543 We aim to handle the following situation:
4544
4545 sometype *base, *p;
4546 int a, b, i;
4547
4548 i = a;
4549 p = p_0 = base + a;
4550
4551 do
4552 {
4553 bla (*p);
4554 p++;
4555 i++;
4556 }
4557 while (i < b);
4558
4559 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4560 We aim to optimize this to
4561
4562 p = p_0 = base + a;
4563 do
4564 {
4565 bla (*p);
4566 p++;
4567 }
4568 while (p < p_0 - a + b);
4569
4570 This preserves the correctness, since the pointer arithmetics does not
4571 overflow. More precisely:
4572
4573 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4574 overflow in computing it or the values of p.
4575 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4576 overflow. To prove this, we use the fact that p_0 = base + a. */
4577
4578 static bool
4579 iv_elimination_compare_lt (struct ivopts_data *data,
4580 struct iv_cand *cand, enum tree_code *comp_p,
4581 struct tree_niter_desc *niter)
4582 {
4583 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4584 struct aff_tree nit, tmpa, tmpb;
4585 enum tree_code comp;
4586 HOST_WIDE_INT step;
4587
4588 /* We need to know that the candidate induction variable does not overflow.
4589 While more complex analysis may be used to prove this, for now just
4590 check that the variable appears in the original program and that it
4591 is computed in a type that guarantees no overflows. */
4592 cand_type = TREE_TYPE (cand->iv->base);
4593 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4594 return false;
4595
4596 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4597 the calculation of the BOUND could overflow, making the comparison
4598 invalid. */
4599 if (!data->loop_single_exit_p)
4600 return false;
4601
4602 /* We need to be able to decide whether candidate is increasing or decreasing
4603 in order to choose the right comparison operator. */
4604 if (!cst_and_fits_in_hwi (cand->iv->step))
4605 return false;
4606 step = int_cst_value (cand->iv->step);
4607
4608 /* Check that the number of iterations matches the expected pattern:
4609 a + 1 > b ? 0 : b - a - 1. */
4610 mbz = niter->may_be_zero;
4611 if (TREE_CODE (mbz) == GT_EXPR)
4612 {
4613 /* Handle a + 1 > b. */
4614 tree op0 = TREE_OPERAND (mbz, 0);
4615 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4616 {
4617 a = TREE_OPERAND (op0, 0);
4618 b = TREE_OPERAND (mbz, 1);
4619 }
4620 else
4621 return false;
4622 }
4623 else if (TREE_CODE (mbz) == LT_EXPR)
4624 {
4625 tree op1 = TREE_OPERAND (mbz, 1);
4626
4627 /* Handle b < a + 1. */
4628 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4629 {
4630 a = TREE_OPERAND (op1, 0);
4631 b = TREE_OPERAND (mbz, 0);
4632 }
4633 else
4634 return false;
4635 }
4636 else
4637 return false;
4638
4639 /* Expected number of iterations is B - A - 1. Check that it matches
4640 the actual number, i.e., that B - A - NITER = 1. */
4641 tree_to_aff_combination (niter->niter, nit_type, &nit);
4642 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4643 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4644 aff_combination_scale (&nit, -1);
4645 aff_combination_scale (&tmpa, -1);
4646 aff_combination_add (&tmpb, &tmpa);
4647 aff_combination_add (&tmpb, &nit);
4648 if (tmpb.n != 0 || tmpb.offset != 1)
4649 return false;
4650
4651 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4652 overflow. */
4653 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4654 cand->iv->step,
4655 fold_convert (TREE_TYPE (cand->iv->step), a));
4656 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4657 return false;
4658
4659 /* Determine the new comparison operator. */
4660 comp = step < 0 ? GT_EXPR : LT_EXPR;
4661 if (*comp_p == NE_EXPR)
4662 *comp_p = comp;
4663 else if (*comp_p == EQ_EXPR)
4664 *comp_p = invert_tree_comparison (comp, false);
4665 else
4666 gcc_unreachable ();
4667
4668 return true;
4669 }
4670
4671 /* Check whether it is possible to express the condition in USE by comparison
4672 of candidate CAND. If so, store the value compared with to BOUND, and the
4673 comparison operator to COMP. */
4674
4675 static bool
4676 may_eliminate_iv (struct ivopts_data *data,
4677 struct iv_use *use, struct iv_cand *cand, tree *bound,
4678 enum tree_code *comp)
4679 {
4680 basic_block ex_bb;
4681 edge exit;
4682 tree period;
4683 struct loop *loop = data->current_loop;
4684 aff_tree bnd;
4685 struct tree_niter_desc *desc = NULL;
4686
4687 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4688 return false;
4689
4690 /* For now works only for exits that dominate the loop latch.
4691 TODO: extend to other conditions inside loop body. */
4692 ex_bb = gimple_bb (use->stmt);
4693 if (use->stmt != last_stmt (ex_bb)
4694 || gimple_code (use->stmt) != GIMPLE_COND
4695 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4696 return false;
4697
4698 exit = EDGE_SUCC (ex_bb, 0);
4699 if (flow_bb_inside_loop_p (loop, exit->dest))
4700 exit = EDGE_SUCC (ex_bb, 1);
4701 if (flow_bb_inside_loop_p (loop, exit->dest))
4702 return false;
4703
4704 desc = niter_for_exit (data, exit);
4705 if (!desc)
4706 return false;
4707
4708 /* Determine whether we can use the variable to test the exit condition.
4709 This is the case iff the period of the induction variable is greater
4710 than the number of iterations for which the exit condition is true. */
4711 period = iv_period (cand->iv);
4712
4713 /* If the number of iterations is constant, compare against it directly. */
4714 if (TREE_CODE (desc->niter) == INTEGER_CST)
4715 {
4716 /* See cand_value_at. */
4717 if (stmt_after_increment (loop, cand, use->stmt))
4718 {
4719 if (!tree_int_cst_lt (desc->niter, period))
4720 return false;
4721 }
4722 else
4723 {
4724 if (tree_int_cst_lt (period, desc->niter))
4725 return false;
4726 }
4727 }
4728
4729 /* If not, and if this is the only possible exit of the loop, see whether
4730 we can get a conservative estimate on the number of iterations of the
4731 entire loop and compare against that instead. */
4732 else
4733 {
4734 widest_int period_value, max_niter;
4735
4736 max_niter = desc->max;
4737 if (stmt_after_increment (loop, cand, use->stmt))
4738 max_niter += 1;
4739 period_value = wi::to_widest (period);
4740 if (wi::gtu_p (max_niter, period_value))
4741 {
4742 /* See if we can take advantage of inferred loop bound information. */
4743 if (data->loop_single_exit_p)
4744 {
4745 if (!max_loop_iterations (loop, &max_niter))
4746 return false;
4747 /* The loop bound is already adjusted by adding 1. */
4748 if (wi::gtu_p (max_niter, period_value))
4749 return false;
4750 }
4751 else
4752 return false;
4753 }
4754 }
4755
4756 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4757
4758 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4759 aff_combination_to_tree (&bnd));
4760 *comp = iv_elimination_compare (data, use);
4761
4762 /* It is unlikely that computing the number of iterations using division
4763 would be more profitable than keeping the original induction variable. */
4764 if (expression_expensive_p (*bound))
4765 return false;
4766
4767 /* Sometimes, it is possible to handle the situation that the number of
4768 iterations may be zero unless additional assumtions by using <
4769 instead of != in the exit condition.
4770
4771 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4772 base the exit condition on it. However, that is often too
4773 expensive. */
4774 if (!integer_zerop (desc->may_be_zero))
4775 return iv_elimination_compare_lt (data, cand, comp, desc);
4776
4777 return true;
4778 }
4779
4780 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4781 be copied, if is is used in the loop body and DATA->body_includes_call. */
4782
4783 static int
4784 parm_decl_cost (struct ivopts_data *data, tree bound)
4785 {
4786 tree sbound = bound;
4787 STRIP_NOPS (sbound);
4788
4789 if (TREE_CODE (sbound) == SSA_NAME
4790 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4791 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4792 && data->body_includes_call)
4793 return COSTS_N_INSNS (1);
4794
4795 return 0;
4796 }
4797
4798 /* Determines cost of basing replacement of USE on CAND in a condition. */
4799
4800 static bool
4801 determine_use_iv_cost_condition (struct ivopts_data *data,
4802 struct iv_use *use, struct iv_cand *cand)
4803 {
4804 tree bound = NULL_TREE;
4805 struct iv *cmp_iv;
4806 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4807 comp_cost elim_cost, express_cost, cost, bound_cost;
4808 bool ok;
4809 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4810 tree *control_var, *bound_cst;
4811 enum tree_code comp = ERROR_MARK;
4812
4813 /* Only consider real candidates. */
4814 if (!cand->iv)
4815 {
4816 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4817 ERROR_MARK, -1);
4818 return false;
4819 }
4820
4821 /* Try iv elimination. */
4822 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4823 {
4824 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4825 if (elim_cost.cost == 0)
4826 elim_cost.cost = parm_decl_cost (data, bound);
4827 else if (TREE_CODE (bound) == INTEGER_CST)
4828 elim_cost.cost = 0;
4829 /* If we replace a loop condition 'i < n' with 'p < base + n',
4830 depends_on_elim will have 'base' and 'n' set, which implies
4831 that both 'base' and 'n' will be live during the loop. More likely,
4832 'base + n' will be loop invariant, resulting in only one live value
4833 during the loop. So in that case we clear depends_on_elim and set
4834 elim_inv_expr_id instead. */
4835 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4836 {
4837 elim_inv_expr_id = get_expr_id (data, bound);
4838 bitmap_clear (depends_on_elim);
4839 }
4840 /* The bound is a loop invariant, so it will be only computed
4841 once. */
4842 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4843 }
4844 else
4845 elim_cost = infinite_cost;
4846
4847 /* Try expressing the original giv. If it is compared with an invariant,
4848 note that we cannot get rid of it. */
4849 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4850 NULL, &cmp_iv);
4851 gcc_assert (ok);
4852
4853 /* When the condition is a comparison of the candidate IV against
4854 zero, prefer this IV.
4855
4856 TODO: The constant that we're subtracting from the cost should
4857 be target-dependent. This information should be added to the
4858 target costs for each backend. */
4859 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4860 && integer_zerop (*bound_cst)
4861 && (operand_equal_p (*control_var, cand->var_after, 0)
4862 || operand_equal_p (*control_var, cand->var_before, 0)))
4863 elim_cost.cost -= 1;
4864
4865 express_cost = get_computation_cost (data, use, cand, false,
4866 &depends_on_express, NULL,
4867 &express_inv_expr_id);
4868 fd_ivopts_data = data;
4869 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4870
4871 /* Count the cost of the original bound as well. */
4872 bound_cost = force_var_cost (data, *bound_cst, NULL);
4873 if (bound_cost.cost == 0)
4874 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4875 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4876 bound_cost.cost = 0;
4877 express_cost.cost += bound_cost.cost;
4878
4879 /* Choose the better approach, preferring the eliminated IV. */
4880 if (compare_costs (elim_cost, express_cost) <= 0)
4881 {
4882 cost = elim_cost;
4883 depends_on = depends_on_elim;
4884 depends_on_elim = NULL;
4885 inv_expr_id = elim_inv_expr_id;
4886 }
4887 else
4888 {
4889 cost = express_cost;
4890 depends_on = depends_on_express;
4891 depends_on_express = NULL;
4892 bound = NULL_TREE;
4893 comp = ERROR_MARK;
4894 inv_expr_id = express_inv_expr_id;
4895 }
4896
4897 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4898
4899 if (depends_on_elim)
4900 BITMAP_FREE (depends_on_elim);
4901 if (depends_on_express)
4902 BITMAP_FREE (depends_on_express);
4903
4904 return !infinite_cost_p (cost);
4905 }
4906
4907 /* Determines cost of basing replacement of USE on CAND. Returns false
4908 if USE cannot be based on CAND. */
4909
4910 static bool
4911 determine_use_iv_cost (struct ivopts_data *data,
4912 struct iv_use *use, struct iv_cand *cand)
4913 {
4914 switch (use->type)
4915 {
4916 case USE_NONLINEAR_EXPR:
4917 return determine_use_iv_cost_generic (data, use, cand);
4918
4919 case USE_ADDRESS:
4920 return determine_use_iv_cost_address (data, use, cand);
4921
4922 case USE_COMPARE:
4923 return determine_use_iv_cost_condition (data, use, cand);
4924
4925 default:
4926 gcc_unreachable ();
4927 }
4928 }
4929
4930 /* Return true if get_computation_cost indicates that autoincrement is
4931 a possibility for the pair of USE and CAND, false otherwise. */
4932
4933 static bool
4934 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4935 struct iv_cand *cand)
4936 {
4937 bitmap depends_on;
4938 bool can_autoinc;
4939 comp_cost cost;
4940
4941 if (use->type != USE_ADDRESS)
4942 return false;
4943
4944 cost = get_computation_cost (data, use, cand, true, &depends_on,
4945 &can_autoinc, NULL);
4946
4947 BITMAP_FREE (depends_on);
4948
4949 return !infinite_cost_p (cost) && can_autoinc;
4950 }
4951
4952 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4953 use that allows autoincrement, and set their AINC_USE if possible. */
4954
4955 static void
4956 set_autoinc_for_original_candidates (struct ivopts_data *data)
4957 {
4958 unsigned i, j;
4959
4960 for (i = 0; i < n_iv_cands (data); i++)
4961 {
4962 struct iv_cand *cand = iv_cand (data, i);
4963 struct iv_use *closest_before = NULL;
4964 struct iv_use *closest_after = NULL;
4965 if (cand->pos != IP_ORIGINAL)
4966 continue;
4967
4968 for (j = 0; j < n_iv_uses (data); j++)
4969 {
4970 struct iv_use *use = iv_use (data, j);
4971 unsigned uid = gimple_uid (use->stmt);
4972
4973 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4974 continue;
4975
4976 if (uid < gimple_uid (cand->incremented_at)
4977 && (closest_before == NULL
4978 || uid > gimple_uid (closest_before->stmt)))
4979 closest_before = use;
4980
4981 if (uid > gimple_uid (cand->incremented_at)
4982 && (closest_after == NULL
4983 || uid < gimple_uid (closest_after->stmt)))
4984 closest_after = use;
4985 }
4986
4987 if (closest_before != NULL
4988 && autoinc_possible_for_pair (data, closest_before, cand))
4989 cand->ainc_use = closest_before;
4990 else if (closest_after != NULL
4991 && autoinc_possible_for_pair (data, closest_after, cand))
4992 cand->ainc_use = closest_after;
4993 }
4994 }
4995
4996 /* Finds the candidates for the induction variables. */
4997
4998 static void
4999 find_iv_candidates (struct ivopts_data *data)
5000 {
5001 /* Add commonly used ivs. */
5002 add_standard_iv_candidates (data);
5003
5004 /* Add old induction variables. */
5005 add_old_ivs_candidates (data);
5006
5007 /* Add induction variables derived from uses. */
5008 add_derived_ivs_candidates (data);
5009
5010 set_autoinc_for_original_candidates (data);
5011
5012 /* Record the important candidates. */
5013 record_important_candidates (data);
5014 }
5015
5016 /* Determines costs of basing the use of the iv on an iv candidate. */
5017
5018 static void
5019 determine_use_iv_costs (struct ivopts_data *data)
5020 {
5021 unsigned i, j;
5022 struct iv_use *use;
5023 struct iv_cand *cand;
5024 bitmap to_clear = BITMAP_ALLOC (NULL);
5025
5026 alloc_use_cost_map (data);
5027
5028 for (i = 0; i < n_iv_uses (data); i++)
5029 {
5030 use = iv_use (data, i);
5031
5032 if (data->consider_all_candidates)
5033 {
5034 for (j = 0; j < n_iv_cands (data); j++)
5035 {
5036 cand = iv_cand (data, j);
5037 determine_use_iv_cost (data, use, cand);
5038 }
5039 }
5040 else
5041 {
5042 bitmap_iterator bi;
5043
5044 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5045 {
5046 cand = iv_cand (data, j);
5047 if (!determine_use_iv_cost (data, use, cand))
5048 bitmap_set_bit (to_clear, j);
5049 }
5050
5051 /* Remove the candidates for that the cost is infinite from
5052 the list of related candidates. */
5053 bitmap_and_compl_into (use->related_cands, to_clear);
5054 bitmap_clear (to_clear);
5055 }
5056 }
5057
5058 BITMAP_FREE (to_clear);
5059
5060 if (dump_file && (dump_flags & TDF_DETAILS))
5061 {
5062 fprintf (dump_file, "Use-candidate costs:\n");
5063
5064 for (i = 0; i < n_iv_uses (data); i++)
5065 {
5066 use = iv_use (data, i);
5067
5068 fprintf (dump_file, "Use %d:\n", i);
5069 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5070 for (j = 0; j < use->n_map_members; j++)
5071 {
5072 if (!use->cost_map[j].cand
5073 || infinite_cost_p (use->cost_map[j].cost))
5074 continue;
5075
5076 fprintf (dump_file, " %d\t%d\t%d\t",
5077 use->cost_map[j].cand->id,
5078 use->cost_map[j].cost.cost,
5079 use->cost_map[j].cost.complexity);
5080 if (use->cost_map[j].depends_on)
5081 bitmap_print (dump_file,
5082 use->cost_map[j].depends_on, "","");
5083 if (use->cost_map[j].inv_expr_id != -1)
5084 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5085 fprintf (dump_file, "\n");
5086 }
5087
5088 fprintf (dump_file, "\n");
5089 }
5090 fprintf (dump_file, "\n");
5091 }
5092 }
5093
5094 /* Determines cost of the candidate CAND. */
5095
5096 static void
5097 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5098 {
5099 comp_cost cost_base;
5100 unsigned cost, cost_step;
5101 tree base;
5102
5103 if (!cand->iv)
5104 {
5105 cand->cost = 0;
5106 return;
5107 }
5108
5109 /* There are two costs associated with the candidate -- its increment
5110 and its initialization. The second is almost negligible for any loop
5111 that rolls enough, so we take it just very little into account. */
5112
5113 base = cand->iv->base;
5114 cost_base = force_var_cost (data, base, NULL);
5115 /* It will be exceptional that the iv register happens to be initialized with
5116 the proper value at no cost. In general, there will at least be a regcopy
5117 or a const set. */
5118 if (cost_base.cost == 0)
5119 cost_base.cost = COSTS_N_INSNS (1);
5120 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5121
5122 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5123
5124 /* Prefer the original ivs unless we may gain something by replacing it.
5125 The reason is to make debugging simpler; so this is not relevant for
5126 artificial ivs created by other optimization passes. */
5127 if (cand->pos != IP_ORIGINAL
5128 || !SSA_NAME_VAR (cand->var_before)
5129 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5130 cost++;
5131
5132 /* Prefer not to insert statements into latch unless there are some
5133 already (so that we do not create unnecessary jumps). */
5134 if (cand->pos == IP_END
5135 && empty_block_p (ip_end_pos (data->current_loop)))
5136 cost++;
5137
5138 cand->cost = cost;
5139 cand->cost_step = cost_step;
5140 }
5141
5142 /* Determines costs of computation of the candidates. */
5143
5144 static void
5145 determine_iv_costs (struct ivopts_data *data)
5146 {
5147 unsigned i;
5148
5149 if (dump_file && (dump_flags & TDF_DETAILS))
5150 {
5151 fprintf (dump_file, "Candidate costs:\n");
5152 fprintf (dump_file, " cand\tcost\n");
5153 }
5154
5155 for (i = 0; i < n_iv_cands (data); i++)
5156 {
5157 struct iv_cand *cand = iv_cand (data, i);
5158
5159 determine_iv_cost (data, cand);
5160
5161 if (dump_file && (dump_flags & TDF_DETAILS))
5162 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5163 }
5164
5165 if (dump_file && (dump_flags & TDF_DETAILS))
5166 fprintf (dump_file, "\n");
5167 }
5168
5169 /* Calculates cost for having SIZE induction variables. */
5170
5171 static unsigned
5172 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5173 {
5174 /* We add size to the cost, so that we prefer eliminating ivs
5175 if possible. */
5176 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5177 data->body_includes_call);
5178 }
5179
5180 /* For each size of the induction variable set determine the penalty. */
5181
5182 static void
5183 determine_set_costs (struct ivopts_data *data)
5184 {
5185 unsigned j, n;
5186 gimple phi;
5187 gimple_stmt_iterator psi;
5188 tree op;
5189 struct loop *loop = data->current_loop;
5190 bitmap_iterator bi;
5191
5192 if (dump_file && (dump_flags & TDF_DETAILS))
5193 {
5194 fprintf (dump_file, "Global costs:\n");
5195 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5196 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5197 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5198 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5199 }
5200
5201 n = 0;
5202 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5203 {
5204 phi = gsi_stmt (psi);
5205 op = PHI_RESULT (phi);
5206
5207 if (virtual_operand_p (op))
5208 continue;
5209
5210 if (get_iv (data, op))
5211 continue;
5212
5213 n++;
5214 }
5215
5216 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5217 {
5218 struct version_info *info = ver_info (data, j);
5219
5220 if (info->inv_id && info->has_nonlin_use)
5221 n++;
5222 }
5223
5224 data->regs_used = n;
5225 if (dump_file && (dump_flags & TDF_DETAILS))
5226 fprintf (dump_file, " regs_used %d\n", n);
5227
5228 if (dump_file && (dump_flags & TDF_DETAILS))
5229 {
5230 fprintf (dump_file, " cost for size:\n");
5231 fprintf (dump_file, " ivs\tcost\n");
5232 for (j = 0; j <= 2 * target_avail_regs; j++)
5233 fprintf (dump_file, " %d\t%d\n", j,
5234 ivopts_global_cost_for_size (data, j));
5235 fprintf (dump_file, "\n");
5236 }
5237 }
5238
5239 /* Returns true if A is a cheaper cost pair than B. */
5240
5241 static bool
5242 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5243 {
5244 int cmp;
5245
5246 if (!a)
5247 return false;
5248
5249 if (!b)
5250 return true;
5251
5252 cmp = compare_costs (a->cost, b->cost);
5253 if (cmp < 0)
5254 return true;
5255
5256 if (cmp > 0)
5257 return false;
5258
5259 /* In case the costs are the same, prefer the cheaper candidate. */
5260 if (a->cand->cost < b->cand->cost)
5261 return true;
5262
5263 return false;
5264 }
5265
5266
5267 /* Returns candidate by that USE is expressed in IVS. */
5268
5269 static struct cost_pair *
5270 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5271 {
5272 return ivs->cand_for_use[use->id];
5273 }
5274
5275 /* Computes the cost field of IVS structure. */
5276
5277 static void
5278 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5279 {
5280 comp_cost cost = ivs->cand_use_cost;
5281
5282 cost.cost += ivs->cand_cost;
5283
5284 cost.cost += ivopts_global_cost_for_size (data,
5285 ivs->n_regs + ivs->num_used_inv_expr);
5286
5287 ivs->cost = cost;
5288 }
5289
5290 /* Remove invariants in set INVS to set IVS. */
5291
5292 static void
5293 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5294 {
5295 bitmap_iterator bi;
5296 unsigned iid;
5297
5298 if (!invs)
5299 return;
5300
5301 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5302 {
5303 ivs->n_invariant_uses[iid]--;
5304 if (ivs->n_invariant_uses[iid] == 0)
5305 ivs->n_regs--;
5306 }
5307 }
5308
5309 /* Set USE not to be expressed by any candidate in IVS. */
5310
5311 static void
5312 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5313 struct iv_use *use)
5314 {
5315 unsigned uid = use->id, cid;
5316 struct cost_pair *cp;
5317
5318 cp = ivs->cand_for_use[uid];
5319 if (!cp)
5320 return;
5321 cid = cp->cand->id;
5322
5323 ivs->bad_uses++;
5324 ivs->cand_for_use[uid] = NULL;
5325 ivs->n_cand_uses[cid]--;
5326
5327 if (ivs->n_cand_uses[cid] == 0)
5328 {
5329 bitmap_clear_bit (ivs->cands, cid);
5330 /* Do not count the pseudocandidates. */
5331 if (cp->cand->iv)
5332 ivs->n_regs--;
5333 ivs->n_cands--;
5334 ivs->cand_cost -= cp->cand->cost;
5335
5336 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5337 }
5338
5339 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5340
5341 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5342
5343 if (cp->inv_expr_id != -1)
5344 {
5345 ivs->used_inv_expr[cp->inv_expr_id]--;
5346 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5347 ivs->num_used_inv_expr--;
5348 }
5349 iv_ca_recount_cost (data, ivs);
5350 }
5351
5352 /* Add invariants in set INVS to set IVS. */
5353
5354 static void
5355 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5356 {
5357 bitmap_iterator bi;
5358 unsigned iid;
5359
5360 if (!invs)
5361 return;
5362
5363 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5364 {
5365 ivs->n_invariant_uses[iid]++;
5366 if (ivs->n_invariant_uses[iid] == 1)
5367 ivs->n_regs++;
5368 }
5369 }
5370
5371 /* Set cost pair for USE in set IVS to CP. */
5372
5373 static void
5374 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5375 struct iv_use *use, struct cost_pair *cp)
5376 {
5377 unsigned uid = use->id, cid;
5378
5379 if (ivs->cand_for_use[uid] == cp)
5380 return;
5381
5382 if (ivs->cand_for_use[uid])
5383 iv_ca_set_no_cp (data, ivs, use);
5384
5385 if (cp)
5386 {
5387 cid = cp->cand->id;
5388
5389 ivs->bad_uses--;
5390 ivs->cand_for_use[uid] = cp;
5391 ivs->n_cand_uses[cid]++;
5392 if (ivs->n_cand_uses[cid] == 1)
5393 {
5394 bitmap_set_bit (ivs->cands, cid);
5395 /* Do not count the pseudocandidates. */
5396 if (cp->cand->iv)
5397 ivs->n_regs++;
5398 ivs->n_cands++;
5399 ivs->cand_cost += cp->cand->cost;
5400
5401 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5402 }
5403
5404 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5405 iv_ca_set_add_invariants (ivs, cp->depends_on);
5406
5407 if (cp->inv_expr_id != -1)
5408 {
5409 ivs->used_inv_expr[cp->inv_expr_id]++;
5410 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5411 ivs->num_used_inv_expr++;
5412 }
5413 iv_ca_recount_cost (data, ivs);
5414 }
5415 }
5416
5417 /* Extend set IVS by expressing USE by some of the candidates in it
5418 if possible. Consider all important candidates if candidates in
5419 set IVS don't give any result. */
5420
5421 static void
5422 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5423 struct iv_use *use)
5424 {
5425 struct cost_pair *best_cp = NULL, *cp;
5426 bitmap_iterator bi;
5427 unsigned i;
5428 struct iv_cand *cand;
5429
5430 gcc_assert (ivs->upto >= use->id);
5431 ivs->upto++;
5432 ivs->bad_uses++;
5433
5434 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5435 {
5436 cand = iv_cand (data, i);
5437 cp = get_use_iv_cost (data, use, cand);
5438 if (cheaper_cost_pair (cp, best_cp))
5439 best_cp = cp;
5440 }
5441
5442 if (best_cp == NULL)
5443 {
5444 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5445 {
5446 cand = iv_cand (data, i);
5447 cp = get_use_iv_cost (data, use, cand);
5448 if (cheaper_cost_pair (cp, best_cp))
5449 best_cp = cp;
5450 }
5451 }
5452
5453 iv_ca_set_cp (data, ivs, use, best_cp);
5454 }
5455
5456 /* Get cost for assignment IVS. */
5457
5458 static comp_cost
5459 iv_ca_cost (struct iv_ca *ivs)
5460 {
5461 /* This was a conditional expression but it triggered a bug in
5462 Sun C 5.5. */
5463 if (ivs->bad_uses)
5464 return infinite_cost;
5465 else
5466 return ivs->cost;
5467 }
5468
5469 /* Returns true if all dependences of CP are among invariants in IVS. */
5470
5471 static bool
5472 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5473 {
5474 unsigned i;
5475 bitmap_iterator bi;
5476
5477 if (!cp->depends_on)
5478 return true;
5479
5480 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5481 {
5482 if (ivs->n_invariant_uses[i] == 0)
5483 return false;
5484 }
5485
5486 return true;
5487 }
5488
5489 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5490 it before NEXT_CHANGE. */
5491
5492 static struct iv_ca_delta *
5493 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5494 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5495 {
5496 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5497
5498 change->use = use;
5499 change->old_cp = old_cp;
5500 change->new_cp = new_cp;
5501 change->next_change = next_change;
5502
5503 return change;
5504 }
5505
5506 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5507 are rewritten. */
5508
5509 static struct iv_ca_delta *
5510 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5511 {
5512 struct iv_ca_delta *last;
5513
5514 if (!l2)
5515 return l1;
5516
5517 if (!l1)
5518 return l2;
5519
5520 for (last = l1; last->next_change; last = last->next_change)
5521 continue;
5522 last->next_change = l2;
5523
5524 return l1;
5525 }
5526
5527 /* Reverse the list of changes DELTA, forming the inverse to it. */
5528
5529 static struct iv_ca_delta *
5530 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5531 {
5532 struct iv_ca_delta *act, *next, *prev = NULL;
5533 struct cost_pair *tmp;
5534
5535 for (act = delta; act; act = next)
5536 {
5537 next = act->next_change;
5538 act->next_change = prev;
5539 prev = act;
5540
5541 tmp = act->old_cp;
5542 act->old_cp = act->new_cp;
5543 act->new_cp = tmp;
5544 }
5545
5546 return prev;
5547 }
5548
5549 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5550 reverted instead. */
5551
5552 static void
5553 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5554 struct iv_ca_delta *delta, bool forward)
5555 {
5556 struct cost_pair *from, *to;
5557 struct iv_ca_delta *act;
5558
5559 if (!forward)
5560 delta = iv_ca_delta_reverse (delta);
5561
5562 for (act = delta; act; act = act->next_change)
5563 {
5564 from = act->old_cp;
5565 to = act->new_cp;
5566 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5567 iv_ca_set_cp (data, ivs, act->use, to);
5568 }
5569
5570 if (!forward)
5571 iv_ca_delta_reverse (delta);
5572 }
5573
5574 /* Returns true if CAND is used in IVS. */
5575
5576 static bool
5577 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5578 {
5579 return ivs->n_cand_uses[cand->id] > 0;
5580 }
5581
5582 /* Returns number of induction variable candidates in the set IVS. */
5583
5584 static unsigned
5585 iv_ca_n_cands (struct iv_ca *ivs)
5586 {
5587 return ivs->n_cands;
5588 }
5589
5590 /* Free the list of changes DELTA. */
5591
5592 static void
5593 iv_ca_delta_free (struct iv_ca_delta **delta)
5594 {
5595 struct iv_ca_delta *act, *next;
5596
5597 for (act = *delta; act; act = next)
5598 {
5599 next = act->next_change;
5600 free (act);
5601 }
5602
5603 *delta = NULL;
5604 }
5605
5606 /* Allocates new iv candidates assignment. */
5607
5608 static struct iv_ca *
5609 iv_ca_new (struct ivopts_data *data)
5610 {
5611 struct iv_ca *nw = XNEW (struct iv_ca);
5612
5613 nw->upto = 0;
5614 nw->bad_uses = 0;
5615 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5616 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5617 nw->cands = BITMAP_ALLOC (NULL);
5618 nw->n_cands = 0;
5619 nw->n_regs = 0;
5620 nw->cand_use_cost = no_cost;
5621 nw->cand_cost = 0;
5622 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5623 nw->cost = no_cost;
5624 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5625 nw->num_used_inv_expr = 0;
5626
5627 return nw;
5628 }
5629
5630 /* Free memory occupied by the set IVS. */
5631
5632 static void
5633 iv_ca_free (struct iv_ca **ivs)
5634 {
5635 free ((*ivs)->cand_for_use);
5636 free ((*ivs)->n_cand_uses);
5637 BITMAP_FREE ((*ivs)->cands);
5638 free ((*ivs)->n_invariant_uses);
5639 free ((*ivs)->used_inv_expr);
5640 free (*ivs);
5641 *ivs = NULL;
5642 }
5643
5644 /* Dumps IVS to FILE. */
5645
5646 static void
5647 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5648 {
5649 const char *pref = " invariants ";
5650 unsigned i;
5651 comp_cost cost = iv_ca_cost (ivs);
5652
5653 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5654 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5655 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5656 bitmap_print (file, ivs->cands, " candidates: ","\n");
5657
5658 for (i = 0; i < ivs->upto; i++)
5659 {
5660 struct iv_use *use = iv_use (data, i);
5661 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5662 if (cp)
5663 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5664 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5665 else
5666 fprintf (file, " use:%d --> ??\n", use->id);
5667 }
5668
5669 for (i = 1; i <= data->max_inv_id; i++)
5670 if (ivs->n_invariant_uses[i])
5671 {
5672 fprintf (file, "%s%d", pref, i);
5673 pref = ", ";
5674 }
5675 fprintf (file, "\n\n");
5676 }
5677
5678 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5679 new set, and store differences in DELTA. Number of induction variables
5680 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5681 the function will try to find a solution with mimimal iv candidates. */
5682
5683 static comp_cost
5684 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5685 struct iv_cand *cand, struct iv_ca_delta **delta,
5686 unsigned *n_ivs, bool min_ncand)
5687 {
5688 unsigned i;
5689 comp_cost cost;
5690 struct iv_use *use;
5691 struct cost_pair *old_cp, *new_cp;
5692
5693 *delta = NULL;
5694 for (i = 0; i < ivs->upto; i++)
5695 {
5696 use = iv_use (data, i);
5697 old_cp = iv_ca_cand_for_use (ivs, use);
5698
5699 if (old_cp
5700 && old_cp->cand == cand)
5701 continue;
5702
5703 new_cp = get_use_iv_cost (data, use, cand);
5704 if (!new_cp)
5705 continue;
5706
5707 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5708 continue;
5709
5710 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5711 continue;
5712
5713 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5714 }
5715
5716 iv_ca_delta_commit (data, ivs, *delta, true);
5717 cost = iv_ca_cost (ivs);
5718 if (n_ivs)
5719 *n_ivs = iv_ca_n_cands (ivs);
5720 iv_ca_delta_commit (data, ivs, *delta, false);
5721
5722 return cost;
5723 }
5724
5725 /* Try narrowing set IVS by removing CAND. Return the cost of
5726 the new set and store the differences in DELTA. START is
5727 the candidate with which we start narrowing. */
5728
5729 static comp_cost
5730 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5731 struct iv_cand *cand, struct iv_cand *start,
5732 struct iv_ca_delta **delta)
5733 {
5734 unsigned i, ci;
5735 struct iv_use *use;
5736 struct cost_pair *old_cp, *new_cp, *cp;
5737 bitmap_iterator bi;
5738 struct iv_cand *cnd;
5739 comp_cost cost, best_cost, acost;
5740
5741 *delta = NULL;
5742 for (i = 0; i < n_iv_uses (data); i++)
5743 {
5744 use = iv_use (data, i);
5745
5746 old_cp = iv_ca_cand_for_use (ivs, use);
5747 if (old_cp->cand != cand)
5748 continue;
5749
5750 best_cost = iv_ca_cost (ivs);
5751 /* Start narrowing with START. */
5752 new_cp = get_use_iv_cost (data, use, start);
5753
5754 if (data->consider_all_candidates)
5755 {
5756 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5757 {
5758 if (ci == cand->id || (start && ci == start->id))
5759 continue;
5760
5761 cnd = iv_cand (data, ci);
5762
5763 cp = get_use_iv_cost (data, use, cnd);
5764 if (!cp)
5765 continue;
5766
5767 iv_ca_set_cp (data, ivs, use, cp);
5768 acost = iv_ca_cost (ivs);
5769
5770 if (compare_costs (acost, best_cost) < 0)
5771 {
5772 best_cost = acost;
5773 new_cp = cp;
5774 }
5775 }
5776 }
5777 else
5778 {
5779 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5780 {
5781 if (ci == cand->id || (start && ci == start->id))
5782 continue;
5783
5784 cnd = iv_cand (data, ci);
5785
5786 cp = get_use_iv_cost (data, use, cnd);
5787 if (!cp)
5788 continue;
5789
5790 iv_ca_set_cp (data, ivs, use, cp);
5791 acost = iv_ca_cost (ivs);
5792
5793 if (compare_costs (acost, best_cost) < 0)
5794 {
5795 best_cost = acost;
5796 new_cp = cp;
5797 }
5798 }
5799 }
5800 /* Restore to old cp for use. */
5801 iv_ca_set_cp (data, ivs, use, old_cp);
5802
5803 if (!new_cp)
5804 {
5805 iv_ca_delta_free (delta);
5806 return infinite_cost;
5807 }
5808
5809 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5810 }
5811
5812 iv_ca_delta_commit (data, ivs, *delta, true);
5813 cost = iv_ca_cost (ivs);
5814 iv_ca_delta_commit (data, ivs, *delta, false);
5815
5816 return cost;
5817 }
5818
5819 /* Try optimizing the set of candidates IVS by removing candidates different
5820 from to EXCEPT_CAND from it. Return cost of the new set, and store
5821 differences in DELTA. */
5822
5823 static comp_cost
5824 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5825 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5826 {
5827 bitmap_iterator bi;
5828 struct iv_ca_delta *act_delta, *best_delta;
5829 unsigned i;
5830 comp_cost best_cost, acost;
5831 struct iv_cand *cand;
5832
5833 best_delta = NULL;
5834 best_cost = iv_ca_cost (ivs);
5835
5836 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5837 {
5838 cand = iv_cand (data, i);
5839
5840 if (cand == except_cand)
5841 continue;
5842
5843 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5844
5845 if (compare_costs (acost, best_cost) < 0)
5846 {
5847 best_cost = acost;
5848 iv_ca_delta_free (&best_delta);
5849 best_delta = act_delta;
5850 }
5851 else
5852 iv_ca_delta_free (&act_delta);
5853 }
5854
5855 if (!best_delta)
5856 {
5857 *delta = NULL;
5858 return best_cost;
5859 }
5860
5861 /* Recurse to possibly remove other unnecessary ivs. */
5862 iv_ca_delta_commit (data, ivs, best_delta, true);
5863 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5864 iv_ca_delta_commit (data, ivs, best_delta, false);
5865 *delta = iv_ca_delta_join (best_delta, *delta);
5866 return best_cost;
5867 }
5868
5869 /* Tries to extend the sets IVS in the best possible way in order
5870 to express the USE. If ORIGINALP is true, prefer candidates from
5871 the original set of IVs, otherwise favor important candidates not
5872 based on any memory object. */
5873
5874 static bool
5875 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5876 struct iv_use *use, bool originalp)
5877 {
5878 comp_cost best_cost, act_cost;
5879 unsigned i;
5880 bitmap_iterator bi;
5881 struct iv_cand *cand;
5882 struct iv_ca_delta *best_delta = NULL, *act_delta;
5883 struct cost_pair *cp;
5884
5885 iv_ca_add_use (data, ivs, use);
5886 best_cost = iv_ca_cost (ivs);
5887 cp = iv_ca_cand_for_use (ivs, use);
5888 if (cp)
5889 {
5890 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5891 iv_ca_set_no_cp (data, ivs, use);
5892 }
5893
5894 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5895 first try important candidates not based on any memory object. Only if
5896 this fails, try the specific ones. Rationale -- in loops with many
5897 variables the best choice often is to use just one generic biv. If we
5898 added here many ivs specific to the uses, the optimization algorithm later
5899 would be likely to get stuck in a local minimum, thus causing us to create
5900 too many ivs. The approach from few ivs to more seems more likely to be
5901 successful -- starting from few ivs, replacing an expensive use by a
5902 specific iv should always be a win. */
5903 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5904 {
5905 cand = iv_cand (data, i);
5906
5907 if (originalp && cand->pos !=IP_ORIGINAL)
5908 continue;
5909
5910 if (!originalp && cand->iv->base_object != NULL_TREE)
5911 continue;
5912
5913 if (iv_ca_cand_used_p (ivs, cand))
5914 continue;
5915
5916 cp = get_use_iv_cost (data, use, cand);
5917 if (!cp)
5918 continue;
5919
5920 iv_ca_set_cp (data, ivs, use, cp);
5921 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5922 true);
5923 iv_ca_set_no_cp (data, ivs, use);
5924 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5925
5926 if (compare_costs (act_cost, best_cost) < 0)
5927 {
5928 best_cost = act_cost;
5929
5930 iv_ca_delta_free (&best_delta);
5931 best_delta = act_delta;
5932 }
5933 else
5934 iv_ca_delta_free (&act_delta);
5935 }
5936
5937 if (infinite_cost_p (best_cost))
5938 {
5939 for (i = 0; i < use->n_map_members; i++)
5940 {
5941 cp = use->cost_map + i;
5942 cand = cp->cand;
5943 if (!cand)
5944 continue;
5945
5946 /* Already tried this. */
5947 if (cand->important)
5948 {
5949 if (originalp && cand->pos == IP_ORIGINAL)
5950 continue;
5951 if (!originalp && cand->iv->base_object == NULL_TREE)
5952 continue;
5953 }
5954
5955 if (iv_ca_cand_used_p (ivs, cand))
5956 continue;
5957
5958 act_delta = NULL;
5959 iv_ca_set_cp (data, ivs, use, cp);
5960 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5961 iv_ca_set_no_cp (data, ivs, use);
5962 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5963 cp, act_delta);
5964
5965 if (compare_costs (act_cost, best_cost) < 0)
5966 {
5967 best_cost = act_cost;
5968
5969 if (best_delta)
5970 iv_ca_delta_free (&best_delta);
5971 best_delta = act_delta;
5972 }
5973 else
5974 iv_ca_delta_free (&act_delta);
5975 }
5976 }
5977
5978 iv_ca_delta_commit (data, ivs, best_delta, true);
5979 iv_ca_delta_free (&best_delta);
5980
5981 return !infinite_cost_p (best_cost);
5982 }
5983
5984 /* Finds an initial assignment of candidates to uses. */
5985
5986 static struct iv_ca *
5987 get_initial_solution (struct ivopts_data *data, bool originalp)
5988 {
5989 struct iv_ca *ivs = iv_ca_new (data);
5990 unsigned i;
5991
5992 for (i = 0; i < n_iv_uses (data); i++)
5993 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5994 {
5995 iv_ca_free (&ivs);
5996 return NULL;
5997 }
5998
5999 return ivs;
6000 }
6001
6002 /* Tries to improve set of induction variables IVS. */
6003
6004 static bool
6005 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6006 {
6007 unsigned i, n_ivs;
6008 comp_cost acost, best_cost = iv_ca_cost (ivs);
6009 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6010 struct iv_cand *cand;
6011
6012 /* Try extending the set of induction variables by one. */
6013 for (i = 0; i < n_iv_cands (data); i++)
6014 {
6015 cand = iv_cand (data, i);
6016
6017 if (iv_ca_cand_used_p (ivs, cand))
6018 continue;
6019
6020 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6021 if (!act_delta)
6022 continue;
6023
6024 /* If we successfully added the candidate and the set is small enough,
6025 try optimizing it by removing other candidates. */
6026 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6027 {
6028 iv_ca_delta_commit (data, ivs, act_delta, true);
6029 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6030 iv_ca_delta_commit (data, ivs, act_delta, false);
6031 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6032 }
6033
6034 if (compare_costs (acost, best_cost) < 0)
6035 {
6036 best_cost = acost;
6037 iv_ca_delta_free (&best_delta);
6038 best_delta = act_delta;
6039 }
6040 else
6041 iv_ca_delta_free (&act_delta);
6042 }
6043
6044 if (!best_delta)
6045 {
6046 /* Try removing the candidates from the set instead. */
6047 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6048
6049 /* Nothing more we can do. */
6050 if (!best_delta)
6051 return false;
6052 }
6053
6054 iv_ca_delta_commit (data, ivs, best_delta, true);
6055 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6056 iv_ca_delta_free (&best_delta);
6057 return true;
6058 }
6059
6060 /* Attempts to find the optimal set of induction variables. We do simple
6061 greedy heuristic -- we try to replace at most one candidate in the selected
6062 solution and remove the unused ivs while this improves the cost. */
6063
6064 static struct iv_ca *
6065 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6066 {
6067 struct iv_ca *set;
6068
6069 /* Get the initial solution. */
6070 set = get_initial_solution (data, originalp);
6071 if (!set)
6072 {
6073 if (dump_file && (dump_flags & TDF_DETAILS))
6074 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6075 return NULL;
6076 }
6077
6078 if (dump_file && (dump_flags & TDF_DETAILS))
6079 {
6080 fprintf (dump_file, "Initial set of candidates:\n");
6081 iv_ca_dump (data, dump_file, set);
6082 }
6083
6084 while (try_improve_iv_set (data, set))
6085 {
6086 if (dump_file && (dump_flags & TDF_DETAILS))
6087 {
6088 fprintf (dump_file, "Improved to:\n");
6089 iv_ca_dump (data, dump_file, set);
6090 }
6091 }
6092
6093 return set;
6094 }
6095
6096 static struct iv_ca *
6097 find_optimal_iv_set (struct ivopts_data *data)
6098 {
6099 unsigned i;
6100 struct iv_ca *set, *origset;
6101 struct iv_use *use;
6102 comp_cost cost, origcost;
6103
6104 /* Determine the cost based on a strategy that starts with original IVs,
6105 and try again using a strategy that prefers candidates not based
6106 on any IVs. */
6107 origset = find_optimal_iv_set_1 (data, true);
6108 set = find_optimal_iv_set_1 (data, false);
6109
6110 if (!origset && !set)
6111 return NULL;
6112
6113 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6114 cost = set ? iv_ca_cost (set) : infinite_cost;
6115
6116 if (dump_file && (dump_flags & TDF_DETAILS))
6117 {
6118 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6119 origcost.cost, origcost.complexity);
6120 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6121 cost.cost, cost.complexity);
6122 }
6123
6124 /* Choose the one with the best cost. */
6125 if (compare_costs (origcost, cost) <= 0)
6126 {
6127 if (set)
6128 iv_ca_free (&set);
6129 set = origset;
6130 }
6131 else if (origset)
6132 iv_ca_free (&origset);
6133
6134 for (i = 0; i < n_iv_uses (data); i++)
6135 {
6136 use = iv_use (data, i);
6137 use->selected = iv_ca_cand_for_use (set, use)->cand;
6138 }
6139
6140 return set;
6141 }
6142
6143 /* Creates a new induction variable corresponding to CAND. */
6144
6145 static void
6146 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6147 {
6148 gimple_stmt_iterator incr_pos;
6149 tree base;
6150 bool after = false;
6151
6152 if (!cand->iv)
6153 return;
6154
6155 switch (cand->pos)
6156 {
6157 case IP_NORMAL:
6158 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6159 break;
6160
6161 case IP_END:
6162 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6163 after = true;
6164 break;
6165
6166 case IP_AFTER_USE:
6167 after = true;
6168 /* fall through */
6169 case IP_BEFORE_USE:
6170 incr_pos = gsi_for_stmt (cand->incremented_at);
6171 break;
6172
6173 case IP_ORIGINAL:
6174 /* Mark that the iv is preserved. */
6175 name_info (data, cand->var_before)->preserve_biv = true;
6176 name_info (data, cand->var_after)->preserve_biv = true;
6177
6178 /* Rewrite the increment so that it uses var_before directly. */
6179 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6180 return;
6181 }
6182
6183 gimple_add_tmp_var (cand->var_before);
6184
6185 base = unshare_expr (cand->iv->base);
6186
6187 create_iv (base, unshare_expr (cand->iv->step),
6188 cand->var_before, data->current_loop,
6189 &incr_pos, after, &cand->var_before, &cand->var_after);
6190 }
6191
6192 /* Creates new induction variables described in SET. */
6193
6194 static void
6195 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6196 {
6197 unsigned i;
6198 struct iv_cand *cand;
6199 bitmap_iterator bi;
6200
6201 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6202 {
6203 cand = iv_cand (data, i);
6204 create_new_iv (data, cand);
6205 }
6206
6207 if (dump_file && (dump_flags & TDF_DETAILS))
6208 {
6209 fprintf (dump_file, "\nSelected IV set: \n");
6210 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6211 {
6212 cand = iv_cand (data, i);
6213 dump_cand (dump_file, cand);
6214 }
6215 fprintf (dump_file, "\n");
6216 }
6217 }
6218
6219 /* Rewrites USE (definition of iv used in a nonlinear expression)
6220 using candidate CAND. */
6221
6222 static void
6223 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6224 struct iv_use *use, struct iv_cand *cand)
6225 {
6226 tree comp;
6227 tree op, tgt;
6228 gimple ass;
6229 gimple_stmt_iterator bsi;
6230
6231 /* An important special case -- if we are asked to express value of
6232 the original iv by itself, just exit; there is no need to
6233 introduce a new computation (that might also need casting the
6234 variable to unsigned and back). */
6235 if (cand->pos == IP_ORIGINAL
6236 && cand->incremented_at == use->stmt)
6237 {
6238 enum tree_code stmt_code;
6239
6240 gcc_assert (is_gimple_assign (use->stmt));
6241 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6242
6243 /* Check whether we may leave the computation unchanged.
6244 This is the case only if it does not rely on other
6245 computations in the loop -- otherwise, the computation
6246 we rely upon may be removed in remove_unused_ivs,
6247 thus leading to ICE. */
6248 stmt_code = gimple_assign_rhs_code (use->stmt);
6249 if (stmt_code == PLUS_EXPR
6250 || stmt_code == MINUS_EXPR
6251 || stmt_code == POINTER_PLUS_EXPR)
6252 {
6253 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6254 op = gimple_assign_rhs2 (use->stmt);
6255 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6256 op = gimple_assign_rhs1 (use->stmt);
6257 else
6258 op = NULL_TREE;
6259 }
6260 else
6261 op = NULL_TREE;
6262
6263 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6264 return;
6265 }
6266
6267 comp = get_computation (data->current_loop, use, cand);
6268 gcc_assert (comp != NULL_TREE);
6269
6270 switch (gimple_code (use->stmt))
6271 {
6272 case GIMPLE_PHI:
6273 tgt = PHI_RESULT (use->stmt);
6274
6275 /* If we should keep the biv, do not replace it. */
6276 if (name_info (data, tgt)->preserve_biv)
6277 return;
6278
6279 bsi = gsi_after_labels (gimple_bb (use->stmt));
6280 break;
6281
6282 case GIMPLE_ASSIGN:
6283 tgt = gimple_assign_lhs (use->stmt);
6284 bsi = gsi_for_stmt (use->stmt);
6285 break;
6286
6287 default:
6288 gcc_unreachable ();
6289 }
6290
6291 if (!valid_gimple_rhs_p (comp)
6292 || (gimple_code (use->stmt) != GIMPLE_PHI
6293 /* We can't allow re-allocating the stmt as it might be pointed
6294 to still. */
6295 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6296 >= gimple_num_ops (gsi_stmt (bsi)))))
6297 {
6298 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6299 true, GSI_SAME_STMT);
6300 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6301 {
6302 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6303 /* As this isn't a plain copy we have to reset alignment
6304 information. */
6305 if (SSA_NAME_PTR_INFO (comp))
6306 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6307 }
6308 }
6309
6310 if (gimple_code (use->stmt) == GIMPLE_PHI)
6311 {
6312 ass = gimple_build_assign (tgt, comp);
6313 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6314
6315 bsi = gsi_for_stmt (use->stmt);
6316 remove_phi_node (&bsi, false);
6317 }
6318 else
6319 {
6320 gimple_assign_set_rhs_from_tree (&bsi, comp);
6321 use->stmt = gsi_stmt (bsi);
6322 }
6323 }
6324
6325 /* Performs a peephole optimization to reorder the iv update statement with
6326 a mem ref to enable instruction combining in later phases. The mem ref uses
6327 the iv value before the update, so the reordering transformation requires
6328 adjustment of the offset. CAND is the selected IV_CAND.
6329
6330 Example:
6331
6332 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6333 iv2 = iv1 + 1;
6334
6335 if (t < val) (1)
6336 goto L;
6337 goto Head;
6338
6339
6340 directly propagating t over to (1) will introduce overlapping live range
6341 thus increase register pressure. This peephole transform it into:
6342
6343
6344 iv2 = iv1 + 1;
6345 t = MEM_REF (base, iv2, 8, 8);
6346 if (t < val)
6347 goto L;
6348 goto Head;
6349 */
6350
6351 static void
6352 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6353 {
6354 tree var_after;
6355 gimple iv_update, stmt;
6356 basic_block bb;
6357 gimple_stmt_iterator gsi, gsi_iv;
6358
6359 if (cand->pos != IP_NORMAL)
6360 return;
6361
6362 var_after = cand->var_after;
6363 iv_update = SSA_NAME_DEF_STMT (var_after);
6364
6365 bb = gimple_bb (iv_update);
6366 gsi = gsi_last_nondebug_bb (bb);
6367 stmt = gsi_stmt (gsi);
6368
6369 /* Only handle conditional statement for now. */
6370 if (gimple_code (stmt) != GIMPLE_COND)
6371 return;
6372
6373 gsi_prev_nondebug (&gsi);
6374 stmt = gsi_stmt (gsi);
6375 if (stmt != iv_update)
6376 return;
6377
6378 gsi_prev_nondebug (&gsi);
6379 if (gsi_end_p (gsi))
6380 return;
6381
6382 stmt = gsi_stmt (gsi);
6383 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6384 return;
6385
6386 if (stmt != use->stmt)
6387 return;
6388
6389 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6390 return;
6391
6392 if (dump_file && (dump_flags & TDF_DETAILS))
6393 {
6394 fprintf (dump_file, "Reordering \n");
6395 print_gimple_stmt (dump_file, iv_update, 0, 0);
6396 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6397 fprintf (dump_file, "\n");
6398 }
6399
6400 gsi = gsi_for_stmt (use->stmt);
6401 gsi_iv = gsi_for_stmt (iv_update);
6402 gsi_move_before (&gsi_iv, &gsi);
6403
6404 cand->pos = IP_BEFORE_USE;
6405 cand->incremented_at = use->stmt;
6406 }
6407
6408 /* Rewrites USE (address that is an iv) using candidate CAND. */
6409
6410 static void
6411 rewrite_use_address (struct ivopts_data *data,
6412 struct iv_use *use, struct iv_cand *cand)
6413 {
6414 aff_tree aff;
6415 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6416 tree base_hint = NULL_TREE;
6417 tree ref, iv;
6418 bool ok;
6419
6420 adjust_iv_update_pos (cand, use);
6421 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6422 gcc_assert (ok);
6423 unshare_aff_combination (&aff);
6424
6425 /* To avoid undefined overflow problems, all IV candidates use unsigned
6426 integer types. The drawback is that this makes it impossible for
6427 create_mem_ref to distinguish an IV that is based on a memory object
6428 from one that represents simply an offset.
6429
6430 To work around this problem, we pass a hint to create_mem_ref that
6431 indicates which variable (if any) in aff is an IV based on a memory
6432 object. Note that we only consider the candidate. If this is not
6433 based on an object, the base of the reference is in some subexpression
6434 of the use -- but these will use pointer types, so they are recognized
6435 by the create_mem_ref heuristics anyway. */
6436 if (cand->iv->base_object)
6437 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6438
6439 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6440 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6441 reference_alias_ptr_type (*use->op_p),
6442 iv, base_hint, data->speed);
6443 copy_ref_info (ref, *use->op_p);
6444 *use->op_p = ref;
6445 }
6446
6447 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6448 candidate CAND. */
6449
6450 static void
6451 rewrite_use_compare (struct ivopts_data *data,
6452 struct iv_use *use, struct iv_cand *cand)
6453 {
6454 tree comp, *var_p, op, bound;
6455 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6456 enum tree_code compare;
6457 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6458 bool ok;
6459
6460 bound = cp->value;
6461 if (bound)
6462 {
6463 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6464 tree var_type = TREE_TYPE (var);
6465 gimple_seq stmts;
6466
6467 if (dump_file && (dump_flags & TDF_DETAILS))
6468 {
6469 fprintf (dump_file, "Replacing exit test: ");
6470 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6471 }
6472 compare = cp->comp;
6473 bound = unshare_expr (fold_convert (var_type, bound));
6474 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6475 if (stmts)
6476 gsi_insert_seq_on_edge_immediate (
6477 loop_preheader_edge (data->current_loop),
6478 stmts);
6479
6480 gimple_cond_set_lhs (use->stmt, var);
6481 gimple_cond_set_code (use->stmt, compare);
6482 gimple_cond_set_rhs (use->stmt, op);
6483 return;
6484 }
6485
6486 /* The induction variable elimination failed; just express the original
6487 giv. */
6488 comp = get_computation (data->current_loop, use, cand);
6489 gcc_assert (comp != NULL_TREE);
6490
6491 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6492 gcc_assert (ok);
6493
6494 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6495 true, GSI_SAME_STMT);
6496 }
6497
6498 /* Rewrites USE using candidate CAND. */
6499
6500 static void
6501 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6502 {
6503 switch (use->type)
6504 {
6505 case USE_NONLINEAR_EXPR:
6506 rewrite_use_nonlinear_expr (data, use, cand);
6507 break;
6508
6509 case USE_ADDRESS:
6510 rewrite_use_address (data, use, cand);
6511 break;
6512
6513 case USE_COMPARE:
6514 rewrite_use_compare (data, use, cand);
6515 break;
6516
6517 default:
6518 gcc_unreachable ();
6519 }
6520
6521 update_stmt (use->stmt);
6522 }
6523
6524 /* Rewrite the uses using the selected induction variables. */
6525
6526 static void
6527 rewrite_uses (struct ivopts_data *data)
6528 {
6529 unsigned i;
6530 struct iv_cand *cand;
6531 struct iv_use *use;
6532
6533 for (i = 0; i < n_iv_uses (data); i++)
6534 {
6535 use = iv_use (data, i);
6536 cand = use->selected;
6537 gcc_assert (cand);
6538
6539 rewrite_use (data, use, cand);
6540 }
6541 }
6542
6543 /* Removes the ivs that are not used after rewriting. */
6544
6545 static void
6546 remove_unused_ivs (struct ivopts_data *data)
6547 {
6548 unsigned j;
6549 bitmap_iterator bi;
6550 bitmap toremove = BITMAP_ALLOC (NULL);
6551
6552 /* Figure out an order in which to release SSA DEFs so that we don't
6553 release something that we'd have to propagate into a debug stmt
6554 afterwards. */
6555 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6556 {
6557 struct version_info *info;
6558
6559 info = ver_info (data, j);
6560 if (info->iv
6561 && !integer_zerop (info->iv->step)
6562 && !info->inv_id
6563 && !info->iv->have_use_for
6564 && !info->preserve_biv)
6565 {
6566 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6567
6568 tree def = info->iv->ssa_name;
6569
6570 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6571 {
6572 imm_use_iterator imm_iter;
6573 use_operand_p use_p;
6574 gimple stmt;
6575 int count = 0;
6576
6577 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6578 {
6579 if (!gimple_debug_bind_p (stmt))
6580 continue;
6581
6582 /* We just want to determine whether to do nothing
6583 (count == 0), to substitute the computed
6584 expression into a single use of the SSA DEF by
6585 itself (count == 1), or to use a debug temp
6586 because the SSA DEF is used multiple times or as
6587 part of a larger expression (count > 1). */
6588 count++;
6589 if (gimple_debug_bind_get_value (stmt) != def)
6590 count++;
6591
6592 if (count > 1)
6593 BREAK_FROM_IMM_USE_STMT (imm_iter);
6594 }
6595
6596 if (!count)
6597 continue;
6598
6599 struct iv_use dummy_use;
6600 struct iv_cand *best_cand = NULL, *cand;
6601 unsigned i, best_pref = 0, cand_pref;
6602
6603 memset (&dummy_use, 0, sizeof (dummy_use));
6604 dummy_use.iv = info->iv;
6605 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6606 {
6607 cand = iv_use (data, i)->selected;
6608 if (cand == best_cand)
6609 continue;
6610 cand_pref = operand_equal_p (cand->iv->step,
6611 info->iv->step, 0)
6612 ? 4 : 0;
6613 cand_pref
6614 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6615 == TYPE_MODE (TREE_TYPE (info->iv->base))
6616 ? 2 : 0;
6617 cand_pref
6618 += TREE_CODE (cand->iv->base) == INTEGER_CST
6619 ? 1 : 0;
6620 if (best_cand == NULL || best_pref < cand_pref)
6621 {
6622 best_cand = cand;
6623 best_pref = cand_pref;
6624 }
6625 }
6626
6627 if (!best_cand)
6628 continue;
6629
6630 tree comp = get_computation_at (data->current_loop,
6631 &dummy_use, best_cand,
6632 SSA_NAME_DEF_STMT (def));
6633 if (!comp)
6634 continue;
6635
6636 if (count > 1)
6637 {
6638 tree vexpr = make_node (DEBUG_EXPR_DECL);
6639 DECL_ARTIFICIAL (vexpr) = 1;
6640 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6641 if (SSA_NAME_VAR (def))
6642 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6643 else
6644 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6645 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6646 gimple_stmt_iterator gsi;
6647
6648 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6649 gsi = gsi_after_labels (gimple_bb
6650 (SSA_NAME_DEF_STMT (def)));
6651 else
6652 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6653
6654 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6655 comp = vexpr;
6656 }
6657
6658 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6659 {
6660 if (!gimple_debug_bind_p (stmt))
6661 continue;
6662
6663 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6664 SET_USE (use_p, comp);
6665
6666 update_stmt (stmt);
6667 }
6668 }
6669 }
6670 }
6671
6672 release_defs_bitset (toremove);
6673
6674 BITMAP_FREE (toremove);
6675 }
6676
6677 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6678 for hash_map::traverse. */
6679
6680 bool
6681 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6682 {
6683 free (value);
6684 return true;
6685 }
6686
6687 /* Frees data allocated by the optimization of a single loop. */
6688
6689 static void
6690 free_loop_data (struct ivopts_data *data)
6691 {
6692 unsigned i, j;
6693 bitmap_iterator bi;
6694 tree obj;
6695
6696 if (data->niters)
6697 {
6698 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6699 delete data->niters;
6700 data->niters = NULL;
6701 }
6702
6703 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6704 {
6705 struct version_info *info;
6706
6707 info = ver_info (data, i);
6708 free (info->iv);
6709 info->iv = NULL;
6710 info->has_nonlin_use = false;
6711 info->preserve_biv = false;
6712 info->inv_id = 0;
6713 }
6714 bitmap_clear (data->relevant);
6715 bitmap_clear (data->important_candidates);
6716
6717 for (i = 0; i < n_iv_uses (data); i++)
6718 {
6719 struct iv_use *use = iv_use (data, i);
6720
6721 free (use->iv);
6722 BITMAP_FREE (use->related_cands);
6723 for (j = 0; j < use->n_map_members; j++)
6724 if (use->cost_map[j].depends_on)
6725 BITMAP_FREE (use->cost_map[j].depends_on);
6726 free (use->cost_map);
6727 free (use);
6728 }
6729 data->iv_uses.truncate (0);
6730
6731 for (i = 0; i < n_iv_cands (data); i++)
6732 {
6733 struct iv_cand *cand = iv_cand (data, i);
6734
6735 free (cand->iv);
6736 if (cand->depends_on)
6737 BITMAP_FREE (cand->depends_on);
6738 free (cand);
6739 }
6740 data->iv_candidates.truncate (0);
6741
6742 if (data->version_info_size < num_ssa_names)
6743 {
6744 data->version_info_size = 2 * num_ssa_names;
6745 free (data->version_info);
6746 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6747 }
6748
6749 data->max_inv_id = 0;
6750
6751 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6752 SET_DECL_RTL (obj, NULL_RTX);
6753
6754 decl_rtl_to_reset.truncate (0);
6755
6756 data->inv_expr_tab->empty ();
6757 data->inv_expr_id = 0;
6758 }
6759
6760 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
6761 loop tree. */
6762
6763 static void
6764 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6765 {
6766 free_loop_data (data);
6767 free (data->version_info);
6768 BITMAP_FREE (data->relevant);
6769 BITMAP_FREE (data->important_candidates);
6770
6771 decl_rtl_to_reset.release ();
6772 data->iv_uses.release ();
6773 data->iv_candidates.release ();
6774 delete data->inv_expr_tab;
6775 data->inv_expr_tab = NULL;
6776 free_affine_expand_cache (&data->name_expansion_cache);
6777 }
6778
6779 /* Returns true if the loop body BODY includes any function calls. */
6780
6781 static bool
6782 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6783 {
6784 gimple_stmt_iterator gsi;
6785 unsigned i;
6786
6787 for (i = 0; i < num_nodes; i++)
6788 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6789 {
6790 gimple stmt = gsi_stmt (gsi);
6791 if (is_gimple_call (stmt)
6792 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6793 return true;
6794 }
6795 return false;
6796 }
6797
6798 /* Optimizes the LOOP. Returns true if anything changed. */
6799
6800 static bool
6801 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6802 {
6803 bool changed = false;
6804 struct iv_ca *iv_ca;
6805 edge exit = single_dom_exit (loop);
6806 basic_block *body;
6807
6808 gcc_assert (!data->niters);
6809 data->current_loop = loop;
6810 data->speed = optimize_loop_for_speed_p (loop);
6811
6812 if (dump_file && (dump_flags & TDF_DETAILS))
6813 {
6814 fprintf (dump_file, "Processing loop %d\n", loop->num);
6815
6816 if (exit)
6817 {
6818 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6819 exit->src->index, exit->dest->index);
6820 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6821 fprintf (dump_file, "\n");
6822 }
6823
6824 fprintf (dump_file, "\n");
6825 }
6826
6827 body = get_loop_body (loop);
6828 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6829 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6830 free (body);
6831
6832 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6833
6834 /* For each ssa name determines whether it behaves as an induction variable
6835 in some loop. */
6836 if (!find_induction_variables (data))
6837 goto finish;
6838
6839 /* Finds interesting uses (item 1). */
6840 find_interesting_uses (data);
6841 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6842 goto finish;
6843
6844 /* Finds candidates for the induction variables (item 2). */
6845 find_iv_candidates (data);
6846
6847 /* Calculates the costs (item 3, part 1). */
6848 determine_iv_costs (data);
6849 determine_use_iv_costs (data);
6850 determine_set_costs (data);
6851
6852 /* Find the optimal set of induction variables (item 3, part 2). */
6853 iv_ca = find_optimal_iv_set (data);
6854 if (!iv_ca)
6855 goto finish;
6856 changed = true;
6857
6858 /* Create the new induction variables (item 4, part 1). */
6859 create_new_ivs (data, iv_ca);
6860 iv_ca_free (&iv_ca);
6861
6862 /* Rewrite the uses (item 4, part 2). */
6863 rewrite_uses (data);
6864
6865 /* Remove the ivs that are unused after rewriting. */
6866 remove_unused_ivs (data);
6867
6868 /* We have changed the structure of induction variables; it might happen
6869 that definitions in the scev database refer to some of them that were
6870 eliminated. */
6871 scev_reset ();
6872
6873 finish:
6874 free_loop_data (data);
6875
6876 return changed;
6877 }
6878
6879 /* Main entry point. Optimizes induction variables in loops. */
6880
6881 void
6882 tree_ssa_iv_optimize (void)
6883 {
6884 struct loop *loop;
6885 struct ivopts_data data;
6886
6887 tree_ssa_iv_optimize_init (&data);
6888
6889 /* Optimize the loops starting with the innermost ones. */
6890 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6891 {
6892 if (dump_file && (dump_flags & TDF_DETAILS))
6893 flow_loop_dump (loop, dump_file, NULL, 1);
6894
6895 tree_ssa_iv_optimize_loop (&data, loop);
6896 }
6897
6898 tree_ssa_iv_optimize_finalize (&data);
6899 }