/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
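
/* An illustrative sketch of the transformation (invented variables, not
   code from this pass): a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   contains an address use a[i].  Choosing a pointer candidate with base
   &a[0] and step sizeof (*a) allows the loop to be rewritten roughly as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   replacing the multiply hidden in the a[i] addressing by a cheap pointer
   increment; the original counter i may then become dead and be removed.  */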

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine the number of iterations.  We cannot safely work
	 with ssa names that appear in phi nodes on abnormal edges, lest we
	 create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
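
/* Example (a sketch, not taken from the sources): for EXPR == &a[i] the
   ADDR_EXPR case finds the base address of the reference, so the function
   returns the address of the declaration "a" converted to ptr_type_node;
   for an integer constant such as (char *) 0, NULL_TREE is returned, as
   no object is pointed to.  */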

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
	forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      double_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
					     &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
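
/* A small illustration (ssa names invented for this sketch): for a loop
   header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   where the latch contains i_2 = i_1 + 4, determine_biv_step returns 4.
   If the phi result does not evolve as a simple affine iv, or its step
   is zero, NULL_TREE is returned instead.  */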

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      step = determine_biv_step (phi);
      if (!step)
	continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not reset by the caller, so that it is
     not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
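
/* Sketch of the expected behavior (invented ssa names): for an exit test

     if (i_3 < n_5)

   where i_3 is an iv with nonzero step and n_5 is loop invariant, true is
   returned with *CONTROL_VAR pointing at i_3 and *BOUND at n_5.  If the
   iv appears on the right-hand side instead, the operands are swapped
   first, so the iv always ends up in *CONTROL_VAR.  */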

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic already is 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
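
/* A hedged example: for a reference a[i] whose index i is an iv with step 1
   and whose element size is 4 bytes, the index is replaced by its initial
   value and 1 * 4 = 4 is accumulated into DTA->step, so the surrounding
   address is later treated as an iv stepping by 4 bytes per iteration.  */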

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
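
/* For example (a sketch; n stands for an arbitrary expression): with
   TOP = n * 6 and BOT = n, the MULT_EXPR case recurses on n, which matches
   BOT with constant 1, and multiplies by 6, storing 6 to *MUL.  With
   TOP = 12 and BOT = 8, the INTEGER_CST case fails because the floor
   division leaves a nonzero remainder.  */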

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  info = ver_info (data, i);
	  if (info->inv_id)
	    {
	      fprintf (dump_file, "  ");
	      print_generic_expr (dump_file, info->name, TDF_SLIM);
	      fprintf (dump_file, " is invariant (%d)%s\n",
		       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
	    }
	}

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */
2048
2049 static tree
2050 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2051 HOST_WIDE_INT *offset)
2052 {
2053 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2054 enum tree_code code;
2055 tree type, orig_type = TREE_TYPE (expr);
2056 HOST_WIDE_INT off0, off1, st;
2057 tree orig_expr = expr;
2058
2059 STRIP_NOPS (expr);
2060
2061 type = TREE_TYPE (expr);
2062 code = TREE_CODE (expr);
2063 *offset = 0;
2064
2065 switch (code)
2066 {
2067 case INTEGER_CST:
2068 if (!cst_and_fits_in_hwi (expr)
2069 || integer_zerop (expr))
2070 return orig_expr;
2071
2072 *offset = int_cst_value (expr);
2073 return build_int_cst (orig_type, 0);
2074
2075 case POINTER_PLUS_EXPR:
2076 case PLUS_EXPR:
2077 case MINUS_EXPR:
2078 op0 = TREE_OPERAND (expr, 0);
2079 op1 = TREE_OPERAND (expr, 1);
2080
2081 op0 = strip_offset_1 (op0, false, false, &off0);
2082 op1 = strip_offset_1 (op1, false, false, &off1);
2083
2084 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2085 if (op0 == TREE_OPERAND (expr, 0)
2086 && op1 == TREE_OPERAND (expr, 1))
2087 return orig_expr;
2088
2089 if (integer_zerop (op1))
2090 expr = op0;
2091 else if (integer_zerop (op0))
2092 {
2093 if (code == MINUS_EXPR)
2094 expr = fold_build1 (NEGATE_EXPR, type, op1);
2095 else
2096 expr = op1;
2097 }
2098 else
2099 expr = fold_build2 (code, type, op0, op1);
2100
2101 return fold_convert (orig_type, expr);
2102
2103 case MULT_EXPR:
2104 op1 = TREE_OPERAND (expr, 1);
2105 if (!cst_and_fits_in_hwi (op1))
2106 return orig_expr;
2107
2108 op0 = TREE_OPERAND (expr, 0);
2109 op0 = strip_offset_1 (op0, false, false, &off0);
2110 if (op0 == TREE_OPERAND (expr, 0))
2111 return orig_expr;
2112
2113 *offset = off0 * int_cst_value (op1);
2114 if (integer_zerop (op0))
2115 expr = op0;
2116 else
2117 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2118
2119 return fold_convert (orig_type, expr);
2120
2121 case ARRAY_REF:
2122 case ARRAY_RANGE_REF:
2123 if (!inside_addr)
2124 return orig_expr;
2125
2126 step = array_ref_element_size (expr);
2127 if (!cst_and_fits_in_hwi (step))
2128 break;
2129
2130 st = int_cst_value (step);
2131 op1 = TREE_OPERAND (expr, 1);
2132 op1 = strip_offset_1 (op1, false, false, &off1);
2133 *offset = off1 * st;
2134
2135 if (top_compref
2136 && integer_zerop (op1))
2137 {
2138 /* Strip the component reference completely. */
2139 op0 = TREE_OPERAND (expr, 0);
2140 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2141 *offset += off0;
2142 return op0;
2143 }
2144 break;
2145
2146 case COMPONENT_REF:
2147 {
2148 tree field;
2149
2150 if (!inside_addr)
2151 return orig_expr;
2152
2153 tmp = component_ref_field_offset (expr);
2154 field = TREE_OPERAND (expr, 1);
2155 if (top_compref
2156 && cst_and_fits_in_hwi (tmp)
2157 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2158 {
2159 HOST_WIDE_INT boffset, abs_off;
2160
2161 /* Strip the component reference completely. */
2162 op0 = TREE_OPERAND (expr, 0);
2163 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2164 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2165 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2166 if (boffset < 0)
2167 abs_off = -abs_off;
2168
2169 *offset = off0 + int_cst_value (tmp) + abs_off;
2170 return op0;
2171 }
2172 }
2173 break;
2174
2175 case ADDR_EXPR:
2176 op0 = TREE_OPERAND (expr, 0);
2177 op0 = strip_offset_1 (op0, true, true, &off0);
2178 *offset += off0;
2179
2180 if (op0 == TREE_OPERAND (expr, 0))
2181 return orig_expr;
2182
2183 expr = build_fold_addr_expr (op0);
2184 return fold_convert (orig_type, expr);
2185
2186 case MEM_REF:
2187 /* ??? Offset operand? */
2188 inside_addr = false;
2189 break;
2190
2191 default:
2192 return orig_expr;
2193 }
2194
2195 /* Default handling of expressions for which we want to recurse into
2196 the first operand. */
2197 op0 = TREE_OPERAND (expr, 0);
2198 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2199 *offset += off0;
2200
2201 if (op0 == TREE_OPERAND (expr, 0)
2202 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2203 return orig_expr;
2204
2205 expr = copy_node (expr);
2206 TREE_OPERAND (expr, 0) = op0;
2207 if (op1)
2208 TREE_OPERAND (expr, 1) = op1;
2209
2210 /* Inside an address, we might strip top-level component references,
2211 thus changing the type of the expression. Handling of ADDR_EXPR
2212 will fix that. */
2213 expr = fold_convert (orig_type, expr);
2214
2215 return expr;
2216 }
2217
2218 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2219
2220 static tree
2221 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2222 {
2223 HOST_WIDE_INT off;
2224 tree core = strip_offset_1 (expr, false, false, &off);
2225 *offset = off;
2226 return core;
2227 }
2228
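/* As an illustrative example (hypothetical names, not taken from this
   file): if A is an array of 4-byte integers, then for EXPR = &a[i + 3]
   the recursion proceeds through the ADDR_EXPR into the ARRAY_REF, the
   element size 4 multiplies the constant part of the index, and the call
   returns &a[i] with *OFFSET set to 12.  A non-constant index such as i
   stays in the returned core expression.  */
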
2229 /* Returns a variant of TYPE that can be used as a base for different uses.
2230 We return an unsigned type with the same precision, which avoids problems
2231 with overflows. */
2232
2233 static tree
2234 generic_type_for (tree type)
2235 {
2236 if (POINTER_TYPE_P (type))
2237 return unsigned_type_for (type);
2238
2239 if (TYPE_UNSIGNED (type))
2240 return type;
2241
2242 return unsigned_type_for (type);
2243 }
2244
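/* A sketch of the intended behavior: for a 32-bit "int" this returns
   "unsigned int", and for a pointer type it returns an unsigned integer
   type of the same precision, so that arithmetic on the candidate wraps
   instead of invoking undefined behavior on overflow.  */
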
2245 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2246 the bitmap in which we should store them. */
2247
2248 static struct ivopts_data *fd_ivopts_data;
2249 static tree
2250 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2251 {
2252 bitmap *depends_on = (bitmap *) data;
2253 struct version_info *info;
2254
2255 if (TREE_CODE (*expr_p) != SSA_NAME)
2256 return NULL_TREE;
2257 info = name_info (fd_ivopts_data, *expr_p);
2258
2259 if (!info->inv_id || info->has_nonlin_use)
2260 return NULL_TREE;
2261
2262 if (!*depends_on)
2263 *depends_on = BITMAP_ALLOC (NULL);
2264 bitmap_set_bit (*depends_on, info->inv_id);
2265
2266 return NULL_TREE;
2267 }
2268
2269 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2270 and the position to POS. If USE is not NULL, the candidate is set as related to
2271 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2272 replacement of the final value of the iv by a direct computation. */
2273
2274 static struct iv_cand *
2275 add_candidate_1 (struct ivopts_data *data,
2276 tree base, tree step, bool important, enum iv_position pos,
2277 struct iv_use *use, gimple incremented_at)
2278 {
2279 unsigned i;
2280 struct iv_cand *cand = NULL;
2281 tree type, orig_type;
2282
2283 /* For non-original variables, make sure their values are computed in a type
2284 that does not invoke undefined behavior on overflows (since in general,
2285 we cannot prove that these induction variables are non-wrapping). */
2286 if (pos != IP_ORIGINAL)
2287 {
2288 orig_type = TREE_TYPE (base);
2289 type = generic_type_for (orig_type);
2290 if (type != orig_type)
2291 {
2292 base = fold_convert (type, base);
2293 step = fold_convert (type, step);
2294 }
2295 }
2296
2297 for (i = 0; i < n_iv_cands (data); i++)
2298 {
2299 cand = iv_cand (data, i);
2300
2301 if (cand->pos != pos)
2302 continue;
2303
2304 if (cand->incremented_at != incremented_at
2305 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2306 && cand->ainc_use != use))
2307 continue;
2308
2309 if (!cand->iv)
2310 {
2311 if (!base && !step)
2312 break;
2313
2314 continue;
2315 }
2316
2317 if (!base && !step)
2318 continue;
2319
2320 if (operand_equal_p (base, cand->iv->base, 0)
2321 && operand_equal_p (step, cand->iv->step, 0)
2322 && (TYPE_PRECISION (TREE_TYPE (base))
2323 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2324 break;
2325 }
2326
2327 if (i == n_iv_cands (data))
2328 {
2329 cand = XCNEW (struct iv_cand);
2330 cand->id = i;
2331
2332 if (!base && !step)
2333 cand->iv = NULL;
2334 else
2335 cand->iv = alloc_iv (base, step);
2336
2337 cand->pos = pos;
2338 if (pos != IP_ORIGINAL && cand->iv)
2339 {
2340 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2341 cand->var_after = cand->var_before;
2342 }
2343 cand->important = important;
2344 cand->incremented_at = incremented_at;
2345 data->iv_candidates.safe_push (cand);
2346
2347 if (step
2348 && TREE_CODE (step) != INTEGER_CST)
2349 {
2350 fd_ivopts_data = data;
2351 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2352 }
2353
2354 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2355 cand->ainc_use = use;
2356 else
2357 cand->ainc_use = NULL;
2358
2359 if (dump_file && (dump_flags & TDF_DETAILS))
2360 dump_cand (dump_file, cand);
2361 }
2362
2363 if (important && !cand->important)
2364 {
2365 cand->important = true;
2366 if (dump_file && (dump_flags & TDF_DETAILS))
2367 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2368 }
2369
2370 if (use)
2371 {
2372 bitmap_set_bit (use->related_cands, i);
2373 if (dump_file && (dump_flags & TDF_DETAILS))
2374 fprintf (dump_file, "Candidate %d is related to use %d\n",
2375 cand->id, use->id);
2376 }
2377
2378 return cand;
2379 }
2380
2381 /* Returns true if incrementing the induction variable at the end of the LOOP
2382 is allowed.
2383
2384 The purpose is to avoid splitting the latch edge with a biv increment, thus
2385 creating a jump, possibly confusing other optimization passes and leaving
2386 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2387 is not available (so we do not have a better alternative), or if the latch
2388 edge is already nonempty. */
2389
2390 static bool
2391 allow_ip_end_pos_p (struct loop *loop)
2392 {
2393 if (!ip_normal_pos (loop))
2394 return true;
2395
2396 if (!empty_block_p (ip_end_pos (loop)))
2397 return true;
2398
2399 return false;
2400 }
2401
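/* For example: if the latch block already contains statements, placing
   the increment there creates no new jump, so IP_END is allowed even when
   IP_NORMAL_POS is also available; with an empty latch and an available
   IP_NORMAL_POS, the end position is rejected.  */
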
2402 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2403 The important field is set to IMPORTANT. */
2404
2405 static void
2406 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2407 bool important, struct iv_use *use)
2408 {
2409 basic_block use_bb = gimple_bb (use->stmt);
2410 enum machine_mode mem_mode;
2411 unsigned HOST_WIDE_INT cstepi;
2412
2413 /* If we insert the increment in any position other than the standard
2414 ones, we must ensure that it is incremented once per iteration.
2415 It must not be in an inner nested loop, or on one side of an if
2416 statement. */
2417 if (use_bb->loop_father != data->current_loop
2418 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2419 || stmt_could_throw_p (use->stmt)
2420 || !cst_and_fits_in_hwi (step))
2421 return;
2422
2423 cstepi = int_cst_value (step);
2424
2425 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2426 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2427 || USE_STORE_PRE_INCREMENT (mem_mode))
2428 && GET_MODE_SIZE (mem_mode) == cstepi)
2429 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2430 || USE_STORE_PRE_DECREMENT (mem_mode))
2431 && GET_MODE_SIZE (mem_mode) == -cstepi))
2432 {
2433 enum tree_code code = MINUS_EXPR;
2434 tree new_base;
2435 tree new_step = step;
2436
2437 if (POINTER_TYPE_P (TREE_TYPE (base)))
2438 {
2439 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2440 code = POINTER_PLUS_EXPR;
2441 }
2442 else
2443 new_step = fold_convert (TREE_TYPE (base), new_step);
2444 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2445 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2446 use->stmt);
2447 }
2448 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2449 || USE_STORE_POST_INCREMENT (mem_mode))
2450 && GET_MODE_SIZE (mem_mode) == cstepi)
2451 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2452 || USE_STORE_POST_DECREMENT (mem_mode))
2453 && GET_MODE_SIZE (mem_mode) == -cstepi))
2454 {
2455 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2456 use->stmt);
2457 }
2458 }
2459
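/* A hypothetical example: on a target with 4-byte pre-increment
   addressing, a use *p with a pointer iv of step 4 yields an
   IP_BEFORE_USE candidate with base P - 4 (so that the pre-incremented
   value equals P), and, when post-increment is supported, an
   IP_AFTER_USE candidate with base P.  */
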
2460 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2461 position to POS. If USE is not NULL, the candidate is set as related to
2462 it. The candidate computation is scheduled on all available positions. */
2463
2464 static void
2465 add_candidate (struct ivopts_data *data,
2466 tree base, tree step, bool important, struct iv_use *use)
2467 {
2468 if (ip_normal_pos (data->current_loop))
2469 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2470 if (ip_end_pos (data->current_loop)
2471 && allow_ip_end_pos_p (data->current_loop))
2472 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2473
2474 if (use != NULL && use->type == USE_ADDRESS)
2475 add_autoinc_candidates (data, base, step, important, use);
2476 }
2477
2478 /* Adds standard iv candidates. */
2479
2480 static void
2481 add_standard_iv_candidates (struct ivopts_data *data)
2482 {
2483 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2484
2485 /* The same for the long integer type if it is still fast enough. */
2486 if (TYPE_PRECISION
2487 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2488 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2489 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2490 build_int_cst (long_integer_type_node, 1), true, NULL);
2491
2492 /* The same for the long long integer type if it is still fast enough. */
2493 if (TYPE_PRECISION
2494 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2495 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2496 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2497 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2498 }
2499
2500
2501 /* Adds candidates based on the old induction variable IV. */
2502
2503 static void
2504 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2505 {
2506 gimple phi;
2507 tree def;
2508 struct iv_cand *cand;
2509
2510 add_candidate (data, iv->base, iv->step, true, NULL);
2511
2512 /* The same, but with initial value zero. */
2513 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2514 add_candidate (data, size_int (0), iv->step, true, NULL);
2515 else
2516 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2517 iv->step, true, NULL);
2518
2519 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2520 if (gimple_code (phi) == GIMPLE_PHI)
2521 {
2522 /* Additionally record the possibility of leaving the original iv
2523 untouched. */
2524 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2525 cand = add_candidate_1 (data,
2526 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2527 SSA_NAME_DEF_STMT (def));
2528 cand->var_before = iv->ssa_name;
2529 cand->var_after = def;
2530 }
2531 }
2532
2533 /* Adds candidates based on the old induction variables. */
2534
2535 static void
2536 add_old_ivs_candidates (struct ivopts_data *data)
2537 {
2538 unsigned i;
2539 struct iv *iv;
2540 bitmap_iterator bi;
2541
2542 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2543 {
2544 iv = ver_info (data, i)->iv;
2545 if (iv && iv->biv_p && !integer_zerop (iv->step))
2546 add_old_iv_candidates (data, iv);
2547 }
2548 }
2549
2550 /* Adds candidates based on the value of the induction variable IV and USE. */
2551
2552 static void
2553 add_iv_value_candidates (struct ivopts_data *data,
2554 struct iv *iv, struct iv_use *use)
2555 {
2556 unsigned HOST_WIDE_INT offset;
2557 tree base;
2558 tree basetype;
2559
2560 add_candidate (data, iv->base, iv->step, false, use);
2561
2562 /* The same, but with initial value zero. Make such a variable important,
2563 since it is generic enough that possibly many uses may be based
2564 on it. */
2565 basetype = TREE_TYPE (iv->base);
2566 if (POINTER_TYPE_P (basetype))
2567 basetype = sizetype;
2568 add_candidate (data, build_int_cst (basetype, 0),
2569 iv->step, true, use);
2570
2571 /* Third, try removing the constant offset. Make sure to even
2572 add a candidate for &a[0] vs. (T *)&a. */
2573 base = strip_offset (iv->base, &offset);
2574 if (offset
2575 || base != iv->base)
2576 add_candidate (data, base, iv->step, false, use);
2577 }
2578
2579 /* Adds candidates based on the uses. */
2580
2581 static void
2582 add_derived_ivs_candidates (struct ivopts_data *data)
2583 {
2584 unsigned i;
2585
2586 for (i = 0; i < n_iv_uses (data); i++)
2587 {
2588 struct iv_use *use = iv_use (data, i);
2589
2590 if (!use)
2591 continue;
2592
2593 switch (use->type)
2594 {
2595 case USE_NONLINEAR_EXPR:
2596 case USE_COMPARE:
2597 case USE_ADDRESS:
2598 /* Just add the ivs based on the value of the iv used here. */
2599 add_iv_value_candidates (data, use->iv, use);
2600 break;
2601
2602 default:
2603 gcc_unreachable ();
2604 }
2605 }
2606 }
2607
2608 /* Record important candidates and add them to related_cands bitmaps
2609 if needed. */
2610
2611 static void
2612 record_important_candidates (struct ivopts_data *data)
2613 {
2614 unsigned i;
2615 struct iv_use *use;
2616
2617 for (i = 0; i < n_iv_cands (data); i++)
2618 {
2619 struct iv_cand *cand = iv_cand (data, i);
2620
2621 if (cand->important)
2622 bitmap_set_bit (data->important_candidates, i);
2623 }
2624
2625 data->consider_all_candidates = (n_iv_cands (data)
2626 <= CONSIDER_ALL_CANDIDATES_BOUND);
2627
2628 if (data->consider_all_candidates)
2629 {
2630 /* We will not need "related_cands" bitmaps in this case,
2631 so release them to decrease peak memory consumption. */
2632 for (i = 0; i < n_iv_uses (data); i++)
2633 {
2634 use = iv_use (data, i);
2635 BITMAP_FREE (use->related_cands);
2636 }
2637 }
2638 else
2639 {
2640 /* Add important candidates to the related_cands bitmaps. */
2641 for (i = 0; i < n_iv_uses (data); i++)
2642 bitmap_ior_into (iv_use (data, i)->related_cands,
2643 data->important_candidates);
2644 }
2645 }
2646
2647 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2648 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2649 we allocate a simple list for every use. */
2650
2651 static void
2652 alloc_use_cost_map (struct ivopts_data *data)
2653 {
2654 unsigned i, size, s;
2655
2656 for (i = 0; i < n_iv_uses (data); i++)
2657 {
2658 struct iv_use *use = iv_use (data, i);
2659
2660 if (data->consider_all_candidates)
2661 size = n_iv_cands (data);
2662 else
2663 {
2664 s = bitmap_count_bits (use->related_cands);
2665
2666 /* Round up to the nearest power of two, so that taking the modulo by it is fast. */
2667 size = s ? (1 << ceil_log2 (s)) : 1;
2668 }
2669
2670 use->n_map_members = size;
2671 use->cost_map = XCNEWVEC (struct cost_pair, size);
2672 }
2673 }
2674
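/* For instance, with 5 related candidates the map is sized to 8 entries,
   so that set_use_iv_cost and get_use_iv_cost below can reduce a
   candidate id modulo the size with a simple mask (cand->id & 7) and then
   probe linearly, wrapping around once, for a free or matching slot.  */
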
2675 /* Returns a description of the computation cost of an expression whose runtime
2676 cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2677
2678 static comp_cost
2679 new_cost (unsigned runtime, unsigned complexity)
2680 {
2681 comp_cost cost;
2682
2683 cost.cost = runtime;
2684 cost.complexity = complexity;
2685
2686 return cost;
2687 }
2688
2689 /* Adds costs COST1 and COST2. */
2690
2691 static comp_cost
2692 add_costs (comp_cost cost1, comp_cost cost2)
2693 {
2694 cost1.cost += cost2.cost;
2695 cost1.complexity += cost2.complexity;
2696
2697 return cost1;
2698 }

2699 /* Subtracts cost COST2 from COST1. */
2700
2701 static comp_cost
2702 sub_costs (comp_cost cost1, comp_cost cost2)
2703 {
2704 cost1.cost -= cost2.cost;
2705 cost1.complexity -= cost2.complexity;
2706
2707 return cost1;
2708 }
2709
2710 /* Returns a negative number if COST1 < COST2, a positive number if
2711 COST1 > COST2, and 0 if COST1 = COST2. */
2712
2713 static int
2714 compare_costs (comp_cost cost1, comp_cost cost2)
2715 {
2716 if (cost1.cost == cost2.cost)
2717 return cost1.complexity - cost2.complexity;
2718
2719 return cost1.cost - cost2.cost;
2720 }
2721
2722 /* Returns true if COST is infinite. */
2723
2724 static bool
2725 infinite_cost_p (comp_cost cost)
2726 {
2727 return cost.cost == INFTY;
2728 }
2729
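/* The ordering these helpers induce can be illustrated as follows:
   comparing new_cost (4, 1) with new_cost (4, 2) favors the former, since
   equal runtimes are broken by the lower complexity; new_cost (3, 9)
   still beats new_cost (4, 0), because runtime dominates the
   comparison.  */
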
2730 /* Sets the cost of the (USE, CANDIDATE) pair to COST, and records that it
2731 depends on the invariants DEPENDS_ON, that the value used in expressing it
2732 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2733
2734 static void
2735 set_use_iv_cost (struct ivopts_data *data,
2736 struct iv_use *use, struct iv_cand *cand,
2737 comp_cost cost, bitmap depends_on, tree value,
2738 enum tree_code comp, int inv_expr_id)
2739 {
2740 unsigned i, s;
2741
2742 if (infinite_cost_p (cost))
2743 {
2744 BITMAP_FREE (depends_on);
2745 return;
2746 }
2747
2748 if (data->consider_all_candidates)
2749 {
2750 use->cost_map[cand->id].cand = cand;
2751 use->cost_map[cand->id].cost = cost;
2752 use->cost_map[cand->id].depends_on = depends_on;
2753 use->cost_map[cand->id].value = value;
2754 use->cost_map[cand->id].comp = comp;
2755 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2756 return;
2757 }
2758
2759 /* n_map_members is a power of two, so this computes modulo. */
2760 s = cand->id & (use->n_map_members - 1);
2761 for (i = s; i < use->n_map_members; i++)
2762 if (!use->cost_map[i].cand)
2763 goto found;
2764 for (i = 0; i < s; i++)
2765 if (!use->cost_map[i].cand)
2766 goto found;
2767
2768 gcc_unreachable ();
2769
2770 found:
2771 use->cost_map[i].cand = cand;
2772 use->cost_map[i].cost = cost;
2773 use->cost_map[i].depends_on = depends_on;
2774 use->cost_map[i].value = value;
2775 use->cost_map[i].comp = comp;
2776 use->cost_map[i].inv_expr_id = inv_expr_id;
2777 }
2778
2779 /* Gets cost of (USE, CANDIDATE) pair. */
2780
2781 static struct cost_pair *
2782 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2783 struct iv_cand *cand)
2784 {
2785 unsigned i, s;
2786 struct cost_pair *ret;
2787
2788 if (!cand)
2789 return NULL;
2790
2791 if (data->consider_all_candidates)
2792 {
2793 ret = use->cost_map + cand->id;
2794 if (!ret->cand)
2795 return NULL;
2796
2797 return ret;
2798 }
2799
2800 /* n_map_members is a power of two, so this computes modulo. */
2801 s = cand->id & (use->n_map_members - 1);
2802 for (i = s; i < use->n_map_members; i++)
2803 if (use->cost_map[i].cand == cand)
2804 return use->cost_map + i;
2805 else if (use->cost_map[i].cand == NULL)
2806 return NULL;
2807 for (i = 0; i < s; i++)
2808 if (use->cost_map[i].cand == cand)
2809 return use->cost_map + i;
2810 else if (use->cost_map[i].cand == NULL)
2811 return NULL;
2812
2813 return NULL;
2814 }
2815
2816 /* Returns an estimate of the cost of computing SEQ. */
2817
2818 static unsigned
2819 seq_cost (rtx seq, bool speed)
2820 {
2821 unsigned cost = 0;
2822 rtx set;
2823
2824 for (; seq; seq = NEXT_INSN (seq))
2825 {
2826 set = single_set (seq);
2827 if (set)
2828 cost += set_src_cost (SET_SRC (set), speed);
2829 else
2830 cost++;
2831 }
2832
2833 return cost;
2834 }
2835
2836 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2837 static rtx
2838 produce_memory_decl_rtl (tree obj, int *regno)
2839 {
2840 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2841 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2842 rtx x;
2843
2844 gcc_assert (obj);
2845 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2846 {
2847 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2848 x = gen_rtx_SYMBOL_REF (address_mode, name);
2849 SET_SYMBOL_REF_DECL (x, obj);
2850 x = gen_rtx_MEM (DECL_MODE (obj), x);
2851 set_mem_addr_space (x, as);
2852 targetm.encode_section_info (obj, x, true);
2853 }
2854 else
2855 {
2856 x = gen_raw_REG (address_mode, (*regno)++);
2857 x = gen_rtx_MEM (DECL_MODE (obj), x);
2858 set_mem_addr_space (x, as);
2859 }
2860
2861 return x;
2862 }
2863
2864 /* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
2865 walk_tree. DATA contains the actual fake register number. */
2866
2867 static tree
2868 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2869 {
2870 tree obj = NULL_TREE;
2871 rtx x = NULL_RTX;
2872 int *regno = (int *) data;
2873
2874 switch (TREE_CODE (*expr_p))
2875 {
2876 case ADDR_EXPR:
2877 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2878 handled_component_p (*expr_p);
2879 expr_p = &TREE_OPERAND (*expr_p, 0))
2880 continue;
2881 obj = *expr_p;
2882 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2883 x = produce_memory_decl_rtl (obj, regno);
2884 break;
2885
2886 case SSA_NAME:
2887 *ws = 0;
2888 obj = SSA_NAME_VAR (*expr_p);
2889 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2890 if (!obj)
2891 return NULL_TREE;
2892 if (!DECL_RTL_SET_P (obj))
2893 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2894 break;
2895
2896 case VAR_DECL:
2897 case PARM_DECL:
2898 case RESULT_DECL:
2899 *ws = 0;
2900 obj = *expr_p;
2901
2902 if (DECL_RTL_SET_P (obj))
2903 break;
2904
2905 if (DECL_MODE (obj) == BLKmode)
2906 x = produce_memory_decl_rtl (obj, regno);
2907 else
2908 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2909
2910 break;
2911
2912 default:
2913 break;
2914 }
2915
2916 if (x)
2917 {
2918 decl_rtl_to_reset.safe_push (obj);
2919 SET_DECL_RTL (obj, x);
2920 }
2921
2922 return NULL_TREE;
2923 }
2924
2925 /* Determines the cost of computing EXPR. */
2926
2927 static unsigned
2928 computation_cost (tree expr, bool speed)
2929 {
2930 rtx seq, rslt;
2931 tree type = TREE_TYPE (expr);
2932 unsigned cost;
2933 /* Avoid using hard regs in ways which may be unsupported. */
2934 int regno = LAST_VIRTUAL_REGISTER + 1;
2935 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2936 enum node_frequency real_frequency = node->frequency;
2937
2938 node->frequency = NODE_FREQUENCY_NORMAL;
2939 crtl->maybe_hot_insn_p = speed;
2940 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2941 start_sequence ();
2942 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2943 seq = get_insns ();
2944 end_sequence ();
2945 default_rtl_profile ();
2946 node->frequency = real_frequency;
2947
2948 cost = seq_cost (seq, speed);
2949 if (MEM_P (rslt))
2950 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2951 TYPE_ADDR_SPACE (type), speed);
2952 else if (!REG_P (rslt))
2953 cost += set_src_cost (rslt, speed);
2954
2955 return cost;
2956 }
2957
2958 /* Returns variable containing the value of candidate CAND at statement AT. */
2959
2960 static tree
2961 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2962 {
2963 if (stmt_after_increment (loop, cand, stmt))
2964 return cand->var_after;
2965 else
2966 return cand->var_before;
2967 }
2968
2969 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2970 same precision, at least as wide as the precision of TYPE, stores
2971 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2972 type of A and B. */
2973
2974 static tree
2975 determine_common_wider_type (tree *a, tree *b)
2976 {
2977 tree wider_type = NULL;
2978 tree suba, subb;
2979 tree atype = TREE_TYPE (*a);
2980
2981 if (CONVERT_EXPR_P (*a))
2982 {
2983 suba = TREE_OPERAND (*a, 0);
2984 wider_type = TREE_TYPE (suba);
2985 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2986 return atype;
2987 }
2988 else
2989 return atype;
2990
2991 if (CONVERT_EXPR_P (*b))
2992 {
2993 subb = TREE_OPERAND (*b, 0);
2994 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2995 return atype;
2996 }
2997 else
2998 return atype;
2999
3000 *a = suba;
3001 *b = subb;
3002 return wider_type;
3003 }
3004
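/* A sketch of the intent: if *A is (unsigned int) X and *B is
   (unsigned int) Y, where X and Y are of a wider type such as unsigned
   long, the casts are stripped, *A and *B are replaced by X and Y, and
   the wider type is returned, so that the difference UBASE - CBASE
   computed in get_computation_aff below folds in that type.  */
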
3005 /* Determines the expression by which USE is expressed from induction variable
3006 CAND at statement AT in LOOP. The expression is stored in a decomposed
3007 form into AFF. Returns false if USE cannot be expressed using CAND. */
3008
3009 static bool
3010 get_computation_aff (struct loop *loop,
3011 struct iv_use *use, struct iv_cand *cand, gimple at,
3012 struct affine_tree_combination *aff)
3013 {
3014 tree ubase = use->iv->base;
3015 tree ustep = use->iv->step;
3016 tree cbase = cand->iv->base;
3017 tree cstep = cand->iv->step, cstep_common;
3018 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3019 tree common_type, var;
3020 tree uutype;
3021 aff_tree cbase_aff, var_aff;
3022 double_int rat;
3023
3024 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3025 {
3026 /* We do not have enough precision to express the values of USE. */
3027 return false;
3028 }
3029
3030 var = var_at_stmt (loop, cand, at);
3031 uutype = unsigned_type_for (utype);
3032
3033 /* If the conversion is not noop, perform it. */
3034 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3035 {
3036 cstep = fold_convert (uutype, cstep);
3037 cbase = fold_convert (uutype, cbase);
3038 var = fold_convert (uutype, var);
3039 }
3040
3041 if (!constant_multiple_of (ustep, cstep, &rat))
3042 return false;
3043
3044 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3045 type, we achieve better folding by computing their difference in this
3046 wider type, and cast the result to UUTYPE. We do not need to worry about
3047 overflows, as all the arithmetic will in the end be performed in UUTYPE
3048 anyway. */
3049 common_type = determine_common_wider_type (&ubase, &cbase);
3050
3051 /* use = ubase - ratio * cbase + ratio * var. */
3052 tree_to_aff_combination (ubase, common_type, aff);
3053 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3054 tree_to_aff_combination (var, uutype, &var_aff);
3055
3056 /* We need to shift the value if we are after the increment. */
3057 if (stmt_after_increment (loop, cand, at))
3058 {
3059 aff_tree cstep_aff;
3060
3061 if (common_type != uutype)
3062 cstep_common = fold_convert (common_type, cstep);
3063 else
3064 cstep_common = cstep;
3065
3066 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3067 aff_combination_add (&cbase_aff, &cstep_aff);
3068 }
3069
3070 aff_combination_scale (&cbase_aff, -rat);
3071 aff_combination_add (aff, &cbase_aff);
3072 if (common_type != uutype)
3073 aff_combination_convert (aff, uutype);
3074
3075 aff_combination_scale (&var_aff, rat);
3076 aff_combination_add (aff, &var_aff);
3077
3078 return true;
3079 }
3080
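/* As a concrete (hypothetical) instance of the formula above: for a use
   with base 0 and step 4 expressed from a candidate with base 0 and
   step 1, constant_multiple_of yields RAT = 4 and the combination
   reduces to use = 4 * var; if AT is after the increment, one step of
   the candidate is first added to CBASE to compensate.  */
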
3081 /* Return the type of USE. */
3082
3083 static tree
3084 get_use_type (struct iv_use *use)
3085 {
3086 tree base_type = TREE_TYPE (use->iv->base);
3087 tree type;
3088
3089 if (use->type == USE_ADDRESS)
3090 {
3091 /* The base_type may be a void pointer. Create a pointer type based on
3092 the mem_ref instead. */
3093 type = build_pointer_type (TREE_TYPE (*use->op_p));
3094 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3095 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3096 }
3097 else
3098 type = base_type;
3099
3100 return type;
3101 }
3102
3103 /* Determines the expression by which USE is expressed from induction variable
3104 CAND at statement AT in LOOP. The computation is unshared. */
3105
3106 static tree
3107 get_computation_at (struct loop *loop,
3108 struct iv_use *use, struct iv_cand *cand, gimple at)
3109 {
3110 aff_tree aff;
3111 tree type = get_use_type (use);
3112
3113 if (!get_computation_aff (loop, use, cand, at, &aff))
3114 return NULL_TREE;
3115 unshare_aff_combination (&aff);
3116 return fold_convert (type, aff_combination_to_tree (&aff));
3117 }
3118
3119 /* Determines the expression by which USE is expressed from induction variable
3120 CAND in LOOP. The computation is unshared. */
3121
3122 static tree
3123 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3124 {
3125 return get_computation_at (loop, use, cand, use->stmt);
3126 }
3127
3128 /* Adjust the cost COST for being in loop setup rather than loop body.
3129 If we're optimizing for space, the loop setup overhead is constant;
3130 if we're optimizing for speed, amortize it over the per-iteration cost. */
3131 static unsigned
3132 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3133 {
3134 if (cost == INFTY)
3135 return cost;
3136 else if (optimize_loop_for_speed_p (data->current_loop))
3137 return cost / avg_loop_niter (data->current_loop);
3138 else
3139 return cost;
3140 }
3141
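/* For example, a setup cost of 40 in a loop with an average of 10
   iterations is accounted as 4 per iteration when optimizing for speed,
   but remains 40 when optimizing for size.  */
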
3142 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3143 validity for a memory reference accessing memory of mode MODE in
3144 address space AS. */
3145
3147 bool
3148 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3149 addr_space_t as)
3150 {
3151 #define MAX_RATIO 128
3152 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3153 static vec<sbitmap> valid_mult_list;
3154 sbitmap valid_mult;
3155
3156 if (data_index >= valid_mult_list.length ())
3157 valid_mult_list.safe_grow_cleared (data_index + 1);
3158
3159 valid_mult = valid_mult_list[data_index];
3160 if (!valid_mult)
3161 {
3162 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3163 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3164 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3165 rtx addr, scaled;
3166 HOST_WIDE_INT i;
3167
3168 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3169 bitmap_clear (valid_mult);
3170 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3171 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3172 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3173 {
3174 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3175 if (memory_address_addr_space_p (mode, addr, as)
3176 || memory_address_addr_space_p (mode, scaled, as))
3177 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3178 }
3179
3180 if (dump_file && (dump_flags & TDF_DETAILS))
3181 {
3182 fprintf (dump_file, " allowed multipliers:");
3183 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3184 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3185 fprintf (dump_file, " %d", (int) i);
3186 fprintf (dump_file, "\n");
3187 fprintf (dump_file, "\n");
3188 }
3189
3190 valid_mult_list[data_index] = valid_mult;
3191 }
3192
3193 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3194 return false;
3195
3196 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3197 }
3198
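/* For instance, on a target whose addressing modes scale the index only
   by 1, 2, 4 or 8 (x86-like; purely illustrative here), the cached
   bitmap answers true for ratio 4 and false for ratio 3, and any ratio
   outside [-MAX_RATIO, MAX_RATIO] is rejected without consulting the
   cache.  */
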
3199 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3200 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3201 variable is omitted. Compute the cost for a memory reference that accesses
3202 a memory location of mode MEM_MODE in address space AS.
3203
3204 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3205 size of MEM_MODE / RATIO) is available. To make this determination, we
3206 look at the size of the increment to be made, which is given in CSTEP.
3207 CSTEP may be zero if the step is unknown.
3208 STMT_AFTER_INC is true iff the statement we're looking at is after the
3209 increment of the original biv.
3210
3211 TODO -- there must be some better way. This all is quite crude. */
3212
3213 enum ainc_type
3214 {
3215 AINC_PRE_INC, /* Pre increment. */
3216 AINC_PRE_DEC, /* Pre decrement. */
3217 AINC_POST_INC, /* Post increment. */
3218 AINC_POST_DEC, /* Post decrement. */
3219 AINC_NONE /* Also the number of auto increment types. */
3220 };
3221
3222 typedef struct address_cost_data_s
3223 {
3224 HOST_WIDE_INT min_offset, max_offset;
3225 unsigned costs[2][2][2][2];
3226 unsigned ainc_costs[AINC_NONE];
3227 } *address_cost_data;
3228
3229
3230 static comp_cost
3231 get_address_cost (bool symbol_present, bool var_present,
3232 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3233 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3234 addr_space_t as, bool speed,
3235 bool stmt_after_inc, bool *may_autoinc)
3236 {
3237 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3238 static vec<address_cost_data> address_cost_data_list;
3239 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3240 address_cost_data data;
3241 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3242 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3243 unsigned cost, acost, complexity;
3244 enum ainc_type autoinc_type;
3245 bool offset_p, ratio_p, autoinc;
3246 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3247 unsigned HOST_WIDE_INT mask;
3248 unsigned bits;
3249
3250 if (data_index >= address_cost_data_list.length ())
3251 address_cost_data_list.safe_grow_cleared (data_index + 1);
3252
3253 data = address_cost_data_list[data_index];
3254 if (!data)
3255 {
3256 HOST_WIDE_INT i;
3257 HOST_WIDE_INT rat, off = 0;
3258 int old_cse_not_expected, width;
3259 unsigned sym_p, var_p, off_p, rat_p, add_c;
3260 rtx seq, addr, base;
3261 rtx reg0, reg1;
3262
3263 data = (address_cost_data) xcalloc (1, sizeof (*data));
3264
3265 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3266
3267 width = GET_MODE_BITSIZE (address_mode) - 1;
3268 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3269 width = HOST_BITS_PER_WIDE_INT - 1;
3270 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3271
3272 for (i = width; i >= 0; i--)
3273 {
3274 off = -((unsigned HOST_WIDE_INT) 1 << i);
3275 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3276 if (memory_address_addr_space_p (mem_mode, addr, as))
3277 break;
3278 }
3279 data->min_offset = (i == -1? 0 : off);
3280
3281 for (i = width; i >= 0; i--)
3282 {
3283 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3284 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3285 if (memory_address_addr_space_p (mem_mode, addr, as))
3286 break;
3287 }
3288 if (i == -1)
3289 off = 0;
3290 data->max_offset = off;
3291
3292 if (dump_file && (dump_flags & TDF_DETAILS))
3293 {
3294 fprintf (dump_file, "get_address_cost:\n");
3295 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3296 GET_MODE_NAME (mem_mode),
3297 data->min_offset);
3298 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3299 GET_MODE_NAME (mem_mode),
3300 data->max_offset);
3301 }
3302
3303 rat = 1;
3304 for (i = 2; i <= MAX_RATIO; i++)
3305 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3306 {
3307 rat = i;
3308 break;
3309 }
3310
3311 /* Compute the cost of various addressing modes. */
3312 acost = 0;
3313 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3314 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3315
3316 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3317 || USE_STORE_PRE_DECREMENT (mem_mode))
3318 {
3319 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3320 has_predec[mem_mode]
3321 = memory_address_addr_space_p (mem_mode, addr, as);
3322
3323 if (has_predec[mem_mode])
3324 data->ainc_costs[AINC_PRE_DEC]
3325 = address_cost (addr, mem_mode, as, speed);
3326 }
3327 if (USE_LOAD_POST_DECREMENT (mem_mode)
3328 || USE_STORE_POST_DECREMENT (mem_mode))
3329 {
3330 addr = gen_rtx_POST_DEC (address_mode, reg0);
3331 has_postdec[mem_mode]
3332 = memory_address_addr_space_p (mem_mode, addr, as);
3333
3334 if (has_postdec[mem_mode])
3335 data->ainc_costs[AINC_POST_DEC]
3336 = address_cost (addr, mem_mode, as, speed);
3337 }
3338 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3339 || USE_STORE_PRE_INCREMENT (mem_mode))
3340 {
3341 addr = gen_rtx_PRE_INC (address_mode, reg0);
3342 has_preinc[mem_mode]
3343 = memory_address_addr_space_p (mem_mode, addr, as);
3344
3345 if (has_preinc[mem_mode])
3346 data->ainc_costs[AINC_PRE_INC]
3347 = address_cost (addr, mem_mode, as, speed);
3348 }
3349 if (USE_LOAD_POST_INCREMENT (mem_mode)
3350 || USE_STORE_POST_INCREMENT (mem_mode))
3351 {
3352 addr = gen_rtx_POST_INC (address_mode, reg0);
3353 has_postinc[mem_mode]
3354 = memory_address_addr_space_p (mem_mode, addr, as);
3355
3356 if (has_postinc[mem_mode])
3357 data->ainc_costs[AINC_POST_INC]
3358 = address_cost (addr, mem_mode, as, speed);
3359 }
3360 for (i = 0; i < 16; i++)
3361 {
3362 sym_p = i & 1;
3363 var_p = (i >> 1) & 1;
3364 off_p = (i >> 2) & 1;
3365 rat_p = (i >> 3) & 1;
3366
3367 addr = reg0;
3368 if (rat_p)
3369 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3370 gen_int_mode (rat, address_mode));
3371
3372 if (var_p)
3373 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3374
3375 if (sym_p)
3376 {
3377 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3378 /* ??? We can run into trouble with some backends by presenting
3379 it with symbols which haven't been properly passed through
3380 targetm.encode_section_info. By setting the local bit, we
3381 enhance the probability of things working. */
3382 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3383
3384 if (off_p)
3385 base = gen_rtx_fmt_e (CONST, address_mode,
3386 gen_rtx_fmt_ee
3387 (PLUS, address_mode, base,
3388 gen_int_mode (off, address_mode)));
3389 }
3390 else if (off_p)
3391 base = gen_int_mode (off, address_mode);
3392 else
3393 base = NULL_RTX;
3394
3395 if (base)
3396 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3397
3398 start_sequence ();
3399 /* To avoid splitting addressing modes, pretend that no cse will
3400 follow. */
3401 old_cse_not_expected = cse_not_expected;
3402 cse_not_expected = true;
3403 addr = memory_address_addr_space (mem_mode, addr, as);
3404 cse_not_expected = old_cse_not_expected;
3405 seq = get_insns ();
3406 end_sequence ();
3407
3408 acost = seq_cost (seq, speed);
3409 acost += address_cost (addr, mem_mode, as, speed);
3410
3411 if (!acost)
3412 acost = 1;
3413 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3414 }
3415
3416 /* On some targets, it is quite expensive to load a symbol into a register,
3417 which makes addresses that contain symbols look much more expensive.
3418 However, the symbol will have to be loaded in any case before the
3419 loop (and quite likely we have it in a register already), so it does not
3420 make much sense to penalize them too heavily. So make some final
3421 tweaks for the SYMBOL_PRESENT modes:
3422
3423 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3424 var is cheaper, use this mode with small penalty.
3425 If VAR_PRESENT is true, try whether the mode with
3426 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3427 if this is the case, use it. */
3428 add_c = add_cost (speed, address_mode);
3429 for (i = 0; i < 8; i++)
3430 {
3431 var_p = i & 1;
3432 off_p = (i >> 1) & 1;
3433 rat_p = (i >> 2) & 1;
3434
3435 acost = data->costs[0][1][off_p][rat_p] + 1;
3436 if (var_p)
3437 acost += add_c;
3438
3439 if (acost < data->costs[1][var_p][off_p][rat_p])
3440 data->costs[1][var_p][off_p][rat_p] = acost;
3441 }
3442
3443 if (dump_file && (dump_flags & TDF_DETAILS))
3444 {
3445 fprintf (dump_file, "Address costs:\n");
3446
3447 for (i = 0; i < 16; i++)
3448 {
3449 sym_p = i & 1;
3450 var_p = (i >> 1) & 1;
3451 off_p = (i >> 2) & 1;
3452 rat_p = (i >> 3) & 1;
3453
3454 fprintf (dump_file, " ");
3455 if (sym_p)
3456 fprintf (dump_file, "sym + ");
3457 if (var_p)
3458 fprintf (dump_file, "var + ");
3459 if (off_p)
3460 fprintf (dump_file, "cst + ");
3461 if (rat_p)
3462 fprintf (dump_file, "rat * ");
3463
3464 acost = data->costs[sym_p][var_p][off_p][rat_p];
3465 fprintf (dump_file, "index costs %d\n", acost);
3466 }
3467 if (has_predec[mem_mode] || has_postdec[mem_mode]
3468 || has_preinc[mem_mode] || has_postinc[mem_mode])
3469 fprintf (dump_file, " May include autoinc/dec\n");
3470 fprintf (dump_file, "\n");
3471 }
3472
3473 address_cost_data_list[data_index] = data;
3474 }
3475
3476 bits = GET_MODE_BITSIZE (address_mode);
3477 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3478 offset &= mask;
3479 if ((offset >> (bits - 1) & 1))
3480 offset |= ~mask;
3481 s_offset = offset;
3482
3483 autoinc = false;
3484 autoinc_type = AINC_NONE;
3485 msize = GET_MODE_SIZE (mem_mode);
3486 autoinc_offset = offset;
3487 if (stmt_after_inc)
3488 autoinc_offset += ratio * cstep;
3489 if (symbol_present || var_present || ratio != 1)
3490 autoinc = false;
3491 else
3492 {
3493 if (has_postinc[mem_mode] && autoinc_offset == 0
3494 && msize == cstep)
3495 autoinc_type = AINC_POST_INC;
3496 else if (has_postdec[mem_mode] && autoinc_offset == 0
3497 && msize == -cstep)
3498 autoinc_type = AINC_POST_DEC;
3499 else if (has_preinc[mem_mode] && autoinc_offset == msize
3500 && msize == cstep)
3501 autoinc_type = AINC_PRE_INC;
3502 else if (has_predec[mem_mode] && autoinc_offset == -msize
3503 && msize == -cstep)
3504 autoinc_type = AINC_PRE_DEC;
3505
3506 if (autoinc_type != AINC_NONE)
3507 autoinc = true;
3508 }
3509
3510 cost = 0;
3511 offset_p = (s_offset != 0
3512 && data->min_offset <= s_offset
3513 && s_offset <= data->max_offset);
3514 ratio_p = (ratio != 1
3515 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3516
3517 if (ratio != 1 && !ratio_p)
3518 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3519
3520 if (s_offset && !offset_p && !symbol_present)
3521 cost += add_cost (speed, address_mode);
3522
3523 if (may_autoinc)
3524 *may_autoinc = autoinc;
3525 if (autoinc)
3526 acost = data->ainc_costs[autoinc_type];
3527 else
3528 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3529 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3530 return new_cost (cost + acost, complexity);
3531 }
3532
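/* A brief sketch of the autoinc classification above: for a memory
   access of mode size 4 with CSTEP 4, RATIO 1, no symbol or invariant
   part, and a resulting offset of 0, the post-increment form is chosen
   when the target supports it; an offset equal to the mode size selects
   the pre-increment form instead.  */
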
3533 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3534 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3535 calculating the operands of EXPR. Returns true if successful, and returns
3536 the cost in COST. */
3537
3538 static bool
3539 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3540 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3541 {
3542 comp_cost res;
3543 tree op1 = TREE_OPERAND (expr, 1);
3544 tree cst = TREE_OPERAND (mult, 1);
3545 tree multop = TREE_OPERAND (mult, 0);
3546 int m = exact_log2 (int_cst_value (cst));
3547 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3548 int sa_cost;
3549 bool equal_p = false;
3550
3551 if (!(m >= 0 && m < maxm))
3552 return false;
3553
3554 if (operand_equal_p (op1, mult, 0))
3555 equal_p = true;
3556
3557 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3558 ? shiftadd_cost (speed, mode, m)
3559 : (equal_p
3560 ? shiftsub1_cost (speed, mode, m)
3561 : shiftsub0_cost (speed, mode, m)));
3562 res = new_cost (sa_cost, 0);
3563 res = add_costs (res, equal_p ? cost0 : cost1);
3564
3565 STRIP_NOPS (multop);
3566 if (!is_gimple_val (multop))
3567 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3568
3569 *cost = res;
3570 return true;
3571 }
3572
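/* For example, an expression of the form A + B * 4 is costed as a
   shift-and-add with shift count M = 2, and A - B * 4 as the
   corresponding shift-and-subtract; a multiplier that is not a power of
   two makes exact_log2 fail and the function returns false.  */
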
3573 /* Estimates cost of forcing expression EXPR into a variable. */
3574
3575 static comp_cost
3576 force_expr_to_var_cost (tree expr, bool speed)
3577 {
3578 static bool costs_initialized = false;
3579 static unsigned integer_cost [2];
3580 static unsigned symbol_cost [2];
3581 static unsigned address_cost [2];
3582 tree op0, op1;
3583 comp_cost cost0, cost1, cost;
3584 enum machine_mode mode;
3585
3586 if (!costs_initialized)
3587 {
3588 tree type = build_pointer_type (integer_type_node);
3589 tree var, addr;
3590 rtx x;
3591 int i;
3592
3593 var = create_tmp_var_raw (integer_type_node, "test_var");
3594 TREE_STATIC (var) = 1;
3595 x = produce_memory_decl_rtl (var, NULL);
3596 SET_DECL_RTL (var, x);
3597
3598 addr = build1 (ADDR_EXPR, type, var);
3599
3601 for (i = 0; i < 2; i++)
3602 {
3603 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3604 2000), i);
3605
3606 symbol_cost[i] = computation_cost (addr, i) + 1;
3607
3608 address_cost[i]
3609 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3610 if (dump_file && (dump_flags & TDF_DETAILS))
3611 {
3612 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3613 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3614 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3615 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3616 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3617 fprintf (dump_file, "\n");
3618 }
3619 }
3620
3621 costs_initialized = true;
3622 }
3623
3624 STRIP_NOPS (expr);
3625
3626 if (SSA_VAR_P (expr))
3627 return no_cost;
3628
3629 if (is_gimple_min_invariant (expr))
3630 {
3631 if (TREE_CODE (expr) == INTEGER_CST)
3632 return new_cost (integer_cost [speed], 0);
3633
3634 if (TREE_CODE (expr) == ADDR_EXPR)
3635 {
3636 tree obj = TREE_OPERAND (expr, 0);
3637
3638 if (TREE_CODE (obj) == VAR_DECL
3639 || TREE_CODE (obj) == PARM_DECL
3640 || TREE_CODE (obj) == RESULT_DECL)
3641 return new_cost (symbol_cost [speed], 0);
3642 }
3643
3644 return new_cost (address_cost [speed], 0);
3645 }
3646
3647 switch (TREE_CODE (expr))
3648 {
3649 case POINTER_PLUS_EXPR:
3650 case PLUS_EXPR:
3651 case MINUS_EXPR:
3652 case MULT_EXPR:
3653 op0 = TREE_OPERAND (expr, 0);
3654 op1 = TREE_OPERAND (expr, 1);
3655 STRIP_NOPS (op0);
3656 STRIP_NOPS (op1);
3657 break;
3658
3659 CASE_CONVERT:
3660 case NEGATE_EXPR:
3661 op0 = TREE_OPERAND (expr, 0);
3662 STRIP_NOPS (op0);
3663 op1 = NULL_TREE;
3664 break;
3665
3666 default:
3667 /* Just an arbitrary value, FIXME. */
3668 return new_cost (target_spill_cost[speed], 0);
3669 }
3670
3671 if (op0 == NULL_TREE
3672 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3673 cost0 = no_cost;
3674 else
3675 cost0 = force_expr_to_var_cost (op0, speed);
3676
3677 if (op1 == NULL_TREE
3678 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3679 cost1 = no_cost;
3680 else
3681 cost1 = force_expr_to_var_cost (op1, speed);
3682
3683 mode = TYPE_MODE (TREE_TYPE (expr));
3684 switch (TREE_CODE (expr))
3685 {
3686 case POINTER_PLUS_EXPR:
3687 case PLUS_EXPR:
3688 case MINUS_EXPR:
3689 case NEGATE_EXPR:
3690 cost = new_cost (add_cost (speed, mode), 0);
3691 if (TREE_CODE (expr) != NEGATE_EXPR)
3692 {
3693 tree mult = NULL_TREE;
3694 comp_cost sa_cost;
3695 if (TREE_CODE (op1) == MULT_EXPR)
3696 mult = op1;
3697 else if (TREE_CODE (op0) == MULT_EXPR)
3698 mult = op0;
3699
3700 if (mult != NULL_TREE
3701 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3702 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3703 speed, &sa_cost))
3704 return sa_cost;
3705 }
3706 break;
3707
3708 CASE_CONVERT:
3709 {
3710 tree inner_mode, outer_mode;
3711 outer_mode = TREE_TYPE (expr);
3712 inner_mode = TREE_TYPE (op0);
3713 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3714 TYPE_MODE (inner_mode), speed), 0);
3715 }
3716 break;
3717
3718 case MULT_EXPR:
3719 if (cst_and_fits_in_hwi (op0))
3720 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3721 mode, speed), 0);
3722 else if (cst_and_fits_in_hwi (op1))
3723 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3724 mode, speed), 0);
3725 else
3726 return new_cost (target_spill_cost [speed], 0);
3727 break;
3728
3729 default:
3730 gcc_unreachable ();
3731 }
3732
3733 cost = add_costs (cost, cost0);
3734 cost = add_costs (cost, cost1);
3735
3736 /* Bound the cost by target_spill_cost. The parts of complicated
3737 computations often are either loop invariant or at least can
3738 be shared between several iv uses, so letting this grow without
3739 limits would not give reasonable results. */
3740 if (cost.cost > (int) target_spill_cost [speed])
3741 cost.cost = target_spill_cost [speed];
3742
3743 return cost;
3744 }
3745
3746 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3747 invariants the computation depends on. */
3748
3749 static comp_cost
3750 force_var_cost (struct ivopts_data *data,
3751 tree expr, bitmap *depends_on)
3752 {
3753 if (depends_on)
3754 {
3755 fd_ivopts_data = data;
3756 walk_tree (&expr, find_depends, depends_on, NULL);
3757 }
3758
3759 return force_expr_to_var_cost (expr, data->speed);
3760 }
3761
3762 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3763 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3764 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3765 invariants the computation depends on. */
3766
3767 static comp_cost
3768 split_address_cost (struct ivopts_data *data,
3769 tree addr, bool *symbol_present, bool *var_present,
3770 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3771 {
3772 tree core;
3773 HOST_WIDE_INT bitsize;
3774 HOST_WIDE_INT bitpos;
3775 tree toffset;
3776 enum machine_mode mode;
3777 int unsignedp, volatilep;
3778
3779 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3780 &unsignedp, &volatilep, false);
3781
3782 if (toffset != 0
3783 || bitpos % BITS_PER_UNIT != 0
3784 || TREE_CODE (core) != VAR_DECL)
3785 {
3786 *symbol_present = false;
3787 *var_present = true;
3788 fd_ivopts_data = data;
3789 walk_tree (&addr, find_depends, depends_on, NULL);
3790 return new_cost (target_spill_cost[data->speed], 0);
3791 }
3792
3793 *offset += bitpos / BITS_PER_UNIT;
3794 if (TREE_STATIC (core)
3795 || DECL_EXTERNAL (core))
3796 {
3797 *symbol_present = true;
3798 *var_present = false;
3799 return no_cost;
3800 }
3801
3802 *symbol_present = false;
3803 *var_present = true;
3804 return no_cost;
3805 }
3806
3807 /* Estimates cost of expressing difference of addresses E1 - E2 as
3808 var + symbol + offset. The value of offset is added to OFFSET,
3809 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3810 part is missing. DEPENDS_ON is a set of the invariants the computation
3811 depends on. */
3812
3813 static comp_cost
3814 ptr_difference_cost (struct ivopts_data *data,
3815 tree e1, tree e2, bool *symbol_present, bool *var_present,
3816 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3817 {
3818 HOST_WIDE_INT diff = 0;
3819 aff_tree aff_e1, aff_e2;
3820 tree type;
3821
3822 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3823
3824 if (ptr_difference_const (e1, e2, &diff))
3825 {
3826 *offset += diff;
3827 *symbol_present = false;
3828 *var_present = false;
3829 return no_cost;
3830 }
3831
3832 if (integer_zerop (e2))
3833 return split_address_cost (data, TREE_OPERAND (e1, 0),
3834 symbol_present, var_present, offset, depends_on);
3835
3836 *symbol_present = false;
3837 *var_present = true;
3838
3839 type = signed_type_for (TREE_TYPE (e1));
3840 tree_to_aff_combination (e1, type, &aff_e1);
3841 tree_to_aff_combination (e2, type, &aff_e2);
3842 aff_combination_scale (&aff_e2, double_int_minus_one);
3843 aff_combination_add (&aff_e1, &aff_e2);
3844
3845 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3846 }
3847
3848 /* Estimates cost of expressing difference E1 - E2 as
3849 var + symbol + offset. The value of offset is added to OFFSET,
3850 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3851 part is missing. DEPENDS_ON is a set of the invariants the computation
3852 depends on. */
3853
3854 static comp_cost
3855 difference_cost (struct ivopts_data *data,
3856 tree e1, tree e2, bool *symbol_present, bool *var_present,
3857 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3858 {
3859 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3860 unsigned HOST_WIDE_INT off1, off2;
3861 aff_tree aff_e1, aff_e2;
3862 tree type;
3863
3864 e1 = strip_offset (e1, &off1);
3865 e2 = strip_offset (e2, &off2);
3866 *offset += off1 - off2;
3867
3868 STRIP_NOPS (e1);
3869 STRIP_NOPS (e2);
3870
3871 if (TREE_CODE (e1) == ADDR_EXPR)
3872 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3873 offset, depends_on);
3874 *symbol_present = false;
3875
3876 if (operand_equal_p (e1, e2, 0))
3877 {
3878 *var_present = false;
3879 return no_cost;
3880 }
3881
3882 *var_present = true;
3883
3884 if (integer_zerop (e2))
3885 return force_var_cost (data, e1, depends_on);
3886
3887 if (integer_zerop (e1))
3888 {
3889 comp_cost cost = force_var_cost (data, e2, depends_on);
3890 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3891 return cost;
3892 }
3893
3894 type = signed_type_for (TREE_TYPE (e1));
3895 tree_to_aff_combination (e1, type, &aff_e1);
3896 tree_to_aff_combination (e2, type, &aff_e2);
3897 aff_combination_scale (&aff_e2, double_int_minus_one);
3898 aff_combination_add (&aff_e1, &aff_e2);
3899
3900 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3901 }
3902
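/* Some illustrative outcomes: for (P + 8) - P the stripped cores are
   identical, so *OFFSET is merely adjusted by 8 at no cost; for
   &a[4] - &a[1] with 4-byte elements everything reduces to a constant
   offset of 12; a genuinely symbolic difference is forced into a
   variable via force_var_cost.  */
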
3903 /* Returns true if AFF1 and AFF2 are identical. */
3904
3905 static bool
3906 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3907 {
3908 unsigned i;
3909
3910 if (aff1->n != aff2->n)
3911 return false;
3912
3913 for (i = 0; i < aff1->n; i++)
3914 {
3915 if (aff1->elts[i].coef != aff2->elts[i].coef)
3916 return false;
3917
3918 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3919 return false;
3920 }
3921 return true;
3922 }
3923
3924 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3925
3926 static int
3927 get_expr_id (struct ivopts_data *data, tree expr)
3928 {
3929 struct iv_inv_expr_ent ent;
3930 struct iv_inv_expr_ent **slot;
3931
3932 ent.expr = expr;
3933 ent.hash = iterative_hash_expr (expr, 0);
3934 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3935 if (*slot)
3936 return (*slot)->id;
3937
3938 *slot = XNEW (struct iv_inv_expr_ent);
3939 (*slot)->expr = expr;
3940 (*slot)->hash = ent.hash;
3941 (*slot)->id = data->inv_expr_id++;
3942 return (*slot)->id;
3943 }
3944
3945 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3946 requires a new compiler-generated temporary. Returns -1 otherwise.
3947 ADDRESS_P is a flag indicating whether the expression is for address
3948 computation. */
3949
3950 static int
3951 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3952 tree cbase, HOST_WIDE_INT ratio,
3953 bool address_p)
3954 {
3955 aff_tree ubase_aff, cbase_aff;
3956 tree expr, ub, cb;
3957
3958 STRIP_NOPS (ubase);
3959 STRIP_NOPS (cbase);
3960 ub = ubase;
3961 cb = cbase;
3962
3963 if ((TREE_CODE (ubase) == INTEGER_CST)
3964 && (TREE_CODE (cbase) == INTEGER_CST))
3965 return -1;
3966
3967 /* Strips the constant part. */
3968 if (TREE_CODE (ubase) == PLUS_EXPR
3969 || TREE_CODE (ubase) == MINUS_EXPR
3970 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3971 {
3972 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3973 ubase = TREE_OPERAND (ubase, 0);
3974 }
3975
3976 /* Strips the constant part. */
3977 if (TREE_CODE (cbase) == PLUS_EXPR
3978 || TREE_CODE (cbase) == MINUS_EXPR
3979 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3980 {
3981 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3982 cbase = TREE_OPERAND (cbase, 0);
3983 }
3984
3985 if (address_p)
3986 {
3987 if (((TREE_CODE (ubase) == SSA_NAME)
3988 || (TREE_CODE (ubase) == ADDR_EXPR
3989 && is_gimple_min_invariant (ubase)))
3990 && (TREE_CODE (cbase) == INTEGER_CST))
3991 return -1;
3992
3993 if (((TREE_CODE (cbase) == SSA_NAME)
3994 || (TREE_CODE (cbase) == ADDR_EXPR
3995 && is_gimple_min_invariant (cbase)))
3996 && (TREE_CODE (ubase) == INTEGER_CST))
3997 return -1;
3998 }
3999
4000 if (ratio == 1)
4001 {
4002 if (operand_equal_p (ubase, cbase, 0))
4003 return -1;
4004
4005 if (TREE_CODE (ubase) == ADDR_EXPR
4006 && TREE_CODE (cbase) == ADDR_EXPR)
4007 {
4008 tree usym, csym;
4009
4010 usym = TREE_OPERAND (ubase, 0);
4011 csym = TREE_OPERAND (cbase, 0);
4012 if (TREE_CODE (usym) == ARRAY_REF)
4013 {
4014 tree ind = TREE_OPERAND (usym, 1);
4015 if (TREE_CODE (ind) == INTEGER_CST
4016 && tree_fits_shwi_p (ind)
4017 && TREE_INT_CST_LOW (ind) == 0)
4018 usym = TREE_OPERAND (usym, 0);
4019 }
4020 if (TREE_CODE (csym) == ARRAY_REF)
4021 {
4022 tree ind = TREE_OPERAND (csym, 1);
4023 if (TREE_CODE (ind) == INTEGER_CST
4024 && tree_fits_shwi_p (ind)
4025 && TREE_INT_CST_LOW (ind) == 0)
4026 csym = TREE_OPERAND (csym, 0);
4027 }
4028 if (operand_equal_p (usym, csym, 0))
4029 return -1;
4030 }
4031 /* Now do a more complex comparison. */
4032 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4033 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4034 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4035 return -1;
4036 }
4037
4038 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4039 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4040
4041 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
4042 aff_combination_add (&ubase_aff, &cbase_aff);
4043 expr = aff_combination_to_tree (&ubase_aff);
4044 return get_expr_id (data, expr);
4045 }
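/* An example of the fallthrough case (hypothetical trees): for
   UBASE = &a[0] + n, CBASE = &a[0] and RATIO = 1, none of the early
   exits match, so the affine difference UBASE - RATIO * CBASE folds
   to the invariant n, and get_expr_id hands out one shared id for it;
   candidates whose difference folds to the same n thus share a single
   compiler-generated temporary.  */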
4046
4047
4048
4049 /* Determines the cost of the computation by which USE is expressed
4050 from induction variable CAND. If ADDRESS_P is true, we just need
4051 to create an address from it, otherwise we want to get it into a
4052 register. A set of invariants we depend on is stored in
4053 DEPENDS_ON. AT is the statement at which the value is computed.
4054 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4055 addressing is likely. */
4056
4057 static comp_cost
4058 get_computation_cost_at (struct ivopts_data *data,
4059 struct iv_use *use, struct iv_cand *cand,
4060 bool address_p, bitmap *depends_on, gimple at,
4061 bool *can_autoinc,
4062 int *inv_expr_id)
4063 {
4064 tree ubase = use->iv->base, ustep = use->iv->step;
4065 tree cbase, cstep;
4066 tree utype = TREE_TYPE (ubase), ctype;
4067 unsigned HOST_WIDE_INT cstepi, offset = 0;
4068 HOST_WIDE_INT ratio, aratio;
4069 bool var_present, symbol_present, stmt_is_after_inc;
4070 comp_cost cost;
4071 double_int rat;
4072 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4073 enum machine_mode mem_mode = (address_p
4074 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4075 : VOIDmode);
4076
4077 *depends_on = NULL;
4078
4079 /* Only consider real candidates. */
4080 if (!cand->iv)
4081 return infinite_cost;
4082
4083 cbase = cand->iv->base;
4084 cstep = cand->iv->step;
4085 ctype = TREE_TYPE (cbase);
4086
4087 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4088 {
4089 /* We do not have a precision to express the values of use. */
4090 return infinite_cost;
4091 }
4092
4093 if (address_p
4094 || (use->iv->base_object
4095 && cand->iv->base_object
4096 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4097 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4098 {
4099 /* Do not try to express the address of an object with a computation
4100 based on the address of a different object. This may cause problems
4101 in RTL-level alias analysis (which does not expect this to happen,
4102 as it is illegal in C), and would be unlikely to be useful
4103 anyway. */
4104 if (use->iv->base_object
4105 && cand->iv->base_object
4106 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4107 return infinite_cost;
4108 }
4109
4110 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4111 {
4112 /* TODO -- add direct handling of this case. */
4113 goto fallback;
4114 }
4115
4116 /* CSTEPI is removed from the offset in case the statement is after the
4117 increment. If the step is not constant, we use zero instead.
4118 This is a bit imprecise (there is the extra addition), but
4119 redundancy elimination is likely to transform the code so that
4120 it uses the value of the variable before the increment anyway,
4121 so this is not too unrealistic. */
4122 if (cst_and_fits_in_hwi (cstep))
4123 cstepi = int_cst_value (cstep);
4124 else
4125 cstepi = 0;
4126
4127 if (!constant_multiple_of (ustep, cstep, &rat))
4128 return infinite_cost;
4129
4130 if (rat.fits_shwi ())
4131 ratio = rat.to_shwi ();
4132 else
4133 return infinite_cost;
4134
4135 STRIP_NOPS (cbase);
4136 ctype = TREE_TYPE (cbase);
4137
4138 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4139
4140 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4141 or ratio == 1, it is better to handle this like
4142
4143 ubase - ratio * cbase + ratio * var
4144
4145 (this also holds in the case ratio == -1, TODO). */
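/* An illustrative sketch (hypothetical values, not from the sources):
   with ubase = b + 8, cbase = b and ratio = 2,

     use = (b + 8) + 2 * (var - b)
         = ((b + 8) - 2 * b) + 2 * var

   so the invariant part (b + 8) - 2 * b is costed once (its cost is
   divided by the average iteration count below), and only
   ratio * var remains to be computed in each iteration.  */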
4146
4147 if (cst_and_fits_in_hwi (cbase))
4148 {
4149 offset = - ratio * int_cst_value (cbase);
4150 cost = difference_cost (data,
4151 ubase, build_int_cst (utype, 0),
4152 &symbol_present, &var_present, &offset,
4153 depends_on);
4154 cost.cost /= avg_loop_niter (data->current_loop);
4155 }
4156 else if (ratio == 1)
4157 {
4158 tree real_cbase = cbase;
4159
4160 /* Check to see if any adjustment is needed. */
4161 if (cstepi == 0 && stmt_is_after_inc)
4162 {
4163 aff_tree real_cbase_aff;
4164 aff_tree cstep_aff;
4165
4166 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4167 &real_cbase_aff);
4168 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4169
4170 aff_combination_add (&real_cbase_aff, &cstep_aff);
4171 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4172 }
4173
4174 cost = difference_cost (data,
4175 ubase, real_cbase,
4176 &symbol_present, &var_present, &offset,
4177 depends_on);
4178 cost.cost /= avg_loop_niter (data->current_loop);
4179 }
4180 else if (address_p
4181 && !POINTER_TYPE_P (ctype)
4182 && multiplier_allowed_in_address_p
4183 (ratio, mem_mode,
4184 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4185 {
4186 cbase
4187 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4188 cost = difference_cost (data,
4189 ubase, cbase,
4190 &symbol_present, &var_present, &offset,
4191 depends_on);
4192 cost.cost /= avg_loop_niter (data->current_loop);
4193 }
4194 else
4195 {
4196 cost = force_var_cost (data, cbase, depends_on);
4197 cost = add_costs (cost,
4198 difference_cost (data,
4199 ubase, build_int_cst (utype, 0),
4200 &symbol_present, &var_present,
4201 &offset, depends_on));
4202 cost.cost /= avg_loop_niter (data->current_loop);
4203 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4204 }
4205
4206 if (inv_expr_id)
4207 {
4208 *inv_expr_id =
4209 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4210 /* Clear depends on. */
4211 if (*inv_expr_id != -1 && depends_on && *depends_on)
4212 bitmap_clear (*depends_on);
4213 }
4214
4215 /* If we are after the increment, the value of the candidate is higher by
4216 one iteration. */
4217 if (stmt_is_after_inc)
4218 offset -= ratio * cstepi;
4219
4220 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4221 (symbol/var1/const parts may be omitted). If we are looking for an
4222 address, find the cost of addressing this. */
4223 if (address_p)
4224 return add_costs (cost,
4225 get_address_cost (symbol_present, var_present,
4226 offset, ratio, cstepi,
4227 mem_mode,
4228 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4229 speed, stmt_is_after_inc,
4230 can_autoinc));
4231
4232 /* Otherwise estimate the costs for computing the expression. */
4233 if (!symbol_present && !var_present && !offset)
4234 {
4235 if (ratio != 1)
4236 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4237 return cost;
4238 }
4239
4240 /* Symbol + offset should be compile-time computable, so consider that they
4241 are added once to the variable, if present. */
4242 if (var_present && (symbol_present || offset))
4243 cost.cost += adjust_setup_cost (data,
4244 add_cost (speed, TYPE_MODE (ctype)));
4245
4246 /* Having offset does not affect runtime cost in case it is added to
4247 symbol, but it increases complexity. */
4248 if (offset)
4249 cost.complexity++;
4250
4251 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4252
4253 aratio = ratio > 0 ? ratio : -ratio;
4254 if (aratio != 1)
4255 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4256 return cost;
4257
4258 fallback:
4259 if (can_autoinc)
4260 *can_autoinc = false;
4261
4262 {
4263 /* Just get the expression, expand it and measure the cost. */
4264 tree comp = get_computation_at (data->current_loop, use, cand, at);
4265
4266 if (!comp)
4267 return infinite_cost;
4268
4269 if (address_p)
4270 comp = build_simple_mem_ref (comp);
4271
4272 return new_cost (computation_cost (comp, speed), 0);
4273 }
4274 }
4275
4276 /* Determines the cost of the computation by which USE is expressed
4277 from induction variable CAND. If ADDRESS_P is true, we just need
4278 to create an address from it, otherwise we want to get it into a
4279 register. A set of invariants we depend on is stored in
4280 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4281 autoinc addressing is likely. */
4282
4283 static comp_cost
4284 get_computation_cost (struct ivopts_data *data,
4285 struct iv_use *use, struct iv_cand *cand,
4286 bool address_p, bitmap *depends_on,
4287 bool *can_autoinc, int *inv_expr_id)
4288 {
4289 return get_computation_cost_at (data,
4290 use, cand, address_p, depends_on, use->stmt,
4291 can_autoinc, inv_expr_id);
4292 }
4293
4294 /* Determines cost of basing replacement of USE on CAND in a generic
4295 expression. */
4296
4297 static bool
4298 determine_use_iv_cost_generic (struct ivopts_data *data,
4299 struct iv_use *use, struct iv_cand *cand)
4300 {
4301 bitmap depends_on;
4302 comp_cost cost;
4303 int inv_expr_id = -1;
4304
4305 /* The simple case first -- if we need to express value of the preserved
4306 original biv, the cost is 0. This also prevents us from counting the
4307 cost of increment twice -- once at this use and once in the cost of
4308 the candidate. */
4309 if (cand->pos == IP_ORIGINAL
4310 && cand->incremented_at == use->stmt)
4311 {
4312 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4313 ERROR_MARK, -1);
4314 return true;
4315 }
4316
4317 cost = get_computation_cost (data, use, cand, false, &depends_on,
4318 NULL, &inv_expr_id);
4319
4320 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4321 inv_expr_id);
4322
4323 return !infinite_cost_p (cost);
4324 }
4325
4326 /* Determines cost of basing replacement of USE on CAND in an address. */
4327
4328 static bool
4329 determine_use_iv_cost_address (struct ivopts_data *data,
4330 struct iv_use *use, struct iv_cand *cand)
4331 {
4332 bitmap depends_on;
4333 bool can_autoinc;
4334 int inv_expr_id = -1;
4335 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4336 &can_autoinc, &inv_expr_id);
4337
4338 if (cand->ainc_use == use)
4339 {
4340 if (can_autoinc)
4341 cost.cost -= cand->cost_step;
4342 /* If we generated the candidate solely for exploiting autoincrement
4343 opportunities, and it turns out it can't be used, set the cost to
4344 infinity to make sure we ignore it. */
4345 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4346 cost = infinite_cost;
4347 }
4348 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4349 inv_expr_id);
4350
4351 return !infinite_cost_p (cost);
4352 }
4353
4354 /* Computes value of candidate CAND at position AT in iteration NITER, and
4355 stores it to VAL. */
4356
4357 static void
4358 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4359 aff_tree *val)
4360 {
4361 aff_tree step, delta, nit;
4362 struct iv *iv = cand->iv;
4363 tree type = TREE_TYPE (iv->base);
4364 tree steptype = type;
4365 if (POINTER_TYPE_P (type))
4366 steptype = sizetype;
4367
4368 tree_to_aff_combination (iv->step, steptype, &step);
4369 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4370 aff_combination_convert (&nit, steptype);
4371 aff_combination_mult (&nit, &step, &delta);
4372 if (stmt_after_increment (loop, cand, at))
4373 aff_combination_add (&delta, &step);
4374
4375 tree_to_aff_combination (iv->base, type, val);
4376 aff_combination_add (val, &delta);
4377 }
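/* In other words (a restatement of the code above, not new logic):

     *VAL = base + step * NITER        if AT is before the increment,
     *VAL = base + step * (NITER + 1)  if AT is after the increment,

   computed in the affine combination representation so that pointer
   bases and symbolic steps are handled uniformly.  */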
4378
4379 /* Returns period of induction variable iv. */
4380
4381 static tree
4382 iv_period (struct iv *iv)
4383 {
4384 tree step = iv->step, period, type;
4385 tree pow2div;
4386
4387 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4388
4389 type = unsigned_type_for (TREE_TYPE (step));
4390 /* The period of the iv is lcm (step, type_range) / step - 1,
4391 i.e., N * type_range / step - 1. Since the type range is a power
4392 of two, N == step >> num_of_ending_zeros_binary (step),
4393 so the final result is
4394
4395 (type_range >> num_of_ending_zeros_binary (step)) - 1
4396
4397 */
4398 pow2div = num_ending_zeros (step);
4399
4400 period = build_low_bits_mask (type,
4401 (TYPE_PRECISION (type)
4402 - tree_to_uhwi (pow2div)));
4403
4404 return period;
4405 }
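/* A worked example with hypothetical numbers: for an 8-bit unsigned
   type and step == 4, num_ending_zeros (step) == 2, so the period is
   a mask of 8 - 2 == 6 low bits, i.e. 63. Indeed, the iv can be
   stepped 63 times before revisiting a value, which matches
   lcm (4, 256) / 4 - 1 == 63.  */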
4406
4407 /* Returns the comparison operator used when eliminating the iv USE. */
4408
4409 static enum tree_code
4410 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4411 {
4412 struct loop *loop = data->current_loop;
4413 basic_block ex_bb;
4414 edge exit;
4415
4416 ex_bb = gimple_bb (use->stmt);
4417 exit = EDGE_SUCC (ex_bb, 0);
4418 if (flow_bb_inside_loop_p (loop, exit->dest))
4419 exit = EDGE_SUCC (ex_bb, 1);
4420
4421 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4422 }
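/* For instance, if the exit test is "if (i == n) break;" -- i.e. the
   edge leaving the loop is the true edge of the condition -- EQ_EXPR
   is returned and the eliminated test keeps the form cand == bound;
   when the loop exits on the false edge instead, cand != bound
   (NE_EXPR) is used.  */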
4423
4424 static tree
4425 strip_wrap_conserving_type_conversions (tree exp)
4426 {
4427 while (tree_ssa_useless_type_conversion (exp)
4428 && (nowrap_type_p (TREE_TYPE (exp))
4429 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4430 exp = TREE_OPERAND (exp, 0);
4431 return exp;
4432 }
4433
4434 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic: we
4435 check for an exact match. */
4436
4437 static bool
4438 expr_equal_p (tree e, tree what)
4439 {
4440 gimple stmt;
4441 enum tree_code code;
4442
4443 e = strip_wrap_conserving_type_conversions (e);
4444 what = strip_wrap_conserving_type_conversions (what);
4445
4446 code = TREE_CODE (what);
4447 if (TREE_TYPE (e) != TREE_TYPE (what))
4448 return false;
4449
4450 if (operand_equal_p (e, what, 0))
4451 return true;
4452
4453 if (TREE_CODE (e) != SSA_NAME)
4454 return false;
4455
4456 stmt = SSA_NAME_DEF_STMT (e);
4457 if (gimple_code (stmt) != GIMPLE_ASSIGN
4458 || gimple_assign_rhs_code (stmt) != code)
4459 return false;
4460
4461 switch (get_gimple_rhs_class (code))
4462 {
4463 case GIMPLE_BINARY_RHS:
4464 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4465 return false;
4466 /* Fallthru. */
4467
4468 case GIMPLE_UNARY_RHS:
4469 case GIMPLE_SINGLE_RHS:
4470 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4471 default:
4472 return false;
4473 }
4474 }
4475
4476 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4477 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4478 calculation is performed in a non-wrapping type.
4479
4480 TODO: More generally, we could test for the situation that
4481 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4482 This would require knowing the sign of OFFSET.
4483
4484 Also, we only look for the first addition in the computation of BASE.
4485 More complex analysis would be better, but introducing it just for
4486 this optimization seems like overkill. */
4487
4488 static bool
4489 difference_cannot_overflow_p (tree base, tree offset)
4490 {
4491 enum tree_code code;
4492 tree e1, e2;
4493
4494 if (!nowrap_type_p (TREE_TYPE (base)))
4495 return false;
4496
4497 base = expand_simple_operations (base);
4498
4499 if (TREE_CODE (base) == SSA_NAME)
4500 {
4501 gimple stmt = SSA_NAME_DEF_STMT (base);
4502
4503 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4504 return false;
4505
4506 code = gimple_assign_rhs_code (stmt);
4507 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4508 return false;
4509
4510 e1 = gimple_assign_rhs1 (stmt);
4511 e2 = gimple_assign_rhs2 (stmt);
4512 }
4513 else
4514 {
4515 code = TREE_CODE (base);
4516 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4517 return false;
4518 e1 = TREE_OPERAND (base, 0);
4519 e2 = TREE_OPERAND (base, 1);
4520 }
4521
4522 /* TODO: deeper inspection may be necessary to prove the equality. */
4523 switch (code)
4524 {
4525 case PLUS_EXPR:
4526 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4527 case POINTER_PLUS_EXPR:
4528 return expr_equal_p (e2, offset);
4529
4530 default:
4531 return false;
4532 }
4533 }
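/* An illustrative case (hypothetical SSA names): if BASE is defined by
   "p_1 = q_2 + off_3" (a POINTER_PLUS_EXPR in a non-wrapping pointer
   type) and OFFSET equals off_3, then BASE - OFFSET is exactly q_2,
   so the subtraction cannot wrap and true is returned.  */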
4534
4535 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4536 comparison with CAND. NITER describes the number of iterations of
4537 the loops. If successful, the comparison in COMP_P is altered accordingly.
4538
4539 We aim to handle the following situation:
4540
4541 sometype *base, *p;
4542 int a, b, i;
4543
4544 i = a;
4545 p = p_0 = base + a;
4546
4547 do
4548 {
4549 bla (*p);
4550 p++;
4551 i++;
4552 }
4553 while (i < b);
4554
4555 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4556 We aim to optimize this to
4557
4558 p = p_0 = base + a;
4559 do
4560 {
4561 bla (*p);
4562 p++;
4563 }
4564 while (p < p_0 - a + b);
4565
4566 This preserves correctness, since the pointer arithmetic does not
4567 overflow. More precisely:
4568
4569 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4570 overflow in computing it or the values of p.
4571 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4572 overflow. To prove this, we use the fact that p_0 = base + a. */
4573
4574 static bool
4575 iv_elimination_compare_lt (struct ivopts_data *data,
4576 struct iv_cand *cand, enum tree_code *comp_p,
4577 struct tree_niter_desc *niter)
4578 {
4579 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4580 struct affine_tree_combination nit, tmpa, tmpb;
4581 enum tree_code comp;
4582 HOST_WIDE_INT step;
4583
4584 /* We need to know that the candidate induction variable does not overflow.
4585 While more complex analysis may be used to prove this, for now just
4586 check that the variable appears in the original program and that it
4587 is computed in a type that guarantees no overflows. */
4588 cand_type = TREE_TYPE (cand->iv->base);
4589 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4590 return false;
4591
4592 /* Make sure that the loop iterates until the loop bound is hit, as otherwise
4593 the calculation of the BOUND could overflow, making the comparison
4594 invalid. */
4595 if (!data->loop_single_exit_p)
4596 return false;
4597
4598 /* We need to be able to decide whether candidate is increasing or decreasing
4599 in order to choose the right comparison operator. */
4600 if (!cst_and_fits_in_hwi (cand->iv->step))
4601 return false;
4602 step = int_cst_value (cand->iv->step);
4603
4604 /* Check that the number of iterations matches the expected pattern:
4605 a + 1 > b ? 0 : b - a - 1. */
4606 mbz = niter->may_be_zero;
4607 if (TREE_CODE (mbz) == GT_EXPR)
4608 {
4609 /* Handle a + 1 > b. */
4610 tree op0 = TREE_OPERAND (mbz, 0);
4611 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4612 {
4613 a = TREE_OPERAND (op0, 0);
4614 b = TREE_OPERAND (mbz, 1);
4615 }
4616 else
4617 return false;
4618 }
4619 else if (TREE_CODE (mbz) == LT_EXPR)
4620 {
4621 tree op1 = TREE_OPERAND (mbz, 1);
4622
4623 /* Handle b < a + 1. */
4624 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4625 {
4626 a = TREE_OPERAND (op1, 0);
4627 b = TREE_OPERAND (mbz, 0);
4628 }
4629 else
4630 return false;
4631 }
4632 else
4633 return false;
4634
4635 /* Expected number of iterations is B - A - 1. Check that it matches
4636 the actual number, i.e., that B - A - NITER = 1. */
4637 tree_to_aff_combination (niter->niter, nit_type, &nit);
4638 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4639 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4640 aff_combination_scale (&nit, double_int_minus_one);
4641 aff_combination_scale (&tmpa, double_int_minus_one);
4642 aff_combination_add (&tmpb, &tmpa);
4643 aff_combination_add (&tmpb, &nit);
4644 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4645 return false;
4646
4647 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4648 overflow. */
4649 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4650 cand->iv->step,
4651 fold_convert (TREE_TYPE (cand->iv->step), a));
4652 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4653 return false;
4654
4655 /* Determine the new comparison operator. */
4656 comp = step < 0 ? GT_EXPR : LT_EXPR;
4657 if (*comp_p == NE_EXPR)
4658 *comp_p = comp;
4659 else if (*comp_p == EQ_EXPR)
4660 *comp_p = invert_tree_comparison (comp, false);
4661 else
4662 gcc_unreachable ();
4663
4664 return true;
4665 }
4666
4667 /* Check whether it is possible to express the condition in USE by comparison
4668 of candidate CAND. If so, store the value compared with to BOUND, and the
4669 comparison operator to COMP. */
4670
4671 static bool
4672 may_eliminate_iv (struct ivopts_data *data,
4673 struct iv_use *use, struct iv_cand *cand, tree *bound,
4674 enum tree_code *comp)
4675 {
4676 basic_block ex_bb;
4677 edge exit;
4678 tree period;
4679 struct loop *loop = data->current_loop;
4680 aff_tree bnd;
4681 struct tree_niter_desc *desc = NULL;
4682
4683 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4684 return false;
4685
4686 /* For now this works only for exits that dominate the loop latch.
4687 TODO: extend to other conditions inside loop body. */
4688 ex_bb = gimple_bb (use->stmt);
4689 if (use->stmt != last_stmt (ex_bb)
4690 || gimple_code (use->stmt) != GIMPLE_COND
4691 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4692 return false;
4693
4694 exit = EDGE_SUCC (ex_bb, 0);
4695 if (flow_bb_inside_loop_p (loop, exit->dest))
4696 exit = EDGE_SUCC (ex_bb, 1);
4697 if (flow_bb_inside_loop_p (loop, exit->dest))
4698 return false;
4699
4700 desc = niter_for_exit (data, exit);
4701 if (!desc)
4702 return false;
4703
4704 /* Determine whether we can use the variable to test the exit condition.
4705 This is the case iff the period of the induction variable is greater
4706 than the number of iterations for which the exit condition is true. */
4707 period = iv_period (cand->iv);
4708
4709 /* If the number of iterations is constant, compare against it directly. */
4710 if (TREE_CODE (desc->niter) == INTEGER_CST)
4711 {
4712 /* See cand_value_at. */
4713 if (stmt_after_increment (loop, cand, use->stmt))
4714 {
4715 if (!tree_int_cst_lt (desc->niter, period))
4716 return false;
4717 }
4718 else
4719 {
4720 if (tree_int_cst_lt (period, desc->niter))
4721 return false;
4722 }
4723 }
4724
4725 /* If not, and if this is the only possible exit of the loop, see whether
4726 we can get a conservative estimate on the number of iterations of the
4727 entire loop and compare against that instead. */
4728 else
4729 {
4730 double_int period_value, max_niter;
4731
4732 max_niter = desc->max;
4733 if (stmt_after_increment (loop, cand, use->stmt))
4734 max_niter += double_int_one;
4735 period_value = tree_to_double_int (period);
4736 if (max_niter.ugt (period_value))
4737 {
4738 /* See if we can take advantage of inferred loop bound information. */
4739 if (data->loop_single_exit_p)
4740 {
4741 if (!max_loop_iterations (loop, &max_niter))
4742 return false;
4743 /* The loop bound is already adjusted by adding 1. */
4744 if (max_niter.ugt (period_value))
4745 return false;
4746 }
4747 else
4748 return false;
4749 }
4750 }
4751
4752 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4753
4754 *bound = aff_combination_to_tree (&bnd);
4755 *comp = iv_elimination_compare (data, use);
4756
4757 /* It is unlikely that computing the number of iterations using division
4758 would be more profitable than keeping the original induction variable. */
4759 if (expression_expensive_p (*bound))
4760 return false;
4761
4762 /* Sometimes it is possible to handle the situation that the number of
4763 iterations may be zero unless additional assumptions hold, by using <
4764 instead of != in the exit condition.
4765
4766 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4767 base the exit condition on it. However, that is often too
4768 expensive. */
4769 if (!integer_zerop (desc->may_be_zero))
4770 return iv_elimination_compare_lt (data, cand, comp, desc);
4771
4772 return true;
4773 }
4774
4775 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4776 be copied, if it is used in the loop body and DATA->body_includes_call. */
4777
4778 static int
4779 parm_decl_cost (struct ivopts_data *data, tree bound)
4780 {
4781 tree sbound = bound;
4782 STRIP_NOPS (sbound);
4783
4784 if (TREE_CODE (sbound) == SSA_NAME
4785 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4786 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4787 && data->body_includes_call)
4788 return COSTS_N_INSNS (1);
4789
4790 return 0;
4791 }
4792
4793 /* Determines cost of basing replacement of USE on CAND in a condition. */
4794
4795 static bool
4796 determine_use_iv_cost_condition (struct ivopts_data *data,
4797 struct iv_use *use, struct iv_cand *cand)
4798 {
4799 tree bound = NULL_TREE;
4800 struct iv *cmp_iv;
4801 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4802 comp_cost elim_cost, express_cost, cost, bound_cost;
4803 bool ok;
4804 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4805 tree *control_var, *bound_cst;
4806 enum tree_code comp = ERROR_MARK;
4807
4808 /* Only consider real candidates. */
4809 if (!cand->iv)
4810 {
4811 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4812 ERROR_MARK, -1);
4813 return false;
4814 }
4815
4816 /* Try iv elimination. */
4817 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4818 {
4819 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4820 if (elim_cost.cost == 0)
4821 elim_cost.cost = parm_decl_cost (data, bound);
4822 else if (TREE_CODE (bound) == INTEGER_CST)
4823 elim_cost.cost = 0;
4824 /* If we replace a loop condition 'i < n' with 'p < base + n',
4825 depends_on_elim will have 'base' and 'n' set, which implies
4826 that both 'base' and 'n' will be live during the loop. More likely,
4827 'base + n' will be loop invariant, resulting in only one live value
4828 during the loop. So in that case we clear depends_on_elim and set
4829 elim_inv_expr_id instead. */
4830 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4831 {
4832 elim_inv_expr_id = get_expr_id (data, bound);
4833 bitmap_clear (depends_on_elim);
4834 }
4835 /* The bound is a loop invariant, so it will be computed only
4836 once. */
4837 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4838 }
4839 else
4840 elim_cost = infinite_cost;
4841
4842 /* Try expressing the original giv. If it is compared with an invariant,
4843 note that we cannot get rid of it. */
4844 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4845 NULL, &cmp_iv);
4846 gcc_assert (ok);
4847
4848 /* When the condition is a comparison of the candidate IV against
4849 zero, prefer this IV.
4850
4851 TODO: The constant that we're subtracting from the cost should
4852 be target-dependent. This information should be added to the
4853 target costs for each backend. */
4854 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4855 && integer_zerop (*bound_cst)
4856 && (operand_equal_p (*control_var, cand->var_after, 0)
4857 || operand_equal_p (*control_var, cand->var_before, 0)))
4858 elim_cost.cost -= 1;
4859
4860 express_cost = get_computation_cost (data, use, cand, false,
4861 &depends_on_express, NULL,
4862 &express_inv_expr_id);
4863 fd_ivopts_data = data;
4864 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4865
4866 /* Count the cost of the original bound as well. */
4867 bound_cost = force_var_cost (data, *bound_cst, NULL);
4868 if (bound_cost.cost == 0)
4869 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4870 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4871 bound_cost.cost = 0;
4872 express_cost.cost += bound_cost.cost;
4873
4874 /* Choose the better approach, preferring the eliminated IV. */
4875 if (compare_costs (elim_cost, express_cost) <= 0)
4876 {
4877 cost = elim_cost;
4878 depends_on = depends_on_elim;
4879 depends_on_elim = NULL;
4880 inv_expr_id = elim_inv_expr_id;
4881 }
4882 else
4883 {
4884 cost = express_cost;
4885 depends_on = depends_on_express;
4886 depends_on_express = NULL;
4887 bound = NULL_TREE;
4888 comp = ERROR_MARK;
4889 inv_expr_id = express_inv_expr_id;
4890 }
4891
4892 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4893
4894 if (depends_on_elim)
4895 BITMAP_FREE (depends_on_elim);
4896 if (depends_on_express)
4897 BITMAP_FREE (depends_on_express);
4898
4899 return !infinite_cost_p (cost);
4900 }
4901
4902 /* Determines cost of basing replacement of USE on CAND. Returns false
4903 if USE cannot be based on CAND. */
4904
4905 static bool
4906 determine_use_iv_cost (struct ivopts_data *data,
4907 struct iv_use *use, struct iv_cand *cand)
4908 {
4909 switch (use->type)
4910 {
4911 case USE_NONLINEAR_EXPR:
4912 return determine_use_iv_cost_generic (data, use, cand);
4913
4914 case USE_ADDRESS:
4915 return determine_use_iv_cost_address (data, use, cand);
4916
4917 case USE_COMPARE:
4918 return determine_use_iv_cost_condition (data, use, cand);
4919
4920 default:
4921 gcc_unreachable ();
4922 }
4923 }
4924
4925 /* Return true if get_computation_cost indicates that autoincrement is
4926 a possibility for the pair of USE and CAND, false otherwise. */
4927
4928 static bool
4929 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4930 struct iv_cand *cand)
4931 {
4932 bitmap depends_on;
4933 bool can_autoinc;
4934 comp_cost cost;
4935
4936 if (use->type != USE_ADDRESS)
4937 return false;
4938
4939 cost = get_computation_cost (data, use, cand, true, &depends_on,
4940 &can_autoinc, NULL);
4941
4942 BITMAP_FREE (depends_on);
4943
4944 return !infinite_cost_p (cost) && can_autoinc;
4945 }
4946
4947 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4948 use that allows autoincrement, and set their AINC_USE if possible. */
4949
4950 static void
4951 set_autoinc_for_original_candidates (struct ivopts_data *data)
4952 {
4953 unsigned i, j;
4954
4955 for (i = 0; i < n_iv_cands (data); i++)
4956 {
4957 struct iv_cand *cand = iv_cand (data, i);
4958 struct iv_use *closest_before = NULL;
4959 struct iv_use *closest_after = NULL;
4960 if (cand->pos != IP_ORIGINAL)
4961 continue;
4962
4963 for (j = 0; j < n_iv_uses (data); j++)
4964 {
4965 struct iv_use *use = iv_use (data, j);
4966 unsigned uid = gimple_uid (use->stmt);
4967
4968 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4969 continue;
4970
4971 if (uid < gimple_uid (cand->incremented_at)
4972 && (closest_before == NULL
4973 || uid > gimple_uid (closest_before->stmt)))
4974 closest_before = use;
4975
4976 if (uid > gimple_uid (cand->incremented_at)
4977 && (closest_after == NULL
4978 || uid < gimple_uid (closest_after->stmt)))
4979 closest_after = use;
4980 }
4981
4982 if (closest_before != NULL
4983 && autoinc_possible_for_pair (data, closest_before, cand))
4984 cand->ainc_use = closest_before;
4985 else if (closest_after != NULL
4986 && autoinc_possible_for_pair (data, closest_after, cand))
4987 cand->ainc_use = closest_after;
4988 }
4989 }
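/* For example (an illustrative pattern, not from the sources): in

     use:        ... = *p;
     increment:  p = p + 4;

   within one basic block, the use is the closest one before the
   increment, so it becomes AINC_USE and may later be rewritten using
   a post-increment address if the target supports it.  */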
4990
4991 /* Finds the candidates for the induction variables. */
4992
4993 static void
4994 find_iv_candidates (struct ivopts_data *data)
4995 {
4996 /* Add commonly used ivs. */
4997 add_standard_iv_candidates (data);
4998
4999 /* Add old induction variables. */
5000 add_old_ivs_candidates (data);
5001
5002 /* Add induction variables derived from uses. */
5003 add_derived_ivs_candidates (data);
5004
5005 set_autoinc_for_original_candidates (data);
5006
5007 /* Record the important candidates. */
5008 record_important_candidates (data);
5009 }
5010
5011 /* Determines costs of basing the use of the iv on an iv candidate. */
5012
5013 static void
5014 determine_use_iv_costs (struct ivopts_data *data)
5015 {
5016 unsigned i, j;
5017 struct iv_use *use;
5018 struct iv_cand *cand;
5019 bitmap to_clear = BITMAP_ALLOC (NULL);
5020
5021 alloc_use_cost_map (data);
5022
5023 for (i = 0; i < n_iv_uses (data); i++)
5024 {
5025 use = iv_use (data, i);
5026
5027 if (data->consider_all_candidates)
5028 {
5029 for (j = 0; j < n_iv_cands (data); j++)
5030 {
5031 cand = iv_cand (data, j);
5032 determine_use_iv_cost (data, use, cand);
5033 }
5034 }
5035 else
5036 {
5037 bitmap_iterator bi;
5038
5039 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5040 {
5041 cand = iv_cand (data, j);
5042 if (!determine_use_iv_cost (data, use, cand))
5043 bitmap_set_bit (to_clear, j);
5044 }
5045
5046 /* Remove the candidates for which the cost is infinite from
5047 the list of related candidates. */
5048 bitmap_and_compl_into (use->related_cands, to_clear);
5049 bitmap_clear (to_clear);
5050 }
5051 }
5052
5053 BITMAP_FREE (to_clear);
5054
5055 if (dump_file && (dump_flags & TDF_DETAILS))
5056 {
5057 fprintf (dump_file, "Use-candidate costs:\n");
5058
5059 for (i = 0; i < n_iv_uses (data); i++)
5060 {
5061 use = iv_use (data, i);
5062
5063 fprintf (dump_file, "Use %d:\n", i);
5064 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5065 for (j = 0; j < use->n_map_members; j++)
5066 {
5067 if (!use->cost_map[j].cand
5068 || infinite_cost_p (use->cost_map[j].cost))
5069 continue;
5070
5071 fprintf (dump_file, " %d\t%d\t%d\t",
5072 use->cost_map[j].cand->id,
5073 use->cost_map[j].cost.cost,
5074 use->cost_map[j].cost.complexity);
5075 if (use->cost_map[j].depends_on)
5076 bitmap_print (dump_file,
5077 use->cost_map[j].depends_on, "","");
5078 if (use->cost_map[j].inv_expr_id != -1)
5079 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5080 fprintf (dump_file, "\n");
5081 }
5082
5083 fprintf (dump_file, "\n");
5084 }
5085 fprintf (dump_file, "\n");
5086 }
5087 }
5088
5089 /* Determines cost of the candidate CAND. */
5090
5091 static void
5092 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5093 {
5094 comp_cost cost_base;
5095 unsigned cost, cost_step;
5096 tree base;
5097
5098 if (!cand->iv)
5099 {
5100 cand->cost = 0;
5101 return;
5102 }
5103
5104 /* There are two costs associated with the candidate -- its increment
5105 and its initialization. The second is almost negligible for any loop
5106 that rolls enough, so we take it into account only a little. */
5107
5108 base = cand->iv->base;
5109 cost_base = force_var_cost (data, base, NULL);
5110 /* It will be exceptional that the iv register happens to be initialized with
5111 the proper value at no cost. In general, there will at least be a regcopy
5112 or a const set. */
5113 if (cost_base.cost == 0)
5114 cost_base.cost = COSTS_N_INSNS (1);
5115 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5116
5117 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5118
5119 /* Prefer the original ivs unless we may gain something by replacing them.
5120 The reason is to make debugging simpler; so this is not relevant for
5121 artificial ivs created by other optimization passes. */
5122 if (cand->pos != IP_ORIGINAL
5123 || !SSA_NAME_VAR (cand->var_before)
5124 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5125 cost++;
5126
5127 /* Prefer not to insert statements into latch unless there are some
5128 already (so that we do not create unnecessary jumps). */
5129 if (cand->pos == IP_END
5130 && empty_block_p (ip_end_pos (data->current_loop)))
5131 cost++;
5132
5133 cand->cost = cost;
5134 cand->cost_step = cost_step;
5135 }
5136
5137 /* Determines costs of computation of the candidates. */
5138
5139 static void
5140 determine_iv_costs (struct ivopts_data *data)
5141 {
5142 unsigned i;
5143
5144 if (dump_file && (dump_flags & TDF_DETAILS))
5145 {
5146 fprintf (dump_file, "Candidate costs:\n");
5147 fprintf (dump_file, " cand\tcost\n");
5148 }
5149
5150 for (i = 0; i < n_iv_cands (data); i++)
5151 {
5152 struct iv_cand *cand = iv_cand (data, i);
5153
5154 determine_iv_cost (data, cand);
5155
5156 if (dump_file && (dump_flags & TDF_DETAILS))
5157 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5158 }
5159
5160 if (dump_file && (dump_flags & TDF_DETAILS))
5161 fprintf (dump_file, "\n");
5162 }
5163
5164 /* Calculates cost for having SIZE induction variables. */
5165
5166 static unsigned
5167 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5168 {
5169 /* We add size to the cost, so that we prefer eliminating ivs
5170 if possible. */
5171 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5172 data->body_includes_call);
5173 }
5174
5175 /* For each size of the induction variable set determine the penalty. */
5176
5177 static void
5178 determine_set_costs (struct ivopts_data *data)
5179 {
5180 unsigned j, n;
5181 gimple phi;
5182 gimple_stmt_iterator psi;
5183 tree op;
5184 struct loop *loop = data->current_loop;
5185 bitmap_iterator bi;
5186
5187 if (dump_file && (dump_flags & TDF_DETAILS))
5188 {
5189 fprintf (dump_file, "Global costs:\n");
5190 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5191 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5192 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5193 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5194 }
5195
5196 n = 0;
5197 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5198 {
5199 phi = gsi_stmt (psi);
5200 op = PHI_RESULT (phi);
5201
5202 if (virtual_operand_p (op))
5203 continue;
5204
5205 if (get_iv (data, op))
5206 continue;
5207
5208 n++;
5209 }
5210
5211 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5212 {
5213 struct version_info *info = ver_info (data, j);
5214
5215 if (info->inv_id && info->has_nonlin_use)
5216 n++;
5217 }
5218
5219 data->regs_used = n;
5220 if (dump_file && (dump_flags & TDF_DETAILS))
5221 fprintf (dump_file, " regs_used %d\n", n);
5222
5223 if (dump_file && (dump_flags & TDF_DETAILS))
5224 {
5225 fprintf (dump_file, " cost for size:\n");
5226 fprintf (dump_file, " ivs\tcost\n");
5227 for (j = 0; j <= 2 * target_avail_regs; j++)
5228 fprintf (dump_file, " %d\t%d\n", j,
5229 ivopts_global_cost_for_size (data, j));
5230 fprintf (dump_file, "\n");
5231 }
5232 }
5233
5234 /* Returns true if A is a cheaper cost pair than B. */
5235
5236 static bool
5237 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5238 {
5239 int cmp;
5240
5241 if (!a)
5242 return false;
5243
5244 if (!b)
5245 return true;
5246
5247 cmp = compare_costs (a->cost, b->cost);
5248 if (cmp < 0)
5249 return true;
5250
5251 if (cmp > 0)
5252 return false;
5253
5254 /* In case the costs are the same, prefer the cheaper candidate. */
5255 if (a->cand->cost < b->cand->cost)
5256 return true;
5257
5258 return false;
5259 }
5260
5261
5262 /* Returns the candidate by which USE is expressed in IVS. */
5263
5264 static struct cost_pair *
5265 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5266 {
5267 return ivs->cand_for_use[use->id];
5268 }
5269
5270 /* Computes the cost field of IVS structure. */
5271
5272 static void
5273 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5274 {
5275 comp_cost cost = ivs->cand_use_cost;
5276
5277 cost.cost += ivs->cand_cost;
5278
5279 cost.cost += ivopts_global_cost_for_size (data,
5280 ivs->n_regs + ivs->num_used_inv_expr);
5281
5282 ivs->cost = cost;
5283 }
5284
5285 /* Remove invariants in set INVS from set IVS. */
5286
5287 static void
5288 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5289 {
5290 bitmap_iterator bi;
5291 unsigned iid;
5292
5293 if (!invs)
5294 return;
5295
5296 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5297 {
5298 ivs->n_invariant_uses[iid]--;
5299 if (ivs->n_invariant_uses[iid] == 0)
5300 ivs->n_regs--;
5301 }
5302 }
5303
5304 /* Set USE not to be expressed by any candidate in IVS. */
5305
5306 static void
5307 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5308 struct iv_use *use)
5309 {
5310 unsigned uid = use->id, cid;
5311 struct cost_pair *cp;
5312
5313 cp = ivs->cand_for_use[uid];
5314 if (!cp)
5315 return;
5316 cid = cp->cand->id;
5317
5318 ivs->bad_uses++;
5319 ivs->cand_for_use[uid] = NULL;
5320 ivs->n_cand_uses[cid]--;
5321
5322 if (ivs->n_cand_uses[cid] == 0)
5323 {
5324 bitmap_clear_bit (ivs->cands, cid);
5325 /* Do not count the pseudocandidates. */
5326 if (cp->cand->iv)
5327 ivs->n_regs--;
5328 ivs->n_cands--;
5329 ivs->cand_cost -= cp->cand->cost;
5330
5331 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5332 }
5333
5334 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5335
5336 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5337
5338 if (cp->inv_expr_id != -1)
5339 {
5340 ivs->used_inv_expr[cp->inv_expr_id]--;
5341 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5342 ivs->num_used_inv_expr--;
5343 }
5344 iv_ca_recount_cost (data, ivs);
5345 }
5346
5347 /* Add invariants in set INVS to set IVS. */
5348
5349 static void
5350 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5351 {
5352 bitmap_iterator bi;
5353 unsigned iid;
5354
5355 if (!invs)
5356 return;
5357
5358 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5359 {
5360 ivs->n_invariant_uses[iid]++;
5361 if (ivs->n_invariant_uses[iid] == 1)
5362 ivs->n_regs++;
5363 }
5364 }
5365
5366 /* Set cost pair for USE in set IVS to CP. */
5367
5368 static void
5369 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5370 struct iv_use *use, struct cost_pair *cp)
5371 {
5372 unsigned uid = use->id, cid;
5373
5374 if (ivs->cand_for_use[uid] == cp)
5375 return;
5376
5377 if (ivs->cand_for_use[uid])
5378 iv_ca_set_no_cp (data, ivs, use);
5379
5380 if (cp)
5381 {
5382 cid = cp->cand->id;
5383
5384 ivs->bad_uses--;
5385 ivs->cand_for_use[uid] = cp;
5386 ivs->n_cand_uses[cid]++;
5387 if (ivs->n_cand_uses[cid] == 1)
5388 {
5389 bitmap_set_bit (ivs->cands, cid);
5390 /* Do not count the pseudocandidates. */
5391 if (cp->cand->iv)
5392 ivs->n_regs++;
5393 ivs->n_cands++;
5394 ivs->cand_cost += cp->cand->cost;
5395
5396 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5397 }
5398
5399 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5400 iv_ca_set_add_invariants (ivs, cp->depends_on);
5401
5402 if (cp->inv_expr_id != -1)
5403 {
5404 ivs->used_inv_expr[cp->inv_expr_id]++;
5405 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5406 ivs->num_used_inv_expr++;
5407 }
5408 iv_ca_recount_cost (data, ivs);
5409 }
5410 }
5411
5412 /* Extend set IVS by expressing USE by some of the candidates in it
5413 if possible. All important candidates will be considered
5414 if IMPORTANT_CANDIDATES is true. */
5415
5416 static void
5417 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5418 struct iv_use *use, bool important_candidates)
5419 {
5420 struct cost_pair *best_cp = NULL, *cp;
5421 bitmap_iterator bi;
5422 bitmap cands;
5423 unsigned i;
5424
5425 gcc_assert (ivs->upto >= use->id);
5426
5427 if (ivs->upto == use->id)
5428 {
5429 ivs->upto++;
5430 ivs->bad_uses++;
5431 }
5432
5433 cands = (important_candidates ? data->important_candidates : ivs->cands);
5434 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5435 {
5436 struct iv_cand *cand = iv_cand (data, i);
5437
5438 cp = get_use_iv_cost (data, use, cand);
5439
5440 if (cheaper_cost_pair (cp, best_cp))
5441 best_cp = cp;
5442 }
5443
5444 iv_ca_set_cp (data, ivs, use, best_cp);
5445 }
5446
5447 /* Get cost for assignment IVS. */
5448
5449 static comp_cost
5450 iv_ca_cost (struct iv_ca *ivs)
5451 {
5452 /* This was a conditional expression but it triggered a bug in
5453 Sun C 5.5. */
5454 if (ivs->bad_uses)
5455 return infinite_cost;
5456 else
5457 return ivs->cost;
5458 }
5459
5460 /* Returns true if all dependences of CP are among invariants in IVS. */
5461
5462 static bool
5463 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5464 {
5465 unsigned i;
5466 bitmap_iterator bi;
5467
5468 if (!cp->depends_on)
5469 return true;
5470
5471 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5472 {
5473 if (ivs->n_invariant_uses[i] == 0)
5474 return false;
5475 }
5476
5477 return true;
5478 }
5479
5480 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5481 it before NEXT_CHANGE. */
5482
5483 static struct iv_ca_delta *
5484 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5485 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5486 {
5487 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5488
5489 change->use = use;
5490 change->old_cp = old_cp;
5491 change->new_cp = new_cp;
5492 change->next_change = next_change;
5493
5494 return change;
5495 }
5496
5497 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5498 are rewritten. */
5499
5500 static struct iv_ca_delta *
5501 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5502 {
5503 struct iv_ca_delta *last;
5504
5505 if (!l2)
5506 return l1;
5507
5508 if (!l1)
5509 return l2;
5510
5511 for (last = l1; last->next_change; last = last->next_change)
5512 continue;
5513 last->next_change = l2;
5514
5515 return l1;
5516 }
5517
5518 /* Reverse the list of changes DELTA, forming the inverse to it. */
5519
5520 static struct iv_ca_delta *
5521 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5522 {
5523 struct iv_ca_delta *act, *next, *prev = NULL;
5524 struct cost_pair *tmp;
5525
5526 for (act = delta; act; act = next)
5527 {
5528 next = act->next_change;
5529 act->next_change = prev;
5530 prev = act;
5531
5532 tmp = act->old_cp;
5533 act->old_cp = act->new_cp;
5534 act->new_cp = tmp;
5535 }
5536
5537 return prev;
5538 }
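/* Reversing is used by iv_ca_delta_commit below: applying the reversed
   list with old_cp and new_cp swapped is exactly the inverse of
   applying the original list, which is how a tentatively committed
   change is rolled back.  */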
5539
5540 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5541 reverted instead. */
5542
5543 static void
5544 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5545 struct iv_ca_delta *delta, bool forward)
5546 {
5547 struct cost_pair *from, *to;
5548 struct iv_ca_delta *act;
5549
5550 if (!forward)
5551 delta = iv_ca_delta_reverse (delta);
5552
5553 for (act = delta; act; act = act->next_change)
5554 {
5555 from = act->old_cp;
5556 to = act->new_cp;
5557 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5558 iv_ca_set_cp (data, ivs, act->use, to);
5559 }
5560
5561 if (!forward)
5562 iv_ca_delta_reverse (delta);
5563 }
5564
5565 /* Returns true if CAND is used in IVS. */
5566
5567 static bool
5568 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5569 {
5570 return ivs->n_cand_uses[cand->id] > 0;
5571 }
5572
5573 /* Returns number of induction variable candidates in the set IVS. */
5574
5575 static unsigned
5576 iv_ca_n_cands (struct iv_ca *ivs)
5577 {
5578 return ivs->n_cands;
5579 }
5580
5581 /* Free the list of changes DELTA. */
5582
5583 static void
5584 iv_ca_delta_free (struct iv_ca_delta **delta)
5585 {
5586 struct iv_ca_delta *act, *next;
5587
5588 for (act = *delta; act; act = next)
5589 {
5590 next = act->next_change;
5591 free (act);
5592 }
5593
5594 *delta = NULL;
5595 }
5596
5597 /* Allocates new iv candidates assignment. */
5598
5599 static struct iv_ca *
5600 iv_ca_new (struct ivopts_data *data)
5601 {
5602 struct iv_ca *nw = XNEW (struct iv_ca);
5603
5604 nw->upto = 0;
5605 nw->bad_uses = 0;
5606 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5607 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5608 nw->cands = BITMAP_ALLOC (NULL);
5609 nw->n_cands = 0;
5610 nw->n_regs = 0;
5611 nw->cand_use_cost = no_cost;
5612 nw->cand_cost = 0;
5613 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5614 nw->cost = no_cost;
5615 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5616 nw->num_used_inv_expr = 0;
5617
5618 return nw;
5619 }
5620
5621 /* Free memory occupied by the set IVS. */
5622
5623 static void
5624 iv_ca_free (struct iv_ca **ivs)
5625 {
5626 free ((*ivs)->cand_for_use);
5627 free ((*ivs)->n_cand_uses);
5628 BITMAP_FREE ((*ivs)->cands);
5629 free ((*ivs)->n_invariant_uses);
5630 free ((*ivs)->used_inv_expr);
5631 free (*ivs);
5632 *ivs = NULL;
5633 }
5634
5635 /* Dumps IVS to FILE. */
5636
5637 static void
5638 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5639 {
5640 const char *pref = " invariants ";
5641 unsigned i;
5642 comp_cost cost = iv_ca_cost (ivs);
5643
5644 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5645 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5646 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5647 bitmap_print (file, ivs->cands, " candidates: ","\n");
5648
5649 for (i = 0; i < ivs->upto; i++)
5650 {
5651 struct iv_use *use = iv_use (data, i);
5652 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5653 if (cp)
5654 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5655 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5656 else
5657 fprintf (file, " use:%d --> ??\n", use->id);
5658 }
5659
5660 for (i = 1; i <= data->max_inv_id; i++)
5661 if (ivs->n_invariant_uses[i])
5662 {
5663 fprintf (file, "%s%d", pref, i);
5664 pref = ", ";
5665 }
5666 fprintf (file, "\n\n");
5667 }
5668
5669 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5670 of the new set, and store the differences in DELTA. The number of ivs in
5671 the new set is stored in N_IVS. When MIN_NCAND is true, the function
5672 will try to find a solution with a minimal number of iv candidates. */
5673
5674 static comp_cost
5675 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5676 struct iv_cand *cand, struct iv_ca_delta **delta,
5677 unsigned *n_ivs, bool min_ncand)
5678 {
5679 unsigned i;
5680 comp_cost cost;
5681 struct iv_use *use;
5682 struct cost_pair *old_cp, *new_cp;
5683
5684 *delta = NULL;
5685 for (i = 0; i < ivs->upto; i++)
5686 {
5687 use = iv_use (data, i);
5688 old_cp = iv_ca_cand_for_use (ivs, use);
5689
5690 if (old_cp
5691 && old_cp->cand == cand)
5692 continue;
5693
5694 new_cp = get_use_iv_cost (data, use, cand);
5695 if (!new_cp)
5696 continue;
5697
5698 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5699 continue;
5700
5701 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5702 continue;
5703
5704 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5705 }
5706
5707 iv_ca_delta_commit (data, ivs, *delta, true);
5708 cost = iv_ca_cost (ivs);
5709 if (n_ivs)
5710 *n_ivs = iv_ca_n_cands (ivs);
5711 iv_ca_delta_commit (data, ivs, *delta, false);
5712
5713 return cost;
5714 }
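/* Note on the pattern above (an observation, not extra machinery):
   iv_ca_delta_commit (..., true) tentatively applies *DELTA so that
   iv_ca_cost reads the cost of the modified set, and the immediate
   commit with false undoes it again; IVS is therefore unchanged on
   return, and the caller applies *DELTA for real only if the returned
   cost is an improvement.  */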
5715
5716 /* Try narrowing set IVS by removing CAND. Return the cost of
5717 the new set and store the differences in DELTA. */
5718
5719 static comp_cost
5720 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5721 struct iv_cand *cand, struct iv_ca_delta **delta)
5722 {
5723 unsigned i, ci;
5724 struct iv_use *use;
5725 struct cost_pair *old_cp, *new_cp, *cp;
5726 bitmap_iterator bi;
5727 struct iv_cand *cnd;
5728 comp_cost cost;
5729
5730 *delta = NULL;
5731 for (i = 0; i < n_iv_uses (data); i++)
5732 {
5733 use = iv_use (data, i);
5734
5735 old_cp = iv_ca_cand_for_use (ivs, use);
5736 if (old_cp->cand != cand)
5737 continue;
5738
5739 new_cp = NULL;
5740
5741 if (data->consider_all_candidates)
5742 {
5743 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5744 {
5745 if (ci == cand->id)
5746 continue;
5747
5748 cnd = iv_cand (data, ci);
5749
5750 cp = get_use_iv_cost (data, use, cnd);
5751 if (!cp)
5752 continue;
5753
5754 if (!iv_ca_has_deps (ivs, cp))
5755 continue;
5756
5757 if (!cheaper_cost_pair (cp, new_cp))
5758 continue;
5759
5760 new_cp = cp;
5761 }
5762 }
5763 else
5764 {
5765 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5766 {
5767 if (ci == cand->id)
5768 continue;
5769
5770 cnd = iv_cand (data, ci);
5771
5772 cp = get_use_iv_cost (data, use, cnd);
5773 if (!cp)
5774 continue;
5775 if (!iv_ca_has_deps (ivs, cp))
5776 continue;
5777
5778 if (!cheaper_cost_pair (cp, new_cp))
5779 continue;
5780
5781 new_cp = cp;
5782 }
5783 }
5784
5785 if (!new_cp)
5786 {
5787 iv_ca_delta_free (delta);
5788 return infinite_cost;
5789 }
5790
5791 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5792 }
5793
5794 iv_ca_delta_commit (data, ivs, *delta, true);
5795 cost = iv_ca_cost (ivs);
5796 iv_ca_delta_commit (data, ivs, *delta, false);
5797
5798 return cost;
5799 }
5800
5801 /* Try optimizing the set of candidates IVS by removing candidates other
5802 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5803 differences in DELTA. */
5804
5805 static comp_cost
5806 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5807 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5808 {
5809 bitmap_iterator bi;
5810 struct iv_ca_delta *act_delta, *best_delta;
5811 unsigned i;
5812 comp_cost best_cost, acost;
5813 struct iv_cand *cand;
5814
5815 best_delta = NULL;
5816 best_cost = iv_ca_cost (ivs);
5817
5818 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5819 {
5820 cand = iv_cand (data, i);
5821
5822 if (cand == except_cand)
5823 continue;
5824
5825 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5826
5827 if (compare_costs (acost, best_cost) < 0)
5828 {
5829 best_cost = acost;
5830 iv_ca_delta_free (&best_delta);
5831 best_delta = act_delta;
5832 }
5833 else
5834 iv_ca_delta_free (&act_delta);
5835 }
5836
5837 if (!best_delta)
5838 {
5839 *delta = NULL;
5840 return best_cost;
5841 }
5842
5843 /* Recurse to possibly remove other unnecessary ivs. */
5844 iv_ca_delta_commit (data, ivs, best_delta, true);
5845 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5846 iv_ca_delta_commit (data, ivs, best_delta, false);
5847 *delta = iv_ca_delta_join (best_delta, *delta);
5848 return best_cost;
5849 }
5850
5851 /* Tries to extend the set IVS in the best possible way in order
5852 to express USE. If ORIGINALP is true, prefer candidates from
5853 the original set of IVs, otherwise favor important candidates not
5854 based on any memory object. */
5855
5856 static bool
5857 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5858 struct iv_use *use, bool originalp)
5859 {
5860 comp_cost best_cost, act_cost;
5861 unsigned i;
5862 bitmap_iterator bi;
5863 struct iv_cand *cand;
5864 struct iv_ca_delta *best_delta = NULL, *act_delta;
5865 struct cost_pair *cp;
5866
5867 iv_ca_add_use (data, ivs, use, false);
5868 best_cost = iv_ca_cost (ivs);
5869
5870 cp = iv_ca_cand_for_use (ivs, use);
5871 if (!cp)
5872 {
5873 ivs->upto--;
5874 ivs->bad_uses--;
5875 iv_ca_add_use (data, ivs, use, true);
5876 best_cost = iv_ca_cost (ivs);
5877 cp = iv_ca_cand_for_use (ivs, use);
5878 }
5879 if (cp)
5880 {
5881 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5882 iv_ca_set_no_cp (data, ivs, use);
5883 }
5884
5885 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5886 first try important candidates not based on any memory object. Only if
5887 this fails, try the specific ones. Rationale -- in loops with many
5888 variables the best choice often is to use just one generic biv. If we
5889 added many ivs specific to the uses here, the optimization algorithm later
5890 would be likely to get stuck in a local minimum, thus causing us to create
5891 too many ivs. Growing the set from a few ivs to more seems more likely to be
5892 successful -- starting from few ivs, replacing an expensive use by a
5893 specific iv should always be a win. */
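  /* For instance (an illustrative sketch only, not taken from any
     particular testcase), in a loop such as

	for (i = 0; i < n; i++)
	  {
	    a[i] = f (i);
	    b[i] = g (i);
	  }

     a single generic biv can express the addresses &a[i] and &b[i] as
     well as the exit test, which is usually cheaper than introducing a
     separate pointer iv for each array; hence the generic candidates
     are tried first.  */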
5894 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5895 {
5896 cand = iv_cand (data, i);
5897
5898 if (originalp && cand->pos != IP_ORIGINAL)
5899 continue;
5900
5901 if (!originalp && cand->iv->base_object != NULL_TREE)
5902 continue;
5903
5904 if (iv_ca_cand_used_p (ivs, cand))
5905 continue;
5906
5907 cp = get_use_iv_cost (data, use, cand);
5908 if (!cp)
5909 continue;
5910
5911 iv_ca_set_cp (data, ivs, use, cp);
5912 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5913 true);
5914 iv_ca_set_no_cp (data, ivs, use);
5915 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5916
5917 if (compare_costs (act_cost, best_cost) < 0)
5918 {
5919 best_cost = act_cost;
5920
5921 iv_ca_delta_free (&best_delta);
5922 best_delta = act_delta;
5923 }
5924 else
5925 iv_ca_delta_free (&act_delta);
5926 }
5927
5928 if (infinite_cost_p (best_cost))
5929 {
5930 for (i = 0; i < use->n_map_members; i++)
5931 {
5932 cp = use->cost_map + i;
5933 cand = cp->cand;
5934 if (!cand)
5935 continue;
5936
5937 /* Already tried this. */
5938 if (cand->important)
5939 {
5940 if (originalp && cand->pos == IP_ORIGINAL)
5941 continue;
5942 if (!originalp && cand->iv->base_object == NULL_TREE)
5943 continue;
5944 }
5945
5946 if (iv_ca_cand_used_p (ivs, cand))
5947 continue;
5948
5949 act_delta = NULL;
5950 iv_ca_set_cp (data, ivs, use, cp);
5951 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5952 iv_ca_set_no_cp (data, ivs, use);
5953 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5954 cp, act_delta);
5955
5956 if (compare_costs (act_cost, best_cost) < 0)
5957 {
5958 best_cost = act_cost;
5959
5960 if (best_delta)
5961 iv_ca_delta_free (&best_delta);
5962 best_delta = act_delta;
5963 }
5964 else
5965 iv_ca_delta_free (&act_delta);
5966 }
5967 }
5968
5969 iv_ca_delta_commit (data, ivs, best_delta, true);
5970 iv_ca_delta_free (&best_delta);
5971
5972 return !infinite_cost_p (best_cost);
5973 }
5974
5975 /* Finds an initial assignment of candidates to uses. */
5976
5977 static struct iv_ca *
5978 get_initial_solution (struct ivopts_data *data, bool originalp)
5979 {
5980 struct iv_ca *ivs = iv_ca_new (data);
5981 unsigned i;
5982
5983 for (i = 0; i < n_iv_uses (data); i++)
5984 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5985 {
5986 iv_ca_free (&ivs);
5987 return NULL;
5988 }
5989
5990 return ivs;
5991 }
5992
5993 /* Tries to improve set of induction variables IVS. */
5994
5995 static bool
5996 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5997 {
5998 unsigned i, n_ivs;
5999 comp_cost acost, best_cost = iv_ca_cost (ivs);
6000 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6001 struct iv_cand *cand;
6002
6003 /* Try extending the set of induction variables by one. */
6004 for (i = 0; i < n_iv_cands (data); i++)
6005 {
6006 cand = iv_cand (data, i);
6007
6008 if (iv_ca_cand_used_p (ivs, cand))
6009 continue;
6010
6011 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6012 if (!act_delta)
6013 continue;
6014
6015 /* If we successfully added the candidate and the set is small enough,
6016 try optimizing it by removing other candidates. */
6017 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6018 {
6019 iv_ca_delta_commit (data, ivs, act_delta, true);
6020 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6021 iv_ca_delta_commit (data, ivs, act_delta, false);
6022 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6023 }
6024
6025 if (compare_costs (acost, best_cost) < 0)
6026 {
6027 best_cost = acost;
6028 iv_ca_delta_free (&best_delta);
6029 best_delta = act_delta;
6030 }
6031 else
6032 iv_ca_delta_free (&act_delta);
6033 }
6034
6035 if (!best_delta)
6036 {
6037 /* Try removing the candidates from the set instead. */
6038 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6039
6040 /* Nothing more we can do. */
6041 if (!best_delta)
6042 return false;
6043 }
6044
6045 iv_ca_delta_commit (data, ivs, best_delta, true);
6046 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6047 iv_ca_delta_free (&best_delta);
6048 return true;
6049 }
6050
6051 /* Attempts to find the optimal set of induction variables. We use a simple
6052 greedy heuristic -- we try to replace at most one candidate in the selected
6053 solution and remove the unused ivs while this improves the cost. */
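/* As an illustrative sketch (invented costs, not a transcript of the pass),
   one round of the heuristic might look like:

     current set {i_1}                   cost 14
     try adding p_2, then prune          cost 10   <-- new best
     try adding j_3                      cost 15   (rejected)

   Such rounds repeat until no single addition (followed by pruning of
   candidates made redundant) improves the cost.  */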
6054
6055 static struct iv_ca *
6056 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6057 {
6058 struct iv_ca *set;
6059
6060 /* Get the initial solution. */
6061 set = get_initial_solution (data, originalp);
6062 if (!set)
6063 {
6064 if (dump_file && (dump_flags & TDF_DETAILS))
6065 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6066 return NULL;
6067 }
6068
6069 if (dump_file && (dump_flags & TDF_DETAILS))
6070 {
6071 fprintf (dump_file, "Initial set of candidates:\n");
6072 iv_ca_dump (data, dump_file, set);
6073 }
6074
6075 while (try_improve_iv_set (data, set))
6076 {
6077 if (dump_file && (dump_flags & TDF_DETAILS))
6078 {
6079 fprintf (dump_file, "Improved to:\n");
6080 iv_ca_dump (data, dump_file, set);
6081 }
6082 }
6083
6084 return set;
6085 }
6086
6087 static struct iv_ca *
6088 find_optimal_iv_set (struct ivopts_data *data)
6089 {
6090 unsigned i;
6091 struct iv_ca *set, *origset;
6092 struct iv_use *use;
6093 comp_cost cost, origcost;
6094
6095 /* Determine the cost based on a strategy that starts with original IVs,
6096 and then try again using a strategy that prefers candidates not based
6097 on any memory object. */
6098 origset = find_optimal_iv_set_1 (data, true);
6099 set = find_optimal_iv_set_1 (data, false);
6100
6101 if (!origset && !set)
6102 return NULL;
6103
6104 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6105 cost = set ? iv_ca_cost (set) : infinite_cost;
6106
6107 if (dump_file && (dump_flags & TDF_DETAILS))
6108 {
6109 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6110 origcost.cost, origcost.complexity);
6111 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6112 cost.cost, cost.complexity);
6113 }
6114
6115 /* Choose the one with the best cost. */
6116 if (compare_costs (origcost, cost) <= 0)
6117 {
6118 if (set)
6119 iv_ca_free (&set);
6120 set = origset;
6121 }
6122 else if (origset)
6123 iv_ca_free (&origset);
6124
6125 for (i = 0; i < n_iv_uses (data); i++)
6126 {
6127 use = iv_use (data, i);
6128 use->selected = iv_ca_cand_for_use (set, use)->cand;
6129 }
6130
6131 return set;
6132 }
6133
6134 /* Creates a new induction variable corresponding to CAND. */
6135
6136 static void
6137 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6138 {
6139 gimple_stmt_iterator incr_pos;
6140 tree base;
6141 bool after = false;
6142
6143 if (!cand->iv)
6144 return;
6145
6146 switch (cand->pos)
6147 {
6148 case IP_NORMAL:
6149 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6150 break;
6151
6152 case IP_END:
6153 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6154 after = true;
6155 break;
6156
6157 case IP_AFTER_USE:
6158 after = true;
6159 /* fall through */
6160 case IP_BEFORE_USE:
6161 incr_pos = gsi_for_stmt (cand->incremented_at);
6162 break;
6163
6164 case IP_ORIGINAL:
6165 /* Mark that the iv is preserved. */
6166 name_info (data, cand->var_before)->preserve_biv = true;
6167 name_info (data, cand->var_after)->preserve_biv = true;
6168
6169 /* Rewrite the increment so that it uses var_before directly. */
6170 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6171 return;
6172 }
6173
6174 gimple_add_tmp_var (cand->var_before);
6175
6176 base = unshare_expr (cand->iv->base);
6177
6178 create_iv (base, unshare_expr (cand->iv->step),
6179 cand->var_before, data->current_loop,
6180 &incr_pos, after, &cand->var_before, &cand->var_after);
6181 }
6182
6183 /* Creates new induction variables described in SET. */
6184
6185 static void
6186 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6187 {
6188 unsigned i;
6189 struct iv_cand *cand;
6190 bitmap_iterator bi;
6191
6192 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6193 {
6194 cand = iv_cand (data, i);
6195 create_new_iv (data, cand);
6196 }
6197
6198 if (dump_file && (dump_flags & TDF_DETAILS))
6199 {
6200 fprintf (dump_file, "\nSelected IV set:\n");
6201 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6202 {
6203 cand = iv_cand (data, i);
6204 dump_cand (dump_file, cand);
6205 }
6206 fprintf (dump_file, "\n");
6207 }
6208 }
6209
6210 /* Rewrites USE (definition of iv used in a nonlinear expression)
6211 using candidate CAND. */
6212
6213 static void
6214 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6215 struct iv_use *use, struct iv_cand *cand)
6216 {
6217 tree comp;
6218 tree op, tgt;
6219 gimple ass;
6220 gimple_stmt_iterator bsi;
6221
6222 /* An important special case -- if we are asked to express value of
6223 the original iv by itself, just exit; there is no need to
6224 introduce a new computation (that might also need casting the
6225 variable to unsigned and back). */
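  /* An illustrative example: for the original biv's own increment

       i_after = i_before + step_5;

     with CAND being that original iv, the statement can stay as it is
     as long as step_5 is invariant in the loop -- the checks below
     implement exactly this.  */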
6226 if (cand->pos == IP_ORIGINAL
6227 && cand->incremented_at == use->stmt)
6228 {
6229 enum tree_code stmt_code;
6230
6231 gcc_assert (is_gimple_assign (use->stmt));
6232 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6233
6234 /* Check whether we may leave the computation unchanged.
6235 This is the case only if it does not rely on other
6236 computations in the loop -- otherwise, the computation
6237 we rely upon may be removed in remove_unused_ivs,
6238 thus leading to ICE. */
6239 stmt_code = gimple_assign_rhs_code (use->stmt);
6240 if (stmt_code == PLUS_EXPR
6241 || stmt_code == MINUS_EXPR
6242 || stmt_code == POINTER_PLUS_EXPR)
6243 {
6244 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6245 op = gimple_assign_rhs2 (use->stmt);
6246 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6247 op = gimple_assign_rhs1 (use->stmt);
6248 else
6249 op = NULL_TREE;
6250 }
6251 else
6252 op = NULL_TREE;
6253
6254 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6255 return;
6256 }
6257
6258 comp = get_computation (data->current_loop, use, cand);
6259 gcc_assert (comp != NULL_TREE);
6260
6261 switch (gimple_code (use->stmt))
6262 {
6263 case GIMPLE_PHI:
6264 tgt = PHI_RESULT (use->stmt);
6265
6266 /* If we should keep the biv, do not replace it. */
6267 if (name_info (data, tgt)->preserve_biv)
6268 return;
6269
6270 bsi = gsi_after_labels (gimple_bb (use->stmt));
6271 break;
6272
6273 case GIMPLE_ASSIGN:
6274 tgt = gimple_assign_lhs (use->stmt);
6275 bsi = gsi_for_stmt (use->stmt);
6276 break;
6277
6278 default:
6279 gcc_unreachable ();
6280 }
6281
6282 if (!valid_gimple_rhs_p (comp)
6283 || (gimple_code (use->stmt) != GIMPLE_PHI
6284 /* We can't allow re-allocating the stmt as it may still be
6285 pointed to elsewhere. */
6286 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6287 >= gimple_num_ops (gsi_stmt (bsi)))))
6288 {
6289 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6290 true, GSI_SAME_STMT);
6291 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6292 {
6293 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6294 /* As this isn't a plain copy we have to reset alignment
6295 information. */
6296 if (SSA_NAME_PTR_INFO (comp))
6297 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6298 }
6299 }
6300
6301 if (gimple_code (use->stmt) == GIMPLE_PHI)
6302 {
6303 ass = gimple_build_assign (tgt, comp);
6304 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6305
6306 bsi = gsi_for_stmt (use->stmt);
6307 remove_phi_node (&bsi, false);
6308 }
6309 else
6310 {
6311 gimple_assign_set_rhs_from_tree (&bsi, comp);
6312 use->stmt = gsi_stmt (bsi);
6313 }
6314 }
6315
6316 /* Performs a peephole optimization to reorder the iv update statement with
6317 a mem ref to enable instruction combining in later phases. The mem ref uses
6318 the iv value before the update, so the reordering transformation requires
6319 adjustment of the offset. CAND is the selected IV_CAND.
6320
6321 Example:
6322
6323 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6324 iv2 = iv1 + 1;
6325
6326 if (t < val) (1)
6327 goto L;
6328 goto Head;
6329
6330
6331 directly propagating t over to (1) will introduce an overlapping live range and
6332 thus increase register pressure. This peephole transforms it into (note that the offset drops from 16 to 8, compensating for the index having already advanced by one stride of 8):
6333
6334
6335 iv2 = iv1 + 1;
6336 t = MEM_REF (base, iv2, 8, 8);
6337 if (t < val)
6338 goto L;
6339 goto Head;
6340 */
6341
6342 static void
6343 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6344 {
6345 tree var_after;
6346 gimple iv_update, stmt;
6347 basic_block bb;
6348 gimple_stmt_iterator gsi, gsi_iv;
6349
6350 if (cand->pos != IP_NORMAL)
6351 return;
6352
6353 var_after = cand->var_after;
6354 iv_update = SSA_NAME_DEF_STMT (var_after);
6355
6356 bb = gimple_bb (iv_update);
6357 gsi = gsi_last_nondebug_bb (bb);
6358 stmt = gsi_stmt (gsi);
6359
6360 /* Only handle conditional statements for now. */
6361 if (gimple_code (stmt) != GIMPLE_COND)
6362 return;
6363
6364 gsi_prev_nondebug (&gsi);
6365 stmt = gsi_stmt (gsi);
6366 if (stmt != iv_update)
6367 return;
6368
6369 gsi_prev_nondebug (&gsi);
6370 if (gsi_end_p (gsi))
6371 return;
6372
6373 stmt = gsi_stmt (gsi);
6374 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6375 return;
6376
6377 if (stmt != use->stmt)
6378 return;
6379
6380 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6381 return;
6382
6383 if (dump_file && (dump_flags & TDF_DETAILS))
6384 {
6385 fprintf (dump_file, "Reordering \n");
6386 print_gimple_stmt (dump_file, iv_update, 0, 0);
6387 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6388 fprintf (dump_file, "\n");
6389 }
6390
6391 gsi = gsi_for_stmt (use->stmt);
6392 gsi_iv = gsi_for_stmt (iv_update);
6393 gsi_move_before (&gsi_iv, &gsi);
6394
6395 cand->pos = IP_BEFORE_USE;
6396 cand->incremented_at = use->stmt;
6397 }
6398
6399 /* Rewrites USE (address that is an iv) using candidate CAND. */
6400
6401 static void
6402 rewrite_use_address (struct ivopts_data *data,
6403 struct iv_use *use, struct iv_cand *cand)
6404 {
6405 aff_tree aff;
6406 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6407 tree base_hint = NULL_TREE;
6408 tree ref, iv;
6409 bool ok;
6410
6411 adjust_iv_update_pos (cand, use);
6412 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6413 gcc_assert (ok);
6414 unshare_aff_combination (&aff);
6415
6416 /* To avoid undefined overflow problems, all IV candidates use unsigned
6417 integer types. The drawback is that this makes it impossible for
6418 create_mem_ref to distinguish an IV that is based on a memory object
6419 from one that represents simply an offset.
6420
6421 To work around this problem, we pass a hint to create_mem_ref that
6422 indicates which variable (if any) in aff is an IV based on a memory
6423 object. Note that we only consider the candidate. If this is not
6424 based on an object, the base of the reference is in some subexpression
6425 of the use -- but these will use pointer types, so they are recognized
6426 by the create_mem_ref heuristics anyway. */
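  /* An illustrative sketch: for a use like  *(p_7 + i_4 * 4)  where the
     chosen candidate is based on the memory object behind p_7, the
     candidate's value at USE->stmt is passed as the hint, allowing
     create_mem_ref to treat it as the base pointer of the reference
     rather than as a plain (unsigned) integer offset.  */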
6427 if (cand->iv->base_object)
6428 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6429
6430 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6431 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6432 reference_alias_ptr_type (*use->op_p),
6433 iv, base_hint, data->speed);
6434 copy_ref_info (ref, *use->op_p);
6435 *use->op_p = ref;
6436 }
6437
6438 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6439 candidate CAND. */
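/* An illustrative example (hypothetical GIMPLE, not from a real dump):
   when induction variable elimination has computed a bound for CAND,
   an exit test such as

     if (i_5 < n_8)

   can be rewritten as a test on the candidate, e.g.

     if (p_11 != &a[n_8])

   Otherwise, the original compared value is simply re-expressed in
   terms of the candidate.  */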
6440
6441 static void
6442 rewrite_use_compare (struct ivopts_data *data,
6443 struct iv_use *use, struct iv_cand *cand)
6444 {
6445 tree comp, *var_p, op, bound;
6446 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6447 enum tree_code compare;
6448 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6449 bool ok;
6450
6451 bound = cp->value;
6452 if (bound)
6453 {
6454 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6455 tree var_type = TREE_TYPE (var);
6456 gimple_seq stmts;
6457
6458 if (dump_file && (dump_flags & TDF_DETAILS))
6459 {
6460 fprintf (dump_file, "Replacing exit test: ");
6461 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6462 }
6463 compare = cp->comp;
6464 bound = unshare_expr (fold_convert (var_type, bound));
6465 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6466 if (stmts)
6467 gsi_insert_seq_on_edge_immediate (
6468 loop_preheader_edge (data->current_loop),
6469 stmts);
6470
6471 gimple_cond_set_lhs (use->stmt, var);
6472 gimple_cond_set_code (use->stmt, compare);
6473 gimple_cond_set_rhs (use->stmt, op);
6474 return;
6475 }
6476
6477 /* The induction variable elimination failed; just express the original
6478 giv. */
6479 comp = get_computation (data->current_loop, use, cand);
6480 gcc_assert (comp != NULL_TREE);
6481
6482 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6483 gcc_assert (ok);
6484
6485 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6486 true, GSI_SAME_STMT);
6487 }
6488
6489 /* Rewrites USE using candidate CAND. */
6490
6491 static void
6492 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6493 {
6494 switch (use->type)
6495 {
6496 case USE_NONLINEAR_EXPR:
6497 rewrite_use_nonlinear_expr (data, use, cand);
6498 break;
6499
6500 case USE_ADDRESS:
6501 rewrite_use_address (data, use, cand);
6502 break;
6503
6504 case USE_COMPARE:
6505 rewrite_use_compare (data, use, cand);
6506 break;
6507
6508 default:
6509 gcc_unreachable ();
6510 }
6511
6512 update_stmt (use->stmt);
6513 }
6514
6515 /* Rewrite the uses using the selected induction variables. */
6516
6517 static void
6518 rewrite_uses (struct ivopts_data *data)
6519 {
6520 unsigned i;
6521 struct iv_cand *cand;
6522 struct iv_use *use;
6523
6524 for (i = 0; i < n_iv_uses (data); i++)
6525 {
6526 use = iv_use (data, i);
6527 cand = use->selected;
6528 gcc_assert (cand);
6529
6530 rewrite_use (data, use, cand);
6531 }
6532 }
6533
6534 /* Removes the ivs that are not used after rewriting. */
6535
6536 static void
6537 remove_unused_ivs (struct ivopts_data *data)
6538 {
6539 unsigned j;
6540 bitmap_iterator bi;
6541 bitmap toremove = BITMAP_ALLOC (NULL);
6542
6543 /* Figure out an order in which to release SSA DEFs so that we don't
6544 release something that we'd have to propagate into a debug stmt
6545 afterwards. */
6546 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6547 {
6548 struct version_info *info;
6549
6550 info = ver_info (data, j);
6551 if (info->iv
6552 && !integer_zerop (info->iv->step)
6553 && !info->inv_id
6554 && !info->iv->have_use_for
6555 && !info->preserve_biv)
6556 {
6557 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6558
6559 tree def = info->iv->ssa_name;
6560
6561 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6562 {
6563 imm_use_iterator imm_iter;
6564 use_operand_p use_p;
6565 gimple stmt;
6566 int count = 0;
6567
6568 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6569 {
6570 if (!gimple_debug_bind_p (stmt))
6571 continue;
6572
6573 /* We just want to determine whether to do nothing
6574 (count == 0), to substitute the computed
6575 expression into a single use of the SSA DEF by
6576 itself (count == 1), or to use a debug temp
6577 because the SSA DEF is used multiple times or as
6578 part of a larger expression (count > 1). */
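		  /* An illustrative sketch: a bind like
		       # DEBUG i => i_13
		     gives count == 1, so the computed expression is
		     substituted directly, whereas
		       # DEBUG x => i_13 * 4
		     already counts as 2 because the bound value is not
		     the SSA name itself, and a debug temp is used.  */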
6579 count++;
6580 if (gimple_debug_bind_get_value (stmt) != def)
6581 count++;
6582
6583 if (count > 1)
6584 BREAK_FROM_IMM_USE_STMT (imm_iter);
6585 }
6586
6587 if (!count)
6588 continue;
6589
6590 struct iv_use dummy_use;
6591 struct iv_cand *best_cand = NULL, *cand;
6592 unsigned i, best_pref = 0, cand_pref;
6593
6594 memset (&dummy_use, 0, sizeof (dummy_use));
6595 dummy_use.iv = info->iv;
6596 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6597 {
6598 cand = iv_use (data, i)->selected;
6599 if (cand == best_cand)
6600 continue;
6601 cand_pref = operand_equal_p (cand->iv->step,
6602 info->iv->step, 0)
6603 ? 4 : 0;
6604 cand_pref
6605 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6606 == TYPE_MODE (TREE_TYPE (info->iv->base))
6607 ? 2 : 0;
6608 cand_pref
6609 += TREE_CODE (cand->iv->base) == INTEGER_CST
6610 ? 1 : 0;
6611 if (best_cand == NULL || best_pref < cand_pref)
6612 {
6613 best_cand = cand;
6614 best_pref = cand_pref;
6615 }
6616 }
6617
6618 if (!best_cand)
6619 continue;
6620
6621 tree comp = get_computation_at (data->current_loop,
6622 &dummy_use, best_cand,
6623 SSA_NAME_DEF_STMT (def));
6624 if (!comp)
6625 continue;
6626
6627 if (count > 1)
6628 {
6629 tree vexpr = make_node (DEBUG_EXPR_DECL);
6630 DECL_ARTIFICIAL (vexpr) = 1;
6631 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6632 if (SSA_NAME_VAR (def))
6633 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6634 else
6635 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6636 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6637 gimple_stmt_iterator gsi;
6638
6639 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6640 gsi = gsi_after_labels (gimple_bb
6641 (SSA_NAME_DEF_STMT (def)));
6642 else
6643 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6644
6645 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6646 comp = vexpr;
6647 }
6648
6649 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6650 {
6651 if (!gimple_debug_bind_p (stmt))
6652 continue;
6653
6654 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6655 SET_USE (use_p, comp);
6656
6657 update_stmt (stmt);
6658 }
6659 }
6660 }
6661 }
6662
6663 release_defs_bitset (toremove);
6664
6665 BITMAP_FREE (toremove);
6666 }
6667
6668 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6669 for pointer_map_traverse. */
6670
6671 static bool
6672 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6673 void *data ATTRIBUTE_UNUSED)
6674 {
6675 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6676
6677 free (niter);
6678 return true;
6679 }
6680
6681 /* Frees data allocated by the optimization of a single loop. */
6682
6683 static void
6684 free_loop_data (struct ivopts_data *data)
6685 {
6686 unsigned i, j;
6687 bitmap_iterator bi;
6688 tree obj;
6689
6690 if (data->niters)
6691 {
6692 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6693 pointer_map_destroy (data->niters);
6694 data->niters = NULL;
6695 }
6696
6697 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6698 {
6699 struct version_info *info;
6700
6701 info = ver_info (data, i);
6702 free (info->iv);
6703 info->iv = NULL;
6704 info->has_nonlin_use = false;
6705 info->preserve_biv = false;
6706 info->inv_id = 0;
6707 }
6708 bitmap_clear (data->relevant);
6709 bitmap_clear (data->important_candidates);
6710
6711 for (i = 0; i < n_iv_uses (data); i++)
6712 {
6713 struct iv_use *use = iv_use (data, i);
6714
6715 free (use->iv);
6716 BITMAP_FREE (use->related_cands);
6717 for (j = 0; j < use->n_map_members; j++)
6718 if (use->cost_map[j].depends_on)
6719 BITMAP_FREE (use->cost_map[j].depends_on);
6720 free (use->cost_map);
6721 free (use);
6722 }
6723 data->iv_uses.truncate (0);
6724
6725 for (i = 0; i < n_iv_cands (data); i++)
6726 {
6727 struct iv_cand *cand = iv_cand (data, i);
6728
6729 free (cand->iv);
6730 if (cand->depends_on)
6731 BITMAP_FREE (cand->depends_on);
6732 free (cand);
6733 }
6734 data->iv_candidates.truncate (0);
6735
6736 if (data->version_info_size < num_ssa_names)
6737 {
6738 data->version_info_size = 2 * num_ssa_names;
6739 free (data->version_info);
6740 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6741 }
6742
6743 data->max_inv_id = 0;
6744
6745 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6746 SET_DECL_RTL (obj, NULL_RTX);
6747
6748 decl_rtl_to_reset.truncate (0);
6749
6750 data->inv_expr_tab.empty ();
6751 data->inv_expr_id = 0;
6752 }
6753
6754 /* Finalizes data structures used by the iv optimization pass. */
6756
6757 static void
6758 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6759 {
6760 free_loop_data (data);
6761 free (data->version_info);
6762 BITMAP_FREE (data->relevant);
6763 BITMAP_FREE (data->important_candidates);
6764
6765 decl_rtl_to_reset.release ();
6766 data->iv_uses.release ();
6767 data->iv_candidates.release ();
6768 data->inv_expr_tab.dispose ();
6769 }
6770
6771 /* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
includes any function calls, not counting inexpensive builtins. */
6772
6773 static bool
6774 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6775 {
6776 gimple_stmt_iterator gsi;
6777 unsigned i;
6778
6779 for (i = 0; i < num_nodes; i++)
6780 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6781 {
6782 gimple stmt = gsi_stmt (gsi);
6783 if (is_gimple_call (stmt)
6784 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6785 return true;
6786 }
6787 return false;
6788 }
6789
6790 /* Optimizes the LOOP. Returns true if anything changed. */
6791
6792 static bool
6793 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6794 {
6795 bool changed = false;
6796 struct iv_ca *iv_ca;
6797 edge exit = single_dom_exit (loop);
6798 basic_block *body;
6799
6800 gcc_assert (!data->niters);
6801 data->current_loop = loop;
6802 data->speed = optimize_loop_for_speed_p (loop);
6803
6804 if (dump_file && (dump_flags & TDF_DETAILS))
6805 {
6806 fprintf (dump_file, "Processing loop %d\n", loop->num);
6807
6808 if (exit)
6809 {
6810 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6811 exit->src->index, exit->dest->index);
6812 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6813 fprintf (dump_file, "\n");
6814 }
6815
6816 fprintf (dump_file, "\n");
6817 }
6818
6819 body = get_loop_body (loop);
6820 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6821 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6822 free (body);
6823
6824 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6825
6826 /* For each ssa name determines whether it behaves as an induction variable
6827 in some loop. */
6828 if (!find_induction_variables (data))
6829 goto finish;
6830
6831 /* Finds interesting uses (item 1). */
6832 find_interesting_uses (data);
6833 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6834 goto finish;
6835
6836 /* Finds candidates for the induction variables (item 2). */
6837 find_iv_candidates (data);
6838
6839 /* Calculates the costs (item 3, part 1). */
6840 determine_iv_costs (data);
6841 determine_use_iv_costs (data);
6842 determine_set_costs (data);
6843
6844 /* Find the optimal set of induction variables (item 3, part 2). */
6845 iv_ca = find_optimal_iv_set (data);
6846 if (!iv_ca)
6847 goto finish;
6848 changed = true;
6849
6850 /* Create the new induction variables (item 4, part 1). */
6851 create_new_ivs (data, iv_ca);
6852 iv_ca_free (&iv_ca);
6853
6854 /* Rewrite the uses (item 4, part 2). */
6855 rewrite_uses (data);
6856
6857 /* Remove the ivs that are unused after rewriting. */
6858 remove_unused_ivs (data);
6859
6860 /* We have changed the structure of induction variables; it might happen
6861 that definitions in the scev database refer to some of them that were
6862 eliminated. */
6863 scev_reset ();
6864
6865 finish:
6866 free_loop_data (data);
6867
6868 return changed;
6869 }
6870
6871 /* Main entry point. Optimizes induction variables in loops. */
6872
6873 void
6874 tree_ssa_iv_optimize (void)
6875 {
6876 struct loop *loop;
6877 struct ivopts_data data;
6878
6879 tree_ssa_iv_optimize_init (&data);
6880
6881 /* Optimize the loops starting with the innermost ones. */
6882 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6883 {
6884 if (dump_file && (dump_flags & TDF_DETAILS))
6885 flow_loop_dump (loop, dump_file, NULL, 1);
6886
6887 tree_ssa_iv_optimize_loop (&data, loop);
6888 }
6889
6890 tree_ssa_iv_optimize_finalize (&data);
6891 }