/* Induction variable optimizations.
   Copyright (C) 2003-2016 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
72 #include "tree-pass.h"
76 #include "insn-config.h"
80 #include "gimple-pretty-print.h"
82 #include "fold-const.h"
83 #include "stor-layout.h"
86 #include "gimple-iterator.h"
87 #include "gimplify-me.h"
89 #include "tree-ssa-loop-ivopts.h"
90 #include "tree-ssa-loop-manip.h"
91 #include "tree-ssa-loop-niter.h"
92 #include "tree-ssa-loop.h"
98 #include "tree-scalar-evolution.h"
100 #include "tree-affine.h"
101 #include "tree-ssa-propagate.h"
102 #include "tree-ssa-address.h"
103 #include "builtins.h"
104 #include "tree-vectorizer.h"
106 /* FIXME: Expressions are expanded to RTL in this pass to determine the
107 cost of different addressing modes. This should be moved to a TBD
108 interface between the GIMPLE and RTL worlds. */
110 /* The infinite cost. */
111 #define INFTY 10000000
113 #define AVG_LOOP_NITER(LOOP) 5
115 /* Returns the expected number of loop iterations for LOOP.
116 The average trip count is computed from profile data if it
119 static inline HOST_WIDE_INT
120 avg_loop_niter (struct loop
*loop
)
122 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
124 return AVG_LOOP_NITER (loop
);
129 /* Representation of the induction variable. */
132 tree base
; /* Initial value of the iv. */
133 tree base_object
; /* A memory object to that the induction variable points. */
134 tree step
; /* Step of the iv (constant only). */
135 tree ssa_name
; /* The ssa name with the value. */
136 unsigned use_id
; /* The identifier in the use if it is the case. */
137 bool biv_p
; /* Is it a biv? */
138 bool have_use_for
; /* Do we already have a use for it? */
139 bool no_overflow
; /* True if the iv doesn't overflow. */
140 bool have_address_use
;/* For biv, indicate if it's used in any address
144 /* Per-ssa version information (induction variable descriptions, etc.). */
147 tree name
; /* The ssa name. */
148 struct iv
*iv
; /* Induction variable description. */
149 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
150 an expression that is not an induction variable. */
151 bool preserve_biv
; /* For the original biv, whether to preserve it. */
152 unsigned inv_id
; /* Id of an invariant. */
158 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
159 USE_ADDRESS
, /* Use in an address. */
160 USE_COMPARE
/* Use is a compare. */
163 /* Cost of a computation. */
166 int cost
; /* The runtime cost. */
167 unsigned complexity
; /* The estimate of the complexity of the code for
168 the computation (in no concrete units --
169 complexity field should be larger for more
170 complex expressions and addressing modes). */
173 static const comp_cost no_cost
= {0, 0};
174 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
176 /* The candidate - cost pair. */
179 struct iv_cand
*cand
; /* The candidate. */
180 comp_cost cost
; /* The cost. */
181 bitmap depends_on
; /* The list of invariants that have to be
183 tree value
; /* For final value elimination, the expression for
184 the final value of the iv. For iv elimination,
185 the new bound to compare with. */
186 enum tree_code comp
; /* For iv elimination, the comparison. */
187 int inv_expr_id
; /* Loop invariant expression id. */
193 unsigned id
; /* The id of the use. */
194 unsigned sub_id
; /* The id of the sub use. */
195 enum use_type type
; /* Type of the use. */
196 struct iv
*iv
; /* The induction variable it is based on. */
197 gimple
*stmt
; /* Statement in that it occurs. */
198 tree
*op_p
; /* The place where it occurs. */
199 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
202 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
203 struct cost_pair
*cost_map
;
204 /* The costs wrto the iv candidates. */
206 struct iv_cand
*selected
;
207 /* The selected candidate. */
209 struct iv_use
*next
; /* The next sub use. */
210 tree addr_base
; /* Base address with const offset stripped. */
211 unsigned HOST_WIDE_INT addr_offset
;
212 /* Const offset stripped from base address. */
215 /* The position where the iv is computed. */
218 IP_NORMAL
, /* At the end, just before the exit condition. */
219 IP_END
, /* At the end of the latch block. */
220 IP_BEFORE_USE
, /* Immediately before a specific use. */
221 IP_AFTER_USE
, /* Immediately after a specific use. */
222 IP_ORIGINAL
/* The original biv. */
225 /* The induction variable candidate. */
228 unsigned id
; /* The number of the candidate. */
229 bool important
; /* Whether this is an "important" candidate, i.e. such
230 that it should be considered by all uses. */
231 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
232 gimple
*incremented_at
;/* For original biv, the statement where it is
234 tree var_before
; /* The variable used for it before increment. */
235 tree var_after
; /* The variable used for it after increment. */
236 struct iv
*iv
; /* The value of the candidate. NULL for
237 "pseudocandidate" used to indicate the possibility
238 to replace the final value of an iv by direct
239 computation of the value. */
240 unsigned cost
; /* Cost of the candidate. */
241 unsigned cost_step
; /* Cost of the candidate's increment operation. */
242 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
243 where it is incremented. */
244 bitmap depends_on
; /* The list of invariants that are used in step of the
246 struct iv
*orig_iv
; /* The original iv if this cand is added from biv with
250 /* Hashtable entry for common candidate derived from iv uses. */
251 struct iv_common_cand
255 /* IV uses from which this common candidate is derived. */
256 auto_vec
<iv_use
*> uses
;
260 /* Hashtable helpers. */
262 struct iv_common_cand_hasher
: delete_ptr_hash
<iv_common_cand
>
264 static inline hashval_t
hash (const iv_common_cand
*);
265 static inline bool equal (const iv_common_cand
*, const iv_common_cand
*);
268 /* Hash function for possible common candidates. */
271 iv_common_cand_hasher::hash (const iv_common_cand
*ccand
)
276 /* Hash table equality function for common candidates. */
279 iv_common_cand_hasher::equal (const iv_common_cand
*ccand1
,
280 const iv_common_cand
*ccand2
)
282 return (ccand1
->hash
== ccand2
->hash
283 && operand_equal_p (ccand1
->base
, ccand2
->base
, 0)
284 && operand_equal_p (ccand1
->step
, ccand2
->step
, 0)
285 && (TYPE_PRECISION (TREE_TYPE (ccand1
->base
))
286 == TYPE_PRECISION (TREE_TYPE (ccand2
->base
))));
289 /* Loop invariant expression hashtable entry. */
290 struct iv_inv_expr_ent
297 /* Hashtable helpers. */
299 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
301 static inline hashval_t
hash (const iv_inv_expr_ent
*);
302 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
305 /* Hash function for loop invariant expressions. */
308 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
313 /* Hash table equality function for expressions. */
316 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
317 const iv_inv_expr_ent
*expr2
)
319 return expr1
->hash
== expr2
->hash
320 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
325 /* The currently optimized loop. */
326 struct loop
*current_loop
;
327 source_location loop_loc
;
329 /* Numbers of iterations for all exits of the current loop. */
330 hash_map
<edge
, tree_niter_desc
*> *niters
;
332 /* Number of registers used in it. */
335 /* The size of version_info array allocated. */
336 unsigned version_info_size
;
338 /* The array of information for the ssa names. */
339 struct version_info
*version_info
;
341 /* The hashtable of loop invariant expressions created
343 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
345 /* Loop invariant expression id. */
348 /* The bitmap of indices in version_info whose value was changed. */
351 /* The uses of induction variables. */
352 vec
<iv_use
*> iv_uses
;
354 /* The candidates. */
355 vec
<iv_cand
*> iv_candidates
;
357 /* A bitmap of important candidates. */
358 bitmap important_candidates
;
360 /* Cache used by tree_to_aff_combination_expand. */
361 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
363 /* The hashtable of common candidates derived from iv uses. */
364 hash_table
<iv_common_cand_hasher
> *iv_common_cand_tab
;
366 /* The common candidates. */
367 vec
<iv_common_cand
*> iv_common_cands
;
369 /* The maximum invariant id. */
372 /* Number of no_overflow BIVs which are not used in memory address. */
373 unsigned bivs_not_used_in_addr
;
375 /* Obstack for iv structure. */
376 struct obstack iv_obstack
;
378 /* Whether to consider just related and important candidates when replacing a
380 bool consider_all_candidates
;
382 /* Are we optimizing for speed? */
385 /* Whether the loop body includes any function calls. */
386 bool body_includes_call
;
388 /* Whether the loop body can only be exited via single exit. */
389 bool loop_single_exit_p
;
392 /* An assignment of iv candidates to uses. */
396 /* The number of uses covered by the assignment. */
399 /* Number of uses that cannot be expressed by the candidates in the set. */
402 /* Candidate assigned to a use, together with the related costs. */
403 struct cost_pair
**cand_for_use
;
405 /* Number of times each candidate is used. */
406 unsigned *n_cand_uses
;
408 /* The candidates used. */
411 /* The number of candidates in the set. */
414 /* Total number of registers needed. */
417 /* Total cost of expressing uses. */
418 comp_cost cand_use_cost
;
420 /* Total cost of candidates. */
423 /* Number of times each invariant is used. */
424 unsigned *n_invariant_uses
;
426 /* The array holding the number of uses of each loop
427 invariant expressions created by ivopt. */
428 unsigned *used_inv_expr
;
430 /* The number of created loop invariants. */
431 unsigned num_used_inv_expr
;
433 /* Total cost of the assignment. */
437 /* Difference of two iv candidate assignments. */
444 /* An old assignment (for rollback purposes). */
445 struct cost_pair
*old_cp
;
447 /* A new assignment. */
448 struct cost_pair
*new_cp
;
450 /* Next change in the list. */
451 struct iv_ca_delta
*next_change
;
454 /* Bound on number of candidates below that all candidates are considered. */
456 #define CONSIDER_ALL_CANDIDATES_BOUND \
457 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
459 /* If there are more iv occurrences, we just give up (it is quite unlikely that
460 optimizing such a loop would help, and it would take ages). */
462 #define MAX_CONSIDERED_USES \
463 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
465 /* If there are at most this number of ivs in the set, try removing unnecessary
466 ivs from the set always. */
468 #define ALWAYS_PRUNE_CAND_SET_BOUND \
469 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
471 /* The list of trees for that the decl_rtl field must be reset is stored
474 static vec
<tree
> decl_rtl_to_reset
;
476 static comp_cost
force_expr_to_var_cost (tree
, bool);
478 /* Number of uses recorded in DATA. */
480 static inline unsigned
481 n_iv_uses (struct ivopts_data
*data
)
483 return data
->iv_uses
.length ();
486 /* Ith use recorded in DATA. */
488 static inline struct iv_use
*
489 iv_use (struct ivopts_data
*data
, unsigned i
)
491 return data
->iv_uses
[i
];
494 /* Number of candidates recorded in DATA. */
496 static inline unsigned
497 n_iv_cands (struct ivopts_data
*data
)
499 return data
->iv_candidates
.length ();
502 /* Ith candidate recorded in DATA. */
504 static inline struct iv_cand
*
505 iv_cand (struct ivopts_data
*data
, unsigned i
)
507 return data
->iv_candidates
[i
];
510 /* The single loop exit if it dominates the latch, NULL otherwise. */
513 single_dom_exit (struct loop
*loop
)
515 edge exit
= single_exit (loop
);
520 if (!just_once_each_iteration_p (loop
, exit
->src
))
526 /* Dumps information about the induction variable IV to FILE. */
529 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
531 if (iv
->ssa_name
&& dump_name
)
533 fprintf (file
, "ssa name ");
534 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
535 fprintf (file
, "\n");
538 fprintf (file
, " type ");
539 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
540 fprintf (file
, "\n");
544 fprintf (file
, " base ");
545 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
546 fprintf (file
, "\n");
548 fprintf (file
, " step ");
549 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
550 fprintf (file
, "\n");
554 fprintf (file
, " invariant ");
555 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
556 fprintf (file
, "\n");
561 fprintf (file
, " base object ");
562 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
563 fprintf (file
, "\n");
567 fprintf (file
, " is a biv\n");
570 fprintf (file
, " iv doesn't overflow wrto loop niter\n");
573 /* Dumps information about the USE to FILE. */
576 dump_use (FILE *file
, struct iv_use
*use
)
578 fprintf (file
, "use %d", use
->id
);
580 fprintf (file
, ".%d", use
->sub_id
);
582 fprintf (file
, "\n");
586 case USE_NONLINEAR_EXPR
:
587 fprintf (file
, " generic\n");
591 fprintf (file
, " address\n");
595 fprintf (file
, " compare\n");
602 fprintf (file
, " in statement ");
603 print_gimple_stmt (file
, use
->stmt
, 0, 0);
604 fprintf (file
, "\n");
606 fprintf (file
, " at position ");
608 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
609 fprintf (file
, "\n");
611 dump_iv (file
, use
->iv
, false);
613 if (use
->related_cands
)
615 fprintf (file
, " related candidates ");
616 dump_bitmap (file
, use
->related_cands
);
620 /* Dumps information about the uses to FILE. */
623 dump_uses (FILE *file
, struct ivopts_data
*data
)
628 for (i
= 0; i
< n_iv_uses (data
); i
++)
630 use
= iv_use (data
, i
);
633 dump_use (file
, use
);
637 fprintf (file
, "\n");
641 /* Dumps information about induction variable candidate CAND to FILE. */
644 dump_cand (FILE *file
, struct iv_cand
*cand
)
646 struct iv
*iv
= cand
->iv
;
648 fprintf (file
, "candidate %d%s\n",
649 cand
->id
, cand
->important
? " (important)" : "");
651 if (cand
->depends_on
)
653 fprintf (file
, " depends on ");
654 dump_bitmap (file
, cand
->depends_on
);
659 fprintf (file
, " final value replacement\n");
663 if (cand
->var_before
)
665 fprintf (file
, " var_before ");
666 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
667 fprintf (file
, "\n");
671 fprintf (file
, " var_after ");
672 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
673 fprintf (file
, "\n");
679 fprintf (file
, " incremented before exit test\n");
683 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
687 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
691 fprintf (file
, " incremented at end\n");
695 fprintf (file
, " original biv\n");
699 dump_iv (file
, iv
, false);
702 /* Returns the info for ssa version VER. */
704 static inline struct version_info
*
705 ver_info (struct ivopts_data
*data
, unsigned ver
)
707 return data
->version_info
+ ver
;
710 /* Returns the info for ssa name NAME. */
712 static inline struct version_info
*
713 name_info (struct ivopts_data
*data
, tree name
)
715 return ver_info (data
, SSA_NAME_VERSION (name
));
718 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
722 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
724 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
728 if (sbb
== loop
->latch
)
734 return stmt
== last_stmt (bb
);
737 /* Returns true if STMT if after the place where the original induction
738 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
739 if the positions are identical. */
742 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
744 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
745 basic_block stmt_bb
= gimple_bb (stmt
);
747 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
750 if (stmt_bb
!= cand_bb
)
754 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
756 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
759 /* Returns true if STMT if after the place where the induction variable
760 CAND is incremented in LOOP. */
763 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
771 return stmt_after_ip_normal_pos (loop
, stmt
);
775 return stmt_after_inc_pos (cand
, stmt
, false);
778 return stmt_after_inc_pos (cand
, stmt
, true);
785 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
788 abnormal_ssa_name_p (tree exp
)
793 if (TREE_CODE (exp
) != SSA_NAME
)
796 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
799 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
800 abnormal phi node. Callback for for_each_index. */
803 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
804 void *data ATTRIBUTE_UNUSED
)
806 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
808 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
810 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
814 return !abnormal_ssa_name_p (*index
);
817 /* Returns true if EXPR contains a ssa name that occurs in an
818 abnormal phi node. */
821 contains_abnormal_ssa_name_p (tree expr
)
824 enum tree_code_class codeclass
;
829 code
= TREE_CODE (expr
);
830 codeclass
= TREE_CODE_CLASS (code
);
832 if (code
== SSA_NAME
)
833 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
835 if (code
== INTEGER_CST
836 || is_gimple_min_invariant (expr
))
839 if (code
== ADDR_EXPR
)
840 return !for_each_index (&TREE_OPERAND (expr
, 0),
841 idx_contains_abnormal_ssa_name_p
,
844 if (code
== COND_EXPR
)
845 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
846 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
847 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
853 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
858 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
870 /* Returns the structure describing number of iterations determined from
871 EXIT of DATA->current_loop, or NULL if something goes wrong. */
873 static struct tree_niter_desc
*
874 niter_for_exit (struct ivopts_data
*data
, edge exit
)
876 struct tree_niter_desc
*desc
;
877 tree_niter_desc
**slot
;
881 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
885 slot
= data
->niters
->get (exit
);
889 /* Try to determine number of iterations. We cannot safely work with ssa
890 names that appear in phi nodes on abnormal edges, so that we do not
891 create overlapping life ranges for them (PR 27283). */
892 desc
= XNEW (struct tree_niter_desc
);
893 if (!number_of_iterations_exit (data
->current_loop
,
895 || contains_abnormal_ssa_name_p (desc
->niter
))
900 data
->niters
->put (exit
, desc
);
908 /* Returns the structure describing number of iterations determined from
909 single dominating exit of DATA->current_loop, or NULL if something
912 static struct tree_niter_desc
*
913 niter_for_single_dom_exit (struct ivopts_data
*data
)
915 edge exit
= single_dom_exit (data
->current_loop
);
920 return niter_for_exit (data
, exit
);
923 /* Initializes data structures used by the iv optimization pass, stored
927 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
929 data
->version_info_size
= 2 * num_ssa_names
;
930 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
931 data
->relevant
= BITMAP_ALLOC (NULL
);
932 data
->important_candidates
= BITMAP_ALLOC (NULL
);
933 data
->max_inv_id
= 0;
935 data
->iv_uses
.create (20);
936 data
->iv_candidates
.create (20);
937 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
938 data
->inv_expr_id
= 0;
939 data
->name_expansion_cache
= NULL
;
940 data
->iv_common_cand_tab
= new hash_table
<iv_common_cand_hasher
> (10);
941 data
->iv_common_cands
.create (20);
942 decl_rtl_to_reset
.create (20);
943 gcc_obstack_init (&data
->iv_obstack
);
946 /* Returns a memory object to that EXPR points. In case we are able to
947 determine that it does not point to any such object, NULL is returned. */
950 determine_base_object (tree expr
)
952 enum tree_code code
= TREE_CODE (expr
);
955 /* If this is a pointer casted to any type, we need to determine
956 the base object for the pointer; so handle conversions before
957 throwing away non-pointer expressions. */
958 if (CONVERT_EXPR_P (expr
))
959 return determine_base_object (TREE_OPERAND (expr
, 0));
961 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
970 obj
= TREE_OPERAND (expr
, 0);
971 base
= get_base_address (obj
);
976 if (TREE_CODE (base
) == MEM_REF
)
977 return determine_base_object (TREE_OPERAND (base
, 0));
979 return fold_convert (ptr_type_node
,
980 build_fold_addr_expr (base
));
982 case POINTER_PLUS_EXPR
:
983 return determine_base_object (TREE_OPERAND (expr
, 0));
987 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
991 return fold_convert (ptr_type_node
, expr
);
995 /* Return true if address expression with non-DECL_P operand appears
999 contain_complex_addr_expr (tree expr
)
1004 switch (TREE_CODE (expr
))
1006 case POINTER_PLUS_EXPR
:
1009 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
1010 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
1014 return (!DECL_P (TREE_OPERAND (expr
, 0)));
1023 /* Allocates an induction variable with given initial value BASE and step STEP
1024 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1027 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
1028 bool no_overflow
= false)
1031 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
1032 sizeof (struct iv
));
1033 gcc_assert (step
!= NULL_TREE
);
1035 /* Lower address expression in base except ones with DECL_P as operand.
1037 1) More accurate cost can be computed for address expressions;
1038 2) Duplicate candidates won't be created for bases in different
1039 forms, like &a[0] and &a. */
1041 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1042 || contain_complex_addr_expr (expr
))
1045 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1046 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1050 iv
->base_object
= determine_base_object (base
);
1053 iv
->have_use_for
= false;
1055 iv
->ssa_name
= NULL_TREE
;
1056 iv
->no_overflow
= no_overflow
;
1057 iv
->have_address_use
= false;
1062 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1063 doesn't overflow. */
1066 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1069 struct version_info
*info
= name_info (data
, iv
);
1071 gcc_assert (!info
->iv
);
1073 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1074 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1075 info
->iv
->ssa_name
= iv
;
1078 /* Finds induction variable declaration for VAR. */
1081 get_iv (struct ivopts_data
*data
, tree var
)
1084 tree type
= TREE_TYPE (var
);
1086 if (!POINTER_TYPE_P (type
)
1087 && !INTEGRAL_TYPE_P (type
))
1090 if (!name_info (data
, var
)->iv
)
1092 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1095 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1096 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1099 return name_info (data
, var
)->iv
;
1102 /* Return the first non-invariant ssa var found in EXPR. */
1105 extract_single_var_from_expr (tree expr
)
1109 enum tree_code code
;
1111 if (!expr
|| is_gimple_min_invariant (expr
))
1114 code
= TREE_CODE (expr
);
1115 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1117 n
= TREE_OPERAND_LENGTH (expr
);
1118 for (i
= 0; i
< n
; i
++)
1120 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1126 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1129 /* Finds basic ivs. */
1132 find_bivs (struct ivopts_data
*data
)
1136 tree step
, type
, base
, stop
;
1138 struct loop
*loop
= data
->current_loop
;
1141 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1145 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1148 if (virtual_operand_p (PHI_RESULT (phi
)))
1151 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1154 if (integer_zerop (iv
.step
))
1158 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1159 /* Stop expanding iv base at the first ssa var referred by iv step.
1160 Ideally we should stop at any ssa var, because that's expensive
1161 and unusual to happen, we just do it on the first one.
1163 See PR64705 for the rationale. */
1164 stop
= extract_single_var_from_expr (step
);
1165 base
= expand_simple_operations (base
, stop
);
1166 if (contains_abnormal_ssa_name_p (base
)
1167 || contains_abnormal_ssa_name_p (step
))
1170 type
= TREE_TYPE (PHI_RESULT (phi
));
1171 base
= fold_convert (type
, base
);
1174 if (POINTER_TYPE_P (type
))
1175 step
= convert_to_ptrofftype (step
);
1177 step
= fold_convert (type
, step
);
1180 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1187 /* Marks basic ivs. */
1190 mark_bivs (struct ivopts_data
*data
)
1195 struct iv
*iv
, *incr_iv
;
1196 struct loop
*loop
= data
->current_loop
;
1197 basic_block incr_bb
;
1200 data
->bivs_not_used_in_addr
= 0;
1201 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1205 iv
= get_iv (data
, PHI_RESULT (phi
));
1209 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1210 def
= SSA_NAME_DEF_STMT (var
);
1211 /* Don't mark iv peeled from other one as biv. */
1213 && gimple_code (def
) == GIMPLE_PHI
1214 && gimple_bb (def
) == loop
->header
)
1217 incr_iv
= get_iv (data
, var
);
1221 /* If the increment is in the subloop, ignore it. */
1222 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1223 if (incr_bb
->loop_father
!= data
->current_loop
1224 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1228 incr_iv
->biv_p
= true;
1229 if (iv
->no_overflow
)
1230 data
->bivs_not_used_in_addr
++;
1231 if (incr_iv
->no_overflow
)
1232 data
->bivs_not_used_in_addr
++;
1236 /* Checks whether STMT defines a linear induction variable and stores its
1237 parameters to IV. */
1240 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1243 struct loop
*loop
= data
->current_loop
;
1245 iv
->base
= NULL_TREE
;
1246 iv
->step
= NULL_TREE
;
1248 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1251 lhs
= gimple_assign_lhs (stmt
);
1252 if (TREE_CODE (lhs
) != SSA_NAME
)
1255 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1258 /* Stop expanding iv base at the first ssa var referred by iv step.
1259 Ideally we should stop at any ssa var, because that's expensive
1260 and unusual to happen, we just do it on the first one.
1262 See PR64705 for the rationale. */
1263 stop
= extract_single_var_from_expr (iv
->step
);
1264 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1265 if (contains_abnormal_ssa_name_p (iv
->base
)
1266 || contains_abnormal_ssa_name_p (iv
->step
))
1269 /* If STMT could throw, then do not consider STMT as defining a GIV.
1270 While this will suppress optimizations, we can not safely delete this
1271 GIV and associated statements, even if it appears it is not used. */
1272 if (stmt_could_throw_p (stmt
))
1278 /* Finds general ivs in statement STMT. */
1281 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1285 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1288 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1291 /* Finds general ivs in basic block BB. */
1294 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1296 gimple_stmt_iterator bsi
;
1298 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1299 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1302 /* Finds general ivs. */
1305 find_givs (struct ivopts_data
*data
)
1307 struct loop
*loop
= data
->current_loop
;
1308 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1311 for (i
= 0; i
< loop
->num_nodes
; i
++)
1312 find_givs_in_bb (data
, body
[i
]);
1316 /* For each ssa name defined in LOOP determines whether it is an induction
1317 variable and if so, its initial value and step. */
1320 find_induction_variables (struct ivopts_data
*data
)
1325 if (!find_bivs (data
))
1331 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1333 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1337 fprintf (dump_file
, " number of iterations ");
1338 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1339 if (!integer_zerop (niter
->may_be_zero
))
1341 fprintf (dump_file
, "; zero if ");
1342 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1344 fprintf (dump_file
, "\n\n");
1347 fprintf (dump_file
, "Induction variables:\n\n");
1349 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1351 if (ver_info (data
, i
)->iv
)
1352 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1359 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1360 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1361 is the const offset stripped from IV base. For uses of other types,
1362 ADDR_BASE and ADDR_OFFSET are zero by default. */
1364 static struct iv_use
*
1365 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1366 gimple
*stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1367 unsigned HOST_WIDE_INT addr_offset
= 0)
1369 struct iv_use
*use
= XCNEW (struct iv_use
);
1371 use
->id
= n_iv_uses (data
);
1373 use
->type
= use_type
;
1377 use
->related_cands
= BITMAP_ALLOC (NULL
);
1379 use
->addr_base
= addr_base
;
1380 use
->addr_offset
= addr_offset
;
1382 data
->iv_uses
.safe_push (use
);
1387 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1388 The sub use is recorded under the one whose use id is ID_GROUP. */
1390 static struct iv_use
*
1391 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1392 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
,
1393 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1394 unsigned int id_group
)
1396 struct iv_use
*use
= XCNEW (struct iv_use
);
1397 struct iv_use
*group
= iv_use (data
, id_group
);
1399 use
->id
= group
->id
;
1401 use
->type
= use_type
;
1405 use
->related_cands
= NULL
;
1406 use
->addr_base
= addr_base
;
1407 use
->addr_offset
= addr_offset
;
1409 /* Sub use list is maintained in offset ascending order. */
1410 if (addr_offset
<= group
->addr_offset
)
1412 use
->related_cands
= group
->related_cands
;
1413 group
->related_cands
= NULL
;
1415 data
->iv_uses
[id_group
] = use
;
1423 group
= group
->next
;
1425 while (group
&& addr_offset
> group
->addr_offset
);
1426 use
->next
= pre
->next
;
1433 /* Checks whether OP is a loop-level invariant and if so, records it.
1434 NONLINEAR_USE is true if the invariant is used in a way we do not
1435 handle specially. */
1438 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1441 struct version_info
*info
;
1443 if (TREE_CODE (op
) != SSA_NAME
1444 || virtual_operand_p (op
))
1447 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1449 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1452 info
= name_info (data
, op
);
1454 info
->has_nonlin_use
|= nonlinear_use
;
1456 info
->inv_id
= ++data
->max_inv_id
;
1457 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1460 /* Checks whether the use OP is interesting and if so, records it. */
1462 static struct iv_use
*
1463 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1469 if (TREE_CODE (op
) != SSA_NAME
)
1472 iv
= get_iv (data
, op
);
1476 if (iv
->have_use_for
)
1478 use
= iv_use (data
, iv
->use_id
);
1480 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1484 if (integer_zerop (iv
->step
))
1486 record_invariant (data
, op
, true);
1489 iv
->have_use_for
= true;
1491 stmt
= SSA_NAME_DEF_STMT (op
);
1492 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1493 || is_gimple_assign (stmt
));
1495 use
= record_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
1496 iv
->use_id
= use
->id
;
1501 /* Given a condition in statement STMT, checks whether it is a compare
1502 of an induction variable and an invariant. If this is the case,
1503 CONTROL_VAR is set to location of the iv, BOUND to the location of
1504 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1505 induction variable descriptions, and true is returned. If this is not
1506 the case, CONTROL_VAR and BOUND are set to the arguments of the
1507 condition and false is returned. */
1510 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1511 tree
**control_var
, tree
**bound
,
1512 struct iv
**iv_var
, struct iv
**iv_bound
)
1514 /* The objects returned when COND has constant operands. */
1515 static struct iv const_iv
;
1517 tree
*op0
= &zero
, *op1
= &zero
;
1518 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1521 if (gimple_code (stmt
) == GIMPLE_COND
)
1523 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1524 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1525 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1529 op0
= gimple_assign_rhs1_ptr (stmt
);
1530 op1
= gimple_assign_rhs2_ptr (stmt
);
1533 zero
= integer_zero_node
;
1534 const_iv
.step
= integer_zero_node
;
1536 if (TREE_CODE (*op0
) == SSA_NAME
)
1537 iv0
= get_iv (data
, *op0
);
1538 if (TREE_CODE (*op1
) == SSA_NAME
)
1539 iv1
= get_iv (data
, *op1
);
1541 /* Exactly one of the compared values must be an iv, and the other one must
1546 if (integer_zerop (iv0
->step
))
1548 /* Control variable may be on the other side. */
1549 std::swap (op0
, op1
);
1550 std::swap (iv0
, iv1
);
1552 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1567 /* Checks whether the condition in STMT is interesting and if so,
1571 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1573 tree
*var_p
, *bound_p
;
1576 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1578 find_interesting_uses_op (data
, *var_p
);
1579 find_interesting_uses_op (data
, *bound_p
);
1583 record_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
1586 /* Returns the outermost loop EXPR is obviously invariant in
1587 relative to the loop LOOP, i.e. if all its operands are defined
1588 outside of the returned loop. Returns NULL if EXPR is not
1589 even obviously invariant in LOOP. */
1592 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1597 if (is_gimple_min_invariant (expr
))
1598 return current_loops
->tree_root
;
1600 if (TREE_CODE (expr
) == SSA_NAME
)
1602 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1605 if (flow_bb_inside_loop_p (loop
, def_bb
))
1607 return superloop_at_depth (loop
,
1608 loop_depth (def_bb
->loop_father
) + 1);
1611 return current_loops
->tree_root
;
1617 unsigned maxdepth
= 0;
1618 len
= TREE_OPERAND_LENGTH (expr
);
1619 for (i
= 0; i
< len
; i
++)
1621 struct loop
*ivloop
;
1622 if (!TREE_OPERAND (expr
, i
))
1625 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1628 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1631 return superloop_at_depth (loop
, maxdepth
);
1634 /* Returns true if expression EXPR is obviously invariant in LOOP,
1635 i.e. if all its operands are defined outside of the LOOP. LOOP
1636 should not be the function body. */
1639 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1644 gcc_assert (loop_depth (loop
) > 0);
1646 if (is_gimple_min_invariant (expr
))
1649 if (TREE_CODE (expr
) == SSA_NAME
)
1651 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1653 && flow_bb_inside_loop_p (loop
, def_bb
))
1662 len
= TREE_OPERAND_LENGTH (expr
);
1663 for (i
= 0; i
< len
; i
++)
1664 if (TREE_OPERAND (expr
, i
)
1665 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1671 /* Given expression EXPR which computes inductive values with respect
1672 to loop recorded in DATA, this function returns biv from which EXPR
1673 is derived by tracing definition chains of ssa variables in EXPR. */
1676 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1681 enum tree_code code
;
1684 if (expr
== NULL_TREE
)
1687 if (is_gimple_min_invariant (expr
))
1690 code
= TREE_CODE (expr
);
1691 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1693 n
= TREE_OPERAND_LENGTH (expr
);
1694 for (i
= 0; i
< n
; i
++)
1696 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1702 /* Stop if it's not ssa name. */
1703 if (code
!= SSA_NAME
)
1706 iv
= get_iv (data
, expr
);
1707 if (!iv
|| integer_zerop (iv
->step
))
1712 stmt
= SSA_NAME_DEF_STMT (expr
);
1713 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1716 use_operand_p use_p
;
1718 if (virtual_operand_p (gimple_phi_result (phi
)))
1721 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1723 tree use
= USE_FROM_PTR (use_p
);
1724 iv
= find_deriving_biv_for_expr (data
, use
);
1730 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1733 e1
= gimple_assign_rhs1 (stmt
);
1734 code
= gimple_assign_rhs_code (stmt
);
1735 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1736 return find_deriving_biv_for_expr (data
, e1
);
1743 case POINTER_PLUS_EXPR
:
1744 /* Increments, decrements and multiplications by a constant
1746 e2
= gimple_assign_rhs2 (stmt
);
1747 iv
= find_deriving_biv_for_expr (data
, e2
);
1753 /* Casts are simple. */
1754 return find_deriving_biv_for_expr (data
, e1
);
1763 /* Record BIV, its predecessor and successor that they are used in
1764 address type uses. */
1767 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1770 tree type
, base_1
, base_2
;
1773 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1774 || biv
->have_address_use
|| !biv
->no_overflow
)
1777 type
= TREE_TYPE (biv
->base
);
1778 if (!INTEGRAL_TYPE_P (type
))
1781 biv
->have_address_use
= true;
1782 data
->bivs_not_used_in_addr
--;
1783 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
1784 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1786 struct iv
*iv
= ver_info (data
, i
)->iv
;
1788 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
1789 || iv
->have_address_use
|| !iv
->no_overflow
)
1792 if (type
!= TREE_TYPE (iv
->base
)
1793 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
1796 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
1799 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
1800 if (operand_equal_p (base_1
, iv
->base
, 0)
1801 || operand_equal_p (base_2
, biv
->base
, 0))
1803 iv
->have_address_use
= true;
1804 data
->bivs_not_used_in_addr
--;
1809 /* Cumulates the steps of indices into DATA and replaces their values with the
1810 initial ones. Returns false when the value of the index cannot be determined.
1811 Callback for for_each_index. */
1813 struct ifs_ivopts_data
1815 struct ivopts_data
*ivopts_data
;
1821 idx_find_step (tree base
, tree
*idx
, void *data
)
1823 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1825 bool use_overflow_semantics
= false;
1826 tree step
, iv_base
, iv_step
, lbound
, off
;
1827 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1829 /* If base is a component ref, require that the offset of the reference
1831 if (TREE_CODE (base
) == COMPONENT_REF
)
1833 off
= component_ref_field_offset (base
);
1834 return expr_invariant_in_loop_p (loop
, off
);
1837 /* If base is array, first check whether we will be able to move the
1838 reference out of the loop (in order to take its address in strength
1839 reduction). In order for this to work we need both lower bound
1840 and step to be loop invariants. */
1841 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1843 /* Moreover, for a range, the size needs to be invariant as well. */
1844 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1845 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1848 step
= array_ref_element_size (base
);
1849 lbound
= array_ref_low_bound (base
);
1851 if (!expr_invariant_in_loop_p (loop
, step
)
1852 || !expr_invariant_in_loop_p (loop
, lbound
))
1856 if (TREE_CODE (*idx
) != SSA_NAME
)
1859 iv
= get_iv (dta
->ivopts_data
, *idx
);
1863 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1864 *&x[0], which is not folded and does not trigger the
1865 ARRAY_REF path below. */
1868 if (integer_zerop (iv
->step
))
1871 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1873 step
= array_ref_element_size (base
);
1875 /* We only handle addresses whose step is an integer constant. */
1876 if (TREE_CODE (step
) != INTEGER_CST
)
1880 /* The step for pointer arithmetics already is 1 byte. */
1881 step
= size_one_node
;
1885 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1886 use_overflow_semantics
= true;
1888 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1889 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1890 use_overflow_semantics
))
1892 /* The index might wrap. */
1896 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1897 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1899 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
1902 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
1904 record_biv_for_address_use (dta
->ivopts_data
, iv
);
1909 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1910 object is passed to it in DATA. */
1913 idx_record_use (tree base
, tree
*idx
,
1916 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1917 find_interesting_uses_op (data
, *idx
);
1918 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1920 find_interesting_uses_op (data
, array_ref_element_size (base
));
1921 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1926 /* If we can prove that TOP = cst * BOT for some constant cst,
1927 store cst to MUL and return true. Otherwise return false.
1928 The returned value is always sign-extended, regardless of the
1929 signedness of TOP and BOT. */
1932 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1935 enum tree_code code
;
1936 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1937 widest_int res
, p0
, p1
;
1942 if (operand_equal_p (top
, bot
, 0))
1948 code
= TREE_CODE (top
);
1952 mby
= TREE_OPERAND (top
, 1);
1953 if (TREE_CODE (mby
) != INTEGER_CST
)
1956 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1959 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1964 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1965 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1968 if (code
== MINUS_EXPR
)
1970 *mul
= wi::sext (p0
+ p1
, precision
);
1974 if (TREE_CODE (bot
) != INTEGER_CST
)
1977 p0
= widest_int::from (top
, SIGNED
);
1978 p1
= widest_int::from (bot
, SIGNED
);
1981 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1989 /* Return true if memory reference REF with step STEP may be unaligned. */
1992 may_be_unaligned_p (tree ref
, tree step
)
1994 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1995 thus they are not misaligned. */
1996 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1999 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
2000 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
2001 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
2003 unsigned HOST_WIDE_INT bitpos
;
2004 unsigned int ref_align
;
2005 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
2006 if (ref_align
< align
2007 || (bitpos
% align
) != 0
2008 || (bitpos
% BITS_PER_UNIT
) != 0)
2011 unsigned int trailing_zeros
= tree_ctz (step
);
2012 if (trailing_zeros
< HOST_BITS_PER_INT
2013 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
2019 /* Return true if EXPR may be non-addressable. */
2022 may_be_nonaddressable_p (tree expr
)
2024 switch (TREE_CODE (expr
))
2026 case TARGET_MEM_REF
:
2027 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2028 target, thus they are always addressable. */
2032 /* Likewise for MEM_REFs, modulo the storage order. */
2033 return REF_REVERSE_STORAGE_ORDER (expr
);
2036 if (REF_REVERSE_STORAGE_ORDER (expr
))
2038 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2041 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2043 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
2044 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2047 case ARRAY_RANGE_REF
:
2048 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2050 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2052 case VIEW_CONVERT_EXPR
:
2053 /* This kind of view-conversions may wrap non-addressable objects
2054 and make them look addressable. After some processing the
2055 non-addressability may be uncovered again, causing ADDR_EXPRs
2056 of inappropriate objects to be built. */
2057 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2058 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2060 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2073 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
2075 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
2076 If there is an existing use which has same stripped iv base and step,
2077 this function records this one as a sub use to that; otherwise records
2078 it as a normal one. */
2080 static struct iv_use
*
2081 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
2082 struct iv
*iv
, gimple
*stmt
, enum use_type use_type
)
2087 unsigned HOST_WIDE_INT addr_offset
;
2089 /* Only support sub use for address type uses, that is, with base
2091 if (!iv
->base_object
)
2092 return record_use (data
, use_p
, iv
, stmt
, use_type
);
2094 addr_base
= strip_offset (iv
->base
, &addr_offset
);
2095 for (i
= 0; i
< n_iv_uses (data
); i
++)
2097 use
= iv_use (data
, i
);
2098 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
2101 /* Check if it has the same stripped base and step. */
2102 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
2103 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
2104 && operand_equal_p (addr_base
, use
->addr_base
, 0))
2108 if (i
== n_iv_uses (data
))
2109 return record_use (data
, use_p
, iv
, stmt
,
2110 use_type
, addr_base
, addr_offset
);
2112 return record_sub_use (data
, use_p
, iv
, stmt
,
2113 use_type
, addr_base
, addr_offset
, i
);
2116 /* Finds addresses in *OP_P inside STMT. */
2119 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2122 tree base
= *op_p
, step
= size_zero_node
;
2124 struct ifs_ivopts_data ifs_ivopts_data
;
2126 /* Do not play with volatile memory references. A bit too conservative,
2127 perhaps, but safe. */
2128 if (gimple_has_volatile_ops (stmt
))
2131 /* Ignore bitfields for now. Not really something terribly complicated
2133 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2136 base
= unshare_expr (base
);
2138 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2140 tree type
= build_pointer_type (TREE_TYPE (base
));
2144 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2146 civ
= get_iv (data
, TMR_BASE (base
));
2150 TMR_BASE (base
) = civ
->base
;
2153 if (TMR_INDEX2 (base
)
2154 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2156 civ
= get_iv (data
, TMR_INDEX2 (base
));
2160 TMR_INDEX2 (base
) = civ
->base
;
2163 if (TMR_INDEX (base
)
2164 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2166 civ
= get_iv (data
, TMR_INDEX (base
));
2170 TMR_INDEX (base
) = civ
->base
;
2175 if (TMR_STEP (base
))
2176 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2178 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2182 if (integer_zerop (step
))
2184 base
= tree_mem_ref_addr (type
, base
);
2188 ifs_ivopts_data
.ivopts_data
= data
;
2189 ifs_ivopts_data
.stmt
= stmt
;
2190 ifs_ivopts_data
.step
= size_zero_node
;
2191 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2192 || integer_zerop (ifs_ivopts_data
.step
))
2194 step
= ifs_ivopts_data
.step
;
2196 /* Check that the base expression is addressable. This needs
2197 to be done after substituting bases of IVs into it. */
2198 if (may_be_nonaddressable_p (base
))
2201 /* Moreover, on strict alignment platforms, check that it is
2202 sufficiently aligned. */
2203 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2206 base
= build_fold_addr_expr (base
);
2208 /* Substituting bases of IVs into the base expression might
2209 have caused folding opportunities. */
2210 if (TREE_CODE (base
) == ADDR_EXPR
)
2212 tree
*ref
= &TREE_OPERAND (base
, 0);
2213 while (handled_component_p (*ref
))
2214 ref
= &TREE_OPERAND (*ref
, 0);
2215 if (TREE_CODE (*ref
) == MEM_REF
)
2217 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2218 TREE_OPERAND (*ref
, 0),
2219 TREE_OPERAND (*ref
, 1));
2226 civ
= alloc_iv (data
, base
, step
);
2227 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2231 for_each_index (op_p
, idx_record_use
, data
);
2234 /* Finds and records invariants used in STMT. */
2237 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2240 use_operand_p use_p
;
2243 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2245 op
= USE_FROM_PTR (use_p
);
2246 record_invariant (data
, op
, false);
2250 /* Finds interesting uses of induction variables in the statement STMT. */
2253 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2256 tree op
, *lhs
, *rhs
;
2258 use_operand_p use_p
;
2259 enum tree_code code
;
2261 find_invariants_stmt (data
, stmt
);
2263 if (gimple_code (stmt
) == GIMPLE_COND
)
2265 find_interesting_uses_cond (data
, stmt
);
2269 if (is_gimple_assign (stmt
))
2271 lhs
= gimple_assign_lhs_ptr (stmt
);
2272 rhs
= gimple_assign_rhs1_ptr (stmt
);
2274 if (TREE_CODE (*lhs
) == SSA_NAME
)
2276 /* If the statement defines an induction variable, the uses are not
2277 interesting by themselves. */
2279 iv
= get_iv (data
, *lhs
);
2281 if (iv
&& !integer_zerop (iv
->step
))
2285 code
= gimple_assign_rhs_code (stmt
);
2286 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2287 && (REFERENCE_CLASS_P (*rhs
)
2288 || is_gimple_val (*rhs
)))
2290 if (REFERENCE_CLASS_P (*rhs
))
2291 find_interesting_uses_address (data
, stmt
, rhs
);
2293 find_interesting_uses_op (data
, *rhs
);
2295 if (REFERENCE_CLASS_P (*lhs
))
2296 find_interesting_uses_address (data
, stmt
, lhs
);
2299 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2301 find_interesting_uses_cond (data
, stmt
);
2305 /* TODO -- we should also handle address uses of type
2307 memory = call (whatever);
2314 if (gimple_code (stmt
) == GIMPLE_PHI
2315 && gimple_bb (stmt
) == data
->current_loop
->header
)
2317 iv
= get_iv (data
, PHI_RESULT (stmt
));
2319 if (iv
&& !integer_zerop (iv
->step
))
2323 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2325 op
= USE_FROM_PTR (use_p
);
2327 if (TREE_CODE (op
) != SSA_NAME
)
2330 iv
= get_iv (data
, op
);
2334 find_interesting_uses_op (data
, op
);
2338 /* Finds interesting uses of induction variables outside of loops
2339 on loop exit edge EXIT. */
2342 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2348 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2351 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2352 if (!virtual_operand_p (def
))
2353 find_interesting_uses_op (data
, def
);
2357 /* Finds uses of the induction variables that are interesting. */
2360 find_interesting_uses (struct ivopts_data
*data
)
2363 gimple_stmt_iterator bsi
;
2364 basic_block
*body
= get_loop_body (data
->current_loop
);
2366 struct version_info
*info
;
2369 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2370 fprintf (dump_file
, "Uses:\n\n");
2372 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2377 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2378 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2379 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2380 find_interesting_uses_outside (data
, e
);
2382 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2383 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2384 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2385 if (!is_gimple_debug (gsi_stmt (bsi
)))
2386 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2389 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2393 fprintf (dump_file
, "\n");
2395 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2397 info
= ver_info (data
, i
);
2400 fprintf (dump_file
, " ");
2401 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2402 fprintf (dump_file
, " is invariant (%d)%s\n",
2403 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2407 fprintf (dump_file
, "\n");
2413 /* Compute maximum offset of [base + offset] addressing mode
2414 for memory reference represented by USE. */
2416 static HOST_WIDE_INT
2417 compute_max_addr_offset (struct iv_use
*use
)
2421 HOST_WIDE_INT i
, off
;
2422 unsigned list_index
, num
;
2424 machine_mode mem_mode
, addr_mode
;
2425 static vec
<HOST_WIDE_INT
> max_offset_list
;
2427 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2428 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2430 num
= max_offset_list
.length ();
2431 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2432 if (list_index
>= num
)
2434 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2435 for (; num
< max_offset_list
.length (); num
++)
2436 max_offset_list
[num
] = -1;
2439 off
= max_offset_list
[list_index
];
2443 addr_mode
= targetm
.addr_space
.address_mode (as
);
2444 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2445 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2447 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2448 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2449 width
= HOST_BITS_PER_WIDE_INT
- 1;
2451 for (i
= width
; i
> 0; i
--)
2453 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2454 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2455 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2458 /* For some strict-alignment targets, the offset must be naturally
2459 aligned. Try an aligned offset if mem_mode is not QImode. */
2460 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2461 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2463 off
-= GET_MODE_SIZE (mem_mode
);
2464 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2465 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2472 max_offset_list
[list_index
] = off
;
2476 /* Check if all small groups should be split. Return true if and
2479 1) At least one groups contain two uses with different offsets.
2480 2) No group contains more than two uses with different offsets.
2482 Return false otherwise. We want to split such groups because:
2484 1) Small groups don't have much benefit and may interfer with
2485 general candidate selection.
2486 2) Size for problem with only small groups is usually small and
2487 general algorithm can handle it well.
2489 TODO -- Above claim may not hold when auto increment is supported. */
2492 split_all_small_groups (struct ivopts_data
*data
)
2494 bool split_p
= false;
2495 unsigned int i
, n
, distinct
;
2496 struct iv_use
*pre
, *use
;
2498 n
= n_iv_uses (data
);
2499 for (i
= 0; i
< n
; i
++)
2501 use
= iv_use (data
, i
);
2506 gcc_assert (use
->type
== USE_ADDRESS
);
2507 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2509 if (pre
->addr_offset
!= use
->addr_offset
)
2522 /* For each group of address type uses, this function further groups
2523 these uses according to the maximum offset supported by target's
2524 [base + offset] addressing mode. */
2527 group_address_uses (struct ivopts_data
*data
)
2529 HOST_WIDE_INT max_offset
= -1;
2530 unsigned int i
, n
, sub_id
;
2531 struct iv_use
*pre
, *use
;
2532 unsigned HOST_WIDE_INT addr_offset_first
;
2534 /* Reset max offset to split all small groups. */
2535 if (split_all_small_groups (data
))
2538 n
= n_iv_uses (data
);
2539 for (i
= 0; i
< n
; i
++)
2541 use
= iv_use (data
, i
);
2545 gcc_assert (use
->type
== USE_ADDRESS
);
2546 if (max_offset
!= 0)
2547 max_offset
= compute_max_addr_offset (use
);
2552 addr_offset_first
= use
->addr_offset
;
2553 /* Only uses with offset that can fit in offset part against
2554 the first use can be grouped together. */
2555 for (pre
= use
, use
= use
->next
;
2556 use
&& (use
->addr_offset
- addr_offset_first
2557 <= (unsigned HOST_WIDE_INT
) max_offset
);
2558 pre
= use
, use
= use
->next
)
2561 use
->sub_id
= ++sub_id
;
2564 /* Break the list and create new group. */
2568 use
->id
= n_iv_uses (data
);
2569 use
->related_cands
= BITMAP_ALLOC (NULL
);
2570 data
->iv_uses
.safe_push (use
);
2575 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2576 dump_uses (dump_file
, data
);
2579 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2580 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2581 we are at the top-level of the processed address. */
2584 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2585 HOST_WIDE_INT
*offset
)
2587 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2588 enum tree_code code
;
2589 tree type
, orig_type
= TREE_TYPE (expr
);
2590 HOST_WIDE_INT off0
, off1
, st
;
2591 tree orig_expr
= expr
;
2595 type
= TREE_TYPE (expr
);
2596 code
= TREE_CODE (expr
);
2602 if (!cst_and_fits_in_hwi (expr
)
2603 || integer_zerop (expr
))
2606 *offset
= int_cst_value (expr
);
2607 return build_int_cst (orig_type
, 0);
2609 case POINTER_PLUS_EXPR
:
2612 op0
= TREE_OPERAND (expr
, 0);
2613 op1
= TREE_OPERAND (expr
, 1);
2615 op0
= strip_offset_1 (op0
, false, false, &off0
);
2616 op1
= strip_offset_1 (op1
, false, false, &off1
);
2618 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2619 if (op0
== TREE_OPERAND (expr
, 0)
2620 && op1
== TREE_OPERAND (expr
, 1))
2623 if (integer_zerop (op1
))
2625 else if (integer_zerop (op0
))
2627 if (code
== MINUS_EXPR
)
2628 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2633 expr
= fold_build2 (code
, type
, op0
, op1
);
2635 return fold_convert (orig_type
, expr
);
2638 op1
= TREE_OPERAND (expr
, 1);
2639 if (!cst_and_fits_in_hwi (op1
))
2642 op0
= TREE_OPERAND (expr
, 0);
2643 op0
= strip_offset_1 (op0
, false, false, &off0
);
2644 if (op0
== TREE_OPERAND (expr
, 0))
2647 *offset
= off0
* int_cst_value (op1
);
2648 if (integer_zerop (op0
))
2651 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2653 return fold_convert (orig_type
, expr
);
2656 case ARRAY_RANGE_REF
:
2660 step
= array_ref_element_size (expr
);
2661 if (!cst_and_fits_in_hwi (step
))
2664 st
= int_cst_value (step
);
2665 op1
= TREE_OPERAND (expr
, 1);
2666 op1
= strip_offset_1 (op1
, false, false, &off1
);
2667 *offset
= off1
* st
;
2670 && integer_zerop (op1
))
2672 /* Strip the component reference completely. */
2673 op0
= TREE_OPERAND (expr
, 0);
2674 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2687 tmp
= component_ref_field_offset (expr
);
2688 field
= TREE_OPERAND (expr
, 1);
2690 && cst_and_fits_in_hwi (tmp
)
2691 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2693 HOST_WIDE_INT boffset
, abs_off
;
2695 /* Strip the component reference completely. */
2696 op0
= TREE_OPERAND (expr
, 0);
2697 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2698 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2699 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2703 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2710 op0
= TREE_OPERAND (expr
, 0);
2711 op0
= strip_offset_1 (op0
, true, true, &off0
);
2714 if (op0
== TREE_OPERAND (expr
, 0))
2717 expr
= build_fold_addr_expr (op0
);
2718 return fold_convert (orig_type
, expr
);
2721 /* ??? Offset operand? */
2722 inside_addr
= false;
2729 /* Default handling of expressions for that we want to recurse into
2730 the first operand. */
2731 op0
= TREE_OPERAND (expr
, 0);
2732 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2735 if (op0
== TREE_OPERAND (expr
, 0)
2736 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2739 expr
= copy_node (expr
);
2740 TREE_OPERAND (expr
, 0) = op0
;
2742 TREE_OPERAND (expr
, 1) = op1
;
2744 /* Inside address, we might strip the top level component references,
2745 thus changing type of the expression. Handling of ADDR_EXPR
2747 expr
= fold_convert (orig_type
, expr
);
2752 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2755 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2758 tree core
= strip_offset_1 (expr
, false, false, &off
);
2763 /* Returns variant of TYPE that can be used as base for different uses.
2764 We return unsigned type with the same precision, which avoids problems
2768 generic_type_for (tree type
)
2770 if (POINTER_TYPE_P (type
))
2771 return unsigned_type_for (type
);
2773 if (TYPE_UNSIGNED (type
))
2776 return unsigned_type_for (type
);
2779 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2780 the bitmap to that we should store it. */
2782 static struct ivopts_data
*fd_ivopts_data
;
2784 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2786 bitmap
*depends_on
= (bitmap
*) data
;
2787 struct version_info
*info
;
2789 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2791 info
= name_info (fd_ivopts_data
, *expr_p
);
2793 if (!info
->inv_id
|| info
->has_nonlin_use
)
2797 *depends_on
= BITMAP_ALLOC (NULL
);
2798 bitmap_set_bit (*depends_on
, info
->inv_id
);
2803 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2804 position to POS. If USE is not NULL, the candidate is set as related to
2805 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2806 replacement of the final value of the iv by a direct computation. */
2808 static struct iv_cand
*
2809 add_candidate_1 (struct ivopts_data
*data
,
2810 tree base
, tree step
, bool important
, enum iv_position pos
,
2811 struct iv_use
*use
, gimple
*incremented_at
,
2812 struct iv
*orig_iv
= NULL
)
2815 struct iv_cand
*cand
= NULL
;
2816 tree type
, orig_type
;
2818 /* -fkeep-gc-roots-live means that we have to keep a real pointer
2819 live, but the ivopts code may replace a real pointer with one
2820 pointing before or after the memory block that is then adjusted
2821 into the memory block during the loop. FIXME: It would likely be
2822 better to actually force the pointer live and still use ivopts;
2823 for example, it would be enough to write the pointer into memory
2824 and keep it there until after the loop. */
2825 if (flag_keep_gc_roots_live
&& POINTER_TYPE_P (TREE_TYPE (base
)))
2828 /* For non-original variables, make sure their values are computed in a type
2829 that does not invoke undefined behavior on overflows (since in general,
2830 we cannot prove that these induction variables are non-wrapping). */
2831 if (pos
!= IP_ORIGINAL
)
2833 orig_type
= TREE_TYPE (base
);
2834 type
= generic_type_for (orig_type
);
2835 if (type
!= orig_type
)
2837 base
= fold_convert (type
, base
);
2838 step
= fold_convert (type
, step
);
2842 for (i
= 0; i
< n_iv_cands (data
); i
++)
2844 cand
= iv_cand (data
, i
);
2846 if (cand
->pos
!= pos
)
2849 if (cand
->incremented_at
!= incremented_at
2850 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2851 && cand
->ainc_use
!= use
))
2865 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2866 && operand_equal_p (step
, cand
->iv
->step
, 0)
2867 && (TYPE_PRECISION (TREE_TYPE (base
))
2868 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2872 if (i
== n_iv_cands (data
))
2874 cand
= XCNEW (struct iv_cand
);
2880 cand
->iv
= alloc_iv (data
, base
, step
);
2883 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2885 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2886 cand
->var_after
= cand
->var_before
;
2888 cand
->important
= important
;
2889 cand
->incremented_at
= incremented_at
;
2890 data
->iv_candidates
.safe_push (cand
);
2893 && TREE_CODE (step
) != INTEGER_CST
)
2895 fd_ivopts_data
= data
;
2896 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2899 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2900 cand
->ainc_use
= use
;
2902 cand
->ainc_use
= NULL
;
2904 cand
->orig_iv
= orig_iv
;
2905 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2906 dump_cand (dump_file
, cand
);
2909 if (important
&& !cand
->important
)
2911 cand
->important
= true;
2912 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2913 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2918 bitmap_set_bit (use
->related_cands
, i
);
2919 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2920 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2927 /* Returns true if incrementing the induction variable at the end of the LOOP
2930 The purpose is to avoid splitting latch edge with a biv increment, thus
2931 creating a jump, possibly confusing other optimization passes and leaving
2932 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2933 is not available (so we do not have a better alternative), or if the latch
2934 edge is already nonempty. */
2937 allow_ip_end_pos_p (struct loop
*loop
)
2939 if (!ip_normal_pos (loop
))
2942 if (!empty_block_p (ip_end_pos (loop
)))
2948 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2949 Important field is set to IMPORTANT. */
2952 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2953 bool important
, struct iv_use
*use
)
2955 basic_block use_bb
= gimple_bb (use
->stmt
);
2956 machine_mode mem_mode
;
2957 unsigned HOST_WIDE_INT cstepi
;
2959 /* If we insert the increment in any position other than the standard
2960 ones, we must ensure that it is incremented once per iteration.
2961 It must not be in an inner nested loop, or one side of an if
2963 if (use_bb
->loop_father
!= data
->current_loop
2964 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2965 || stmt_could_throw_p (use
->stmt
)
2966 || !cst_and_fits_in_hwi (step
))
2969 cstepi
= int_cst_value (step
);
2971 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2972 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2973 || USE_STORE_PRE_INCREMENT (mem_mode
))
2974 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2975 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2976 || USE_STORE_PRE_DECREMENT (mem_mode
))
2977 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2979 enum tree_code code
= MINUS_EXPR
;
2981 tree new_step
= step
;
2983 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2985 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2986 code
= POINTER_PLUS_EXPR
;
2989 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2990 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2991 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2994 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2995 || USE_STORE_POST_INCREMENT (mem_mode
))
2996 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2997 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2998 || USE_STORE_POST_DECREMENT (mem_mode
))
2999 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
3001 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
3006 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3007 position to POS. If USE is not NULL, the candidate is set as related to
3008 it. The candidate computation is scheduled before exit condition and at
3012 add_candidate (struct ivopts_data
*data
,
3013 tree base
, tree step
, bool important
, struct iv_use
*use
,
3014 struct iv
*orig_iv
= NULL
)
3016 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
3018 if (ip_normal_pos (data
->current_loop
))
3019 add_candidate_1 (data
, base
, step
, important
,
3020 IP_NORMAL
, use
, NULL
, orig_iv
);
3021 if (ip_end_pos (data
->current_loop
)
3022 && allow_ip_end_pos_p (data
->current_loop
))
3023 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
, orig_iv
);
3026 /* Adds standard iv candidates. */
3029 add_standard_iv_candidates (struct ivopts_data
*data
)
3031 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
3033 /* The same for a double-integer type if it is still fast enough. */
3035 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
3036 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
3037 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
3038 build_int_cst (long_integer_type_node
, 1), true, NULL
);
3040 /* The same for a double-integer type if it is still fast enough. */
3042 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
3043 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
3044 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
3045 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
3049 /* Adds candidates bases on the old induction variable IV. */
3052 add_iv_candidate_for_biv (struct ivopts_data
*data
, struct iv
*iv
)
3056 struct iv_cand
*cand
;
3058 /* Check if this biv is used in address type use. */
3059 if (iv
->no_overflow
&& iv
->have_address_use
3060 && INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
))
3061 && TYPE_PRECISION (TREE_TYPE (iv
->base
)) < TYPE_PRECISION (sizetype
))
3063 tree base
= fold_convert (sizetype
, iv
->base
);
3064 tree step
= fold_convert (sizetype
, iv
->step
);
3066 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3067 add_candidate (data
, base
, step
, true, NULL
, iv
);
3068 /* Add iv cand of the original type only if it has nonlinear use. */
3069 if (iv
->have_use_for
)
3070 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3073 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3075 /* The same, but with initial value zero. */
3076 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
3077 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
3079 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
3080 iv
->step
, true, NULL
);
3082 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
3083 if (gimple_code (phi
) == GIMPLE_PHI
)
3085 /* Additionally record the possibility of leaving the original iv
3087 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
3088 /* Don't add candidate if it's from another PHI node because
3089 it's an affine iv appearing in the form of PEELED_CHREC. */
3090 phi
= SSA_NAME_DEF_STMT (def
);
3091 if (gimple_code (phi
) != GIMPLE_PHI
)
3093 cand
= add_candidate_1 (data
,
3094 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
3095 SSA_NAME_DEF_STMT (def
));
3098 cand
->var_before
= iv
->ssa_name
;
3099 cand
->var_after
= def
;
3103 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
3107 /* Adds candidates based on the old induction variables. */
3110 add_iv_candidate_for_bivs (struct ivopts_data
*data
)
3116 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
3118 iv
= ver_info (data
, i
)->iv
;
3119 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
3120 add_iv_candidate_for_biv (data
, iv
);
3124 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3127 record_common_cand (struct ivopts_data
*data
, tree base
,
3128 tree step
, struct iv_use
*use
)
3130 struct iv_common_cand ent
;
3131 struct iv_common_cand
**slot
;
3133 gcc_assert (use
!= NULL
);
3137 ent
.hash
= iterative_hash_expr (base
, 0);
3138 ent
.hash
= iterative_hash_expr (step
, ent
.hash
);
3140 slot
= data
->iv_common_cand_tab
->find_slot (&ent
, INSERT
);
3143 *slot
= new iv_common_cand ();
3144 (*slot
)->base
= base
;
3145 (*slot
)->step
= step
;
3146 (*slot
)->uses
.create (8);
3147 (*slot
)->hash
= ent
.hash
;
3148 data
->iv_common_cands
.safe_push ((*slot
));
3150 (*slot
)->uses
.safe_push (use
);
3154 /* Comparison function used to sort common candidates. */
3157 common_cand_cmp (const void *p1
, const void *p2
)
3160 const struct iv_common_cand
*const *const ccand1
3161 = (const struct iv_common_cand
*const *)p1
;
3162 const struct iv_common_cand
*const *const ccand2
3163 = (const struct iv_common_cand
*const *)p2
;
3165 n1
= (*ccand1
)->uses
.length ();
3166 n2
= (*ccand2
)->uses
.length ();
3170 /* Adds IV candidates based on common candidated recorded. */
3173 add_iv_candidate_derived_from_uses (struct ivopts_data
*data
)
3176 struct iv_cand
*cand_1
, *cand_2
;
3178 data
->iv_common_cands
.qsort (common_cand_cmp
);
3179 for (i
= 0; i
< data
->iv_common_cands
.length (); i
++)
3181 struct iv_common_cand
*ptr
= data
->iv_common_cands
[i
];
3183 /* Only add IV candidate if it's derived from multiple uses. */
3184 if (ptr
->uses
.length () <= 1)
3189 if (ip_normal_pos (data
->current_loop
))
3190 cand_1
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3191 false, IP_NORMAL
, NULL
, NULL
);
3193 if (ip_end_pos (data
->current_loop
)
3194 && allow_ip_end_pos_p (data
->current_loop
))
3195 cand_2
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3196 false, IP_END
, NULL
, NULL
);
3198 /* Bind deriving uses and the new candidates. */
3199 for (j
= 0; j
< ptr
->uses
.length (); j
++)
3201 struct iv_use
*use
= ptr
->uses
[j
];
3203 bitmap_set_bit (use
->related_cands
, cand_1
->id
);
3205 bitmap_set_bit (use
->related_cands
, cand_2
->id
);
3209 /* Release data since it is useless from this point. */
3210 data
->iv_common_cand_tab
->empty ();
3211 data
->iv_common_cands
.truncate (0);
3214 /* Adds candidates based on the value of USE's iv. */
3217 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
3219 unsigned HOST_WIDE_INT offset
;
3222 struct iv
*iv
= use
->iv
;
3224 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
3226 /* Record common candidate for use in case it can be shared by others. */
3227 record_common_cand (data
, iv
->base
, iv
->step
, use
);
3229 /* Record common candidate with initial value zero. */
3230 basetype
= TREE_TYPE (iv
->base
);
3231 if (POINTER_TYPE_P (basetype
))
3232 basetype
= sizetype
;
3233 record_common_cand (data
, build_int_cst (basetype
, 0), iv
->step
, use
);
3235 /* Record common candidate with constant offset stripped in base. */
3237 base
= strip_offset (iv
->base
, &offset
);
3238 if (offset
|| base
!= iv
->base
)
3239 record_common_cand (data
, base
, iv
->step
, use
);
3242 /* Record common candidate with base_object removed in base. */
3243 if (iv
->base_object
!= NULL
)
3247 tree step
, base_object
= iv
->base_object
;
3253 STRIP_NOPS (base_object
);
3254 tree_to_aff_combination (base
, TREE_TYPE (base
), &aff_base
);
3255 for (i
= 0; i
< aff_base
.n
; i
++)
3257 if (aff_base
.elts
[i
].coef
!= 1)
3260 if (operand_equal_p (aff_base
.elts
[i
].val
, base_object
, 0))
3265 aff_combination_remove_elt (&aff_base
, i
);
3266 base
= aff_combination_to_tree (&aff_base
);
3267 basetype
= TREE_TYPE (base
);
3268 if (POINTER_TYPE_P (basetype
))
3269 basetype
= sizetype
;
3271 step
= fold_convert (basetype
, step
);
3272 record_common_cand (data
, base
, step
, use
);
3273 /* Also record common candidate with offset stripped. */
3274 base
= strip_offset (base
, &offset
);
3276 record_common_cand (data
, base
, step
, use
);
3280 /* At last, add auto-incremental candidates. Make such variables
3281 important since other iv uses with same base object may be based
3283 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
3284 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
3287 /* Adds candidates based on the uses. */
3290 add_iv_candidate_for_uses (struct ivopts_data
*data
)
3294 for (i
= 0; i
< n_iv_uses (data
); i
++)
3296 struct iv_use
*use
= iv_use (data
, i
);
3303 case USE_NONLINEAR_EXPR
:
3306 /* Just add the ivs based on the value of the iv used here. */
3307 add_iv_candidate_for_use (data
, use
);
3314 add_iv_candidate_derived_from_uses (data
);
3317 /* Record important candidates and add them to related_cands bitmaps. */
3320 record_important_candidates (struct ivopts_data
*data
)
3325 for (i
= 0; i
< n_iv_cands (data
); i
++)
3327 struct iv_cand
*cand
= iv_cand (data
, i
);
3329 if (cand
->important
)
3330 bitmap_set_bit (data
->important_candidates
, i
);
3333 data
->consider_all_candidates
= (n_iv_cands (data
)
3334 <= CONSIDER_ALL_CANDIDATES_BOUND
);
3336 /* Add important candidates to uses' related_cands bitmaps. */
3337 for (i
= 0; i
< n_iv_uses (data
); i
++)
3339 use
= iv_use (data
, i
);
3340 bitmap_ior_into (use
->related_cands
, data
->important_candidates
);
3344 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3345 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3346 we allocate a simple list to every use. */
3349 alloc_use_cost_map (struct ivopts_data
*data
)
3351 unsigned i
, size
, s
;
3353 for (i
= 0; i
< n_iv_uses (data
); i
++)
3355 struct iv_use
*use
= iv_use (data
, i
);
3357 if (data
->consider_all_candidates
)
3358 size
= n_iv_cands (data
);
3361 s
= bitmap_count_bits (use
->related_cands
);
3363 /* Round up to the power of two, so that moduling by it is fast. */
3364 size
= s
? (1 << ceil_log2 (s
)) : 1;
3367 use
->n_map_members
= size
;
3368 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3372 /* Returns description of computation cost of expression whose runtime
3373 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3376 new_cost (unsigned runtime
, unsigned complexity
)
3380 cost
.cost
= runtime
;
3381 cost
.complexity
= complexity
;
3386 /* Returns true if COST is infinite. */
3389 infinite_cost_p (comp_cost cost
)
3391 return cost
.cost
== INFTY
;
3394 /* Adds costs COST1 and COST2. */
3397 add_costs (comp_cost cost1
, comp_cost cost2
)
3399 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3400 return infinite_cost
;
3402 cost1
.cost
+= cost2
.cost
;
3403 cost1
.complexity
+= cost2
.complexity
;
3407 /* Subtracts costs COST1 and COST2. */
3410 sub_costs (comp_cost cost1
, comp_cost cost2
)
3412 cost1
.cost
-= cost2
.cost
;
3413 cost1
.complexity
-= cost2
.complexity
;
3418 /* Returns a negative number if COST1 < COST2, a positive number if
3419 COST1 > COST2, and 0 if COST1 = COST2. */
3422 compare_costs (comp_cost cost1
, comp_cost cost2
)
3424 if (cost1
.cost
== cost2
.cost
)
3425 return cost1
.complexity
- cost2
.complexity
;
3427 return cost1
.cost
- cost2
.cost
;
3430 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3431 on invariants DEPENDS_ON and that the value used in expressing it
3432 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3435 set_use_iv_cost (struct ivopts_data
*data
,
3436 struct iv_use
*use
, struct iv_cand
*cand
,
3437 comp_cost cost
, bitmap depends_on
, tree value
,
3438 enum tree_code comp
, int inv_expr_id
)
3442 if (infinite_cost_p (cost
))
3444 BITMAP_FREE (depends_on
);
3448 if (data
->consider_all_candidates
)
3450 use
->cost_map
[cand
->id
].cand
= cand
;
3451 use
->cost_map
[cand
->id
].cost
= cost
;
3452 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3453 use
->cost_map
[cand
->id
].value
= value
;
3454 use
->cost_map
[cand
->id
].comp
= comp
;
3455 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3459 /* n_map_members is a power of two, so this computes modulo. */
3460 s
= cand
->id
& (use
->n_map_members
- 1);
3461 for (i
= s
; i
< use
->n_map_members
; i
++)
3462 if (!use
->cost_map
[i
].cand
)
3464 for (i
= 0; i
< s
; i
++)
3465 if (!use
->cost_map
[i
].cand
)
3471 use
->cost_map
[i
].cand
= cand
;
3472 use
->cost_map
[i
].cost
= cost
;
3473 use
->cost_map
[i
].depends_on
= depends_on
;
3474 use
->cost_map
[i
].value
= value
;
3475 use
->cost_map
[i
].comp
= comp
;
3476 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3479 /* Gets cost of (USE, CANDIDATE) pair. */
3481 static struct cost_pair
*
3482 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3483 struct iv_cand
*cand
)
3486 struct cost_pair
*ret
;
3491 if (data
->consider_all_candidates
)
3493 ret
= use
->cost_map
+ cand
->id
;
3500 /* n_map_members is a power of two, so this computes modulo. */
3501 s
= cand
->id
& (use
->n_map_members
- 1);
3502 for (i
= s
; i
< use
->n_map_members
; i
++)
3503 if (use
->cost_map
[i
].cand
== cand
)
3504 return use
->cost_map
+ i
;
3505 else if (use
->cost_map
[i
].cand
== NULL
)
3507 for (i
= 0; i
< s
; i
++)
3508 if (use
->cost_map
[i
].cand
== cand
)
3509 return use
->cost_map
+ i
;
3510 else if (use
->cost_map
[i
].cand
== NULL
)
3516 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3518 produce_memory_decl_rtl (tree obj
, int *regno
)
3520 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3521 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3525 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3527 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3528 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3529 SET_SYMBOL_REF_DECL (x
, obj
);
3530 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3531 set_mem_addr_space (x
, as
);
3532 targetm
.encode_section_info (obj
, x
, true);
3536 x
= gen_raw_REG (address_mode
, (*regno
)++);
3537 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3538 set_mem_addr_space (x
, as
);
3544 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3545 walk_tree. DATA contains the actual fake register number. */
3548 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3550 tree obj
= NULL_TREE
;
3552 int *regno
= (int *) data
;
3554 switch (TREE_CODE (*expr_p
))
3557 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3558 handled_component_p (*expr_p
);
3559 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3562 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3563 x
= produce_memory_decl_rtl (obj
, regno
);
3568 obj
= SSA_NAME_VAR (*expr_p
);
3569 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3572 if (!DECL_RTL_SET_P (obj
))
3573 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3582 if (DECL_RTL_SET_P (obj
))
3585 if (DECL_MODE (obj
) == BLKmode
)
3586 x
= produce_memory_decl_rtl (obj
, regno
);
3588 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3598 decl_rtl_to_reset
.safe_push (obj
);
3599 SET_DECL_RTL (obj
, x
);
3605 /* Determines cost of the computation of EXPR. */
3608 computation_cost (tree expr
, bool speed
)
3612 tree type
= TREE_TYPE (expr
);
3614 /* Avoid using hard regs in ways which may be unsupported. */
3615 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3616 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3617 enum node_frequency real_frequency
= node
->frequency
;
3619 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3620 crtl
->maybe_hot_insn_p
= speed
;
3621 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3623 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3626 default_rtl_profile ();
3627 node
->frequency
= real_frequency
;
3629 cost
= seq_cost (seq
, speed
);
3631 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3632 TYPE_ADDR_SPACE (type
), speed
);
3633 else if (!REG_P (rslt
))
3634 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3639 /* Returns variable containing the value of candidate CAND at statement AT. */
3642 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
3644 if (stmt_after_increment (loop
, cand
, stmt
))
3645 return cand
->var_after
;
3647 return cand
->var_before
;
3650 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3651 same precision that is at least as wide as the precision of TYPE, stores
3652 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3656 determine_common_wider_type (tree
*a
, tree
*b
)
3658 tree wider_type
= NULL
;
3660 tree atype
= TREE_TYPE (*a
);
3662 if (CONVERT_EXPR_P (*a
))
3664 suba
= TREE_OPERAND (*a
, 0);
3665 wider_type
= TREE_TYPE (suba
);
3666 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3672 if (CONVERT_EXPR_P (*b
))
3674 subb
= TREE_OPERAND (*b
, 0);
3675 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3686 /* Determines the expression by that USE is expressed from induction variable
3687 CAND at statement AT in LOOP. The expression is stored in a decomposed
3688 form into AFF. Returns false if USE cannot be expressed using CAND. */
3691 get_computation_aff (struct loop
*loop
,
3692 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
,
3693 struct aff_tree
*aff
)
3695 tree ubase
= use
->iv
->base
;
3696 tree ustep
= use
->iv
->step
;
3697 tree cbase
= cand
->iv
->base
;
3698 tree cstep
= cand
->iv
->step
, cstep_common
;
3699 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3700 tree common_type
, var
;
3702 aff_tree cbase_aff
, var_aff
;
3705 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3707 /* We do not have a precision to express the values of use. */
3711 var
= var_at_stmt (loop
, cand
, at
);
3712 uutype
= unsigned_type_for (utype
);
3714 /* If the conversion is not noop, perform it. */
3715 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3717 if (cand
->orig_iv
!= NULL
&& CONVERT_EXPR_P (cbase
)
3718 && (CONVERT_EXPR_P (cstep
) || TREE_CODE (cstep
) == INTEGER_CST
))
3720 tree inner_base
, inner_step
, inner_type
;
3721 inner_base
= TREE_OPERAND (cbase
, 0);
3722 if (CONVERT_EXPR_P (cstep
))
3723 inner_step
= TREE_OPERAND (cstep
, 0);
3727 inner_type
= TREE_TYPE (inner_base
);
3728 /* If candidate is added from a biv whose type is smaller than
3729 ctype, we know both candidate and the biv won't overflow.
3730 In this case, it's safe to skip the convertion in candidate.
3731 As an example, (unsigned short)((unsigned long)A) equals to
3732 (unsigned short)A, if A has a type no larger than short. */
3733 if (TYPE_PRECISION (inner_type
) <= TYPE_PRECISION (uutype
))
3739 cstep
= fold_convert (uutype
, cstep
);
3740 cbase
= fold_convert (uutype
, cbase
);
3741 var
= fold_convert (uutype
, var
);
3744 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3747 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3748 type, we achieve better folding by computing their difference in this
3749 wider type, and cast the result to UUTYPE. We do not need to worry about
3750 overflows, as all the arithmetics will in the end be performed in UUTYPE
3752 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3754 /* use = ubase - ratio * cbase + ratio * var. */
3755 tree_to_aff_combination (ubase
, common_type
, aff
);
3756 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3757 tree_to_aff_combination (var
, uutype
, &var_aff
);
3759 /* We need to shift the value if we are after the increment. */
3760 if (stmt_after_increment (loop
, cand
, at
))
3764 if (common_type
!= uutype
)
3765 cstep_common
= fold_convert (common_type
, cstep
);
3767 cstep_common
= cstep
;
3769 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3770 aff_combination_add (&cbase_aff
, &cstep_aff
);
3773 aff_combination_scale (&cbase_aff
, -rat
);
3774 aff_combination_add (aff
, &cbase_aff
);
3775 if (common_type
!= uutype
)
3776 aff_combination_convert (aff
, uutype
);
3778 aff_combination_scale (&var_aff
, rat
);
3779 aff_combination_add (aff
, &var_aff
);
3784 /* Return the type of USE. */
3787 get_use_type (struct iv_use
*use
)
3789 tree base_type
= TREE_TYPE (use
->iv
->base
);
3792 if (use
->type
== USE_ADDRESS
)
3794 /* The base_type may be a void pointer. Create a pointer type based on
3795 the mem_ref instead. */
3796 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3797 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3798 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3806 /* Determines the expression by that USE is expressed from induction variable
3807 CAND at statement AT in LOOP. The computation is unshared. */
3810 get_computation_at (struct loop
*loop
,
3811 struct iv_use
*use
, struct iv_cand
*cand
, gimple
*at
)
3814 tree type
= get_use_type (use
);
3816 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3818 unshare_aff_combination (&aff
);
3819 return fold_convert (type
, aff_combination_to_tree (&aff
));
3822 /* Determines the expression by that USE is expressed from induction variable
3823 CAND in LOOP. The computation is unshared. */
3826 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3828 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3831 /* Adjust the cost COST for being in loop setup rather than loop body.
3832 If we're optimizing for space, the loop setup overhead is constant;
3833 if we're optimizing for speed, amortize it over the per-iteration cost. */
3835 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3839 else if (optimize_loop_for_speed_p (data
->current_loop
))
3840 return cost
/ avg_loop_niter (data
->current_loop
);
3845 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3846 validity for a memory reference accessing memory of mode MODE in
3847 address space AS. */
3851 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3854 #define MAX_RATIO 128
3855 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3856 static vec
<sbitmap
> valid_mult_list
;
3859 if (data_index
>= valid_mult_list
.length ())
3860 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3862 valid_mult
= valid_mult_list
[data_index
];
3865 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3866 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3867 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3871 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3872 bitmap_clear (valid_mult
);
3873 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3874 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3875 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3877 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3878 if (memory_address_addr_space_p (mode
, addr
, as
)
3879 || memory_address_addr_space_p (mode
, scaled
, as
))
3880 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3883 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3885 fprintf (dump_file
, " allowed multipliers:");
3886 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3887 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3888 fprintf (dump_file
, " %d", (int) i
);
3889 fprintf (dump_file
, "\n");
3890 fprintf (dump_file
, "\n");
3893 valid_mult_list
[data_index
] = valid_mult
;
3896 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3899 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3902 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3903 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3904 variable is omitted. Compute the cost for a memory reference that accesses
3905 a memory location of mode MEM_MODE in address space AS.
3907 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3908 size of MEM_MODE / RATIO) is available. To make this determination, we
3909 look at the size of the increment to be made, which is given in CSTEP.
3910 CSTEP may be zero if the step is unknown.
3911 STMT_AFTER_INC is true iff the statement we're looking at is after the
3912 increment of the original biv.
3914 TODO -- there must be some better way. This all is quite crude. */
3918 AINC_PRE_INC
, /* Pre increment. */
3919 AINC_PRE_DEC
, /* Pre decrement. */
3920 AINC_POST_INC
, /* Post increment. */
3921 AINC_POST_DEC
, /* Post decrement. */
3922 AINC_NONE
/* Also the number of auto increment types. */
3925 struct address_cost_data
3927 HOST_WIDE_INT min_offset
, max_offset
;
3928 unsigned costs
[2][2][2][2];
3929 unsigned ainc_costs
[AINC_NONE
];
3934 get_address_cost (bool symbol_present
, bool var_present
,
3935 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3936 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3937 addr_space_t as
, bool speed
,
3938 bool stmt_after_inc
, bool *may_autoinc
)
3940 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3941 static vec
<address_cost_data
*> address_cost_data_list
;
3942 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3943 address_cost_data
*data
;
3944 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3945 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3946 unsigned cost
, acost
, complexity
;
3947 enum ainc_type autoinc_type
;
3948 bool offset_p
, ratio_p
, autoinc
;
3949 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3950 unsigned HOST_WIDE_INT mask
;
3953 if (data_index
>= address_cost_data_list
.length ())
3954 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3956 data
= address_cost_data_list
[data_index
];
3960 HOST_WIDE_INT rat
, off
= 0;
3961 int old_cse_not_expected
, width
;
3962 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3967 data
= (address_cost_data
*) xcalloc (1, sizeof (*data
));
3969 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3971 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3972 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3973 width
= HOST_BITS_PER_WIDE_INT
- 1;
3974 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3976 for (i
= width
; i
>= 0; i
--)
3978 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3979 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3980 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3983 data
->min_offset
= (i
== -1? 0 : off
);
3985 for (i
= width
; i
>= 0; i
--)
3987 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3988 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3989 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3991 /* For some strict-alignment targets, the offset must be naturally
3992 aligned. Try an aligned offset if mem_mode is not QImode. */
3993 off
= mem_mode
!= QImode
3994 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3995 - GET_MODE_SIZE (mem_mode
)
3999 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
4000 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4006 data
->max_offset
= off
;
4008 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4010 fprintf (dump_file
, "get_address_cost:\n");
4011 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
4012 GET_MODE_NAME (mem_mode
),
4014 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
4015 GET_MODE_NAME (mem_mode
),
4020 for (i
= 2; i
<= MAX_RATIO
; i
++)
4021 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
4027 /* Compute the cost of various addressing modes. */
4029 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
4030 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
4032 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
4033 || USE_STORE_PRE_DECREMENT (mem_mode
))
4035 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
4036 has_predec
[mem_mode
]
4037 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4039 if (has_predec
[mem_mode
])
4040 data
->ainc_costs
[AINC_PRE_DEC
]
4041 = address_cost (addr
, mem_mode
, as
, speed
);
4043 if (USE_LOAD_POST_DECREMENT (mem_mode
)
4044 || USE_STORE_POST_DECREMENT (mem_mode
))
4046 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
4047 has_postdec
[mem_mode
]
4048 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4050 if (has_postdec
[mem_mode
])
4051 data
->ainc_costs
[AINC_POST_DEC
]
4052 = address_cost (addr
, mem_mode
, as
, speed
);
4054 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
4055 || USE_STORE_PRE_DECREMENT (mem_mode
))
4057 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
4058 has_preinc
[mem_mode
]
4059 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4061 if (has_preinc
[mem_mode
])
4062 data
->ainc_costs
[AINC_PRE_INC
]
4063 = address_cost (addr
, mem_mode
, as
, speed
);
4065 if (USE_LOAD_POST_INCREMENT (mem_mode
)
4066 || USE_STORE_POST_INCREMENT (mem_mode
))
4068 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
4069 has_postinc
[mem_mode
]
4070 = memory_address_addr_space_p (mem_mode
, addr
, as
);
4072 if (has_postinc
[mem_mode
])
4073 data
->ainc_costs
[AINC_POST_INC
]
4074 = address_cost (addr
, mem_mode
, as
, speed
);
4076 for (i
= 0; i
< 16; i
++)
4079 var_p
= (i
>> 1) & 1;
4080 off_p
= (i
>> 2) & 1;
4081 rat_p
= (i
>> 3) & 1;
4085 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
4086 gen_int_mode (rat
, address_mode
));
4089 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
4093 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
4094 /* ??? We can run into trouble with some backends by presenting
4095 it with symbols which haven't been properly passed through
4096 targetm.encode_section_info. By setting the local bit, we
4097 enhance the probability of things working. */
4098 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
4101 base
= gen_rtx_fmt_e (CONST
, address_mode
,
4103 (PLUS
, address_mode
, base
,
4104 gen_int_mode (off
, address_mode
)));
4107 base
= gen_int_mode (off
, address_mode
);
4112 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
4115 /* To avoid splitting addressing modes, pretend that no cse will
4117 old_cse_not_expected
= cse_not_expected
;
4118 cse_not_expected
= true;
4119 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
4120 cse_not_expected
= old_cse_not_expected
;
4124 acost
= seq_cost (seq
, speed
);
4125 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
4129 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
4132 /* On some targets, it is quite expensive to load symbol to a register,
4133 which makes addresses that contain symbols look much more expensive.
4134 However, the symbol will have to be loaded in any case before the
4135 loop (and quite likely we have it in register already), so it does not
4136 make much sense to penalize them too heavily. So make some final
4137 tweaks for the SYMBOL_PRESENT modes:
4139 If VAR_PRESENT is false, and the mode obtained by changing symbol to
4140 var is cheaper, use this mode with small penalty.
4141 If VAR_PRESENT is true, try whether the mode with
4142 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
4143 if this is the case, use it. */
4144 add_c
= add_cost (speed
, address_mode
);
4145 for (i
= 0; i
< 8; i
++)
4148 off_p
= (i
>> 1) & 1;
4149 rat_p
= (i
>> 2) & 1;
4151 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
4155 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
4156 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
4159 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4161 fprintf (dump_file
, "Address costs:\n");
4163 for (i
= 0; i
< 16; i
++)
4166 var_p
= (i
>> 1) & 1;
4167 off_p
= (i
>> 2) & 1;
4168 rat_p
= (i
>> 3) & 1;
4170 fprintf (dump_file
, " ");
4172 fprintf (dump_file
, "sym + ");
4174 fprintf (dump_file
, "var + ");
4176 fprintf (dump_file
, "cst + ");
4178 fprintf (dump_file
, "rat * ");
4180 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
4181 fprintf (dump_file
, "index costs %d\n", acost
);
4183 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
4184 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
4185 fprintf (dump_file
, " May include autoinc/dec\n");
4186 fprintf (dump_file
, "\n");
4189 address_cost_data_list
[data_index
] = data
;
4192 bits
= GET_MODE_BITSIZE (address_mode
);
4193 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
4195 if ((offset
>> (bits
- 1) & 1))
4200 autoinc_type
= AINC_NONE
;
4201 msize
= GET_MODE_SIZE (mem_mode
);
4202 autoinc_offset
= offset
;
4204 autoinc_offset
+= ratio
* cstep
;
4205 if (symbol_present
|| var_present
|| ratio
!= 1)
4209 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
4211 autoinc_type
= AINC_POST_INC
;
4212 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
4214 autoinc_type
= AINC_POST_DEC
;
4215 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
4217 autoinc_type
= AINC_PRE_INC
;
4218 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
4220 autoinc_type
= AINC_PRE_DEC
;
4222 if (autoinc_type
!= AINC_NONE
)
4227 offset_p
= (s_offset
!= 0
4228 && data
->min_offset
<= s_offset
4229 && s_offset
<= data
->max_offset
);
4230 ratio_p
= (ratio
!= 1
4231 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
4233 if (ratio
!= 1 && !ratio_p
)
4234 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
4236 if (s_offset
&& !offset_p
&& !symbol_present
)
4237 cost
+= add_cost (speed
, address_mode
);
4240 *may_autoinc
= autoinc
;
4242 acost
= data
->ainc_costs
[autoinc_type
];
4244 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
4245 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
4246 return new_cost (cost
+ acost
, complexity
);
4249 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4250 EXPR operand holding the shift. COST0 and COST1 are the costs for
4251 calculating the operands of EXPR. Returns true if successful, and returns
4252 the cost in COST. */
/* Cost of implementing EXPR -- a PLUS_EXPR or MINUS_EXPR one of whose
   operands is MULT, a multiplication by a power-of-two constant -- as a
   single shift-and-add / shift-and-sub insn instead of a shift followed
   by an add.  COST0 and COST1 are the precomputed costs of EXPR's
   operands; the result is stored through *COST.
   NOTE(review): extraction fragment -- the return type, the body of the
   early bail-out and the final return are not visible here.  */
4255 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
4256 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4259 tree op1
= TREE_OPERAND (expr
, 1);
4260 tree cst
= TREE_OPERAND (mult
, 1);
4261 tree multop
= TREE_OPERAND (mult
, 0);
/* Shift amount: log2 of the constant multiplier; negative when the
   constant is not an exact power of two.  */
4262 int m
= exact_log2 (int_cst_value (cst
));
4263 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4264 int as_cost
, sa_cost
;
/* Bail out on an invalid shift amount (not a power of two, or at
   least as wide as a word / the mode).  */
4267 if (!(m
>= 0 && m
< maxm
))
/* Whether MULT is EXPR's second operand; selects which operand cost
   (COST0 or COST1) belongs to the non-shifted operand below.  */
4271 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
/* Cost of the naive sequence: separate add and shift insns.  */
4273 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4275 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4276 use that in preference to a shift insn followed by an add insn. */
4277 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4278 ? shiftadd_cost (speed
, mode
, m
)
4280 ? shiftsub1_cost (speed
, mode
, m
)
4281 : shiftsub0_cost (speed
, mode
, m
)));
/* Take the cheaper of the two encodings, then add the cost of the
   operand that is not part of the shift.  */
4283 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
4284 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
/* If the multiplied operand is not already a simple gimple value, add
   the cost of forcing it into a variable.  */
4286 STRIP_NOPS (multop
);
4287 if (!is_gimple_val (multop
))
4288 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
4294 /* Estimates cost of forcing expression EXPR into a variable. */
4297 force_expr_to_var_cost (tree expr
, bool speed
)
4299 static bool costs_initialized
= false;
4300 static unsigned integer_cost
[2];
4301 static unsigned symbol_cost
[2];
4302 static unsigned address_cost
[2];
4304 comp_cost cost0
, cost1
, cost
;
4307 if (!costs_initialized
)
4309 tree type
= build_pointer_type (integer_type_node
);
4314 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4315 TREE_STATIC (var
) = 1;
4316 x
= produce_memory_decl_rtl (var
, NULL
);
4317 SET_DECL_RTL (var
, x
);
4319 addr
= build1 (ADDR_EXPR
, type
, var
);
4322 for (i
= 0; i
< 2; i
++)
4324 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4327 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4330 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4331 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4333 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4334 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4335 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4336 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4337 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4338 fprintf (dump_file
, "\n");
4342 costs_initialized
= true;
4347 if (SSA_VAR_P (expr
))
4350 if (is_gimple_min_invariant (expr
))
4352 if (TREE_CODE (expr
) == INTEGER_CST
)
4353 return new_cost (integer_cost
[speed
], 0);
4355 if (TREE_CODE (expr
) == ADDR_EXPR
)
4357 tree obj
= TREE_OPERAND (expr
, 0);
4359 if (TREE_CODE (obj
) == VAR_DECL
4360 || TREE_CODE (obj
) == PARM_DECL
4361 || TREE_CODE (obj
) == RESULT_DECL
)
4362 return new_cost (symbol_cost
[speed
], 0);
4365 return new_cost (address_cost
[speed
], 0);
4368 switch (TREE_CODE (expr
))
4370 case POINTER_PLUS_EXPR
:
4374 op0
= TREE_OPERAND (expr
, 0);
4375 op1
= TREE_OPERAND (expr
, 1);
4382 op0
= TREE_OPERAND (expr
, 0);
4388 /* Just an arbitrary value, FIXME. */
4389 return new_cost (target_spill_cost
[speed
], 0);
4392 if (op0
== NULL_TREE
4393 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4396 cost0
= force_expr_to_var_cost (op0
, speed
);
4398 if (op1
== NULL_TREE
4399 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4402 cost1
= force_expr_to_var_cost (op1
, speed
);
4404 mode
= TYPE_MODE (TREE_TYPE (expr
));
4405 switch (TREE_CODE (expr
))
4407 case POINTER_PLUS_EXPR
:
4411 cost
= new_cost (add_cost (speed
, mode
), 0);
4412 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4414 tree mult
= NULL_TREE
;
4416 if (TREE_CODE (op1
) == MULT_EXPR
)
4418 else if (TREE_CODE (op0
) == MULT_EXPR
)
4421 if (mult
!= NULL_TREE
4422 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4423 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4431 tree inner_mode
, outer_mode
;
4432 outer_mode
= TREE_TYPE (expr
);
4433 inner_mode
= TREE_TYPE (op0
);
4434 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4435 TYPE_MODE (inner_mode
), speed
), 0);
4440 if (cst_and_fits_in_hwi (op0
))
4441 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4443 else if (cst_and_fits_in_hwi (op1
))
4444 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4447 return new_cost (target_spill_cost
[speed
], 0);
4454 cost
= add_costs (cost
, cost0
);
4455 cost
= add_costs (cost
, cost1
);
4457 /* Bound the cost by target_spill_cost. The parts of complicated
4458 computations often are either loop invariant or at least can
4459 be shared between several iv uses, so letting this grow without
4460 limits would not give reasonable results. */
4461 if (cost
.cost
> (int) target_spill_cost
[speed
])
4462 cost
.cost
= target_spill_cost
[speed
];
4467 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4468 invariants the computation depends on. */
/* Cost of forcing EXPR into a variable.  The invariants EXPR depends
   on are recorded into *DEPENDS_ON by walking the expression with the
   find_depends callback; the actual cost computation is delegated to
   force_expr_to_var_cost for DATA->speed.
   NOTE(review): fragment -- any guard around the walk_tree call
   (e.g. "if (depends_on)") is not visible here.  */
4471 force_var_cost (struct ivopts_data
*data
,
4472 tree expr
, bitmap
*depends_on
)
/* fd_ivopts_data appears to be a file-scope side channel read by
   find_depends -- confirm at its definition.  */
4476 fd_ivopts_data
= data
;
4477 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4480 return force_expr_to_var_cost (expr
, data
->speed
);
4483 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4484 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4485 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4486 invariants the computation depends on. */
/* Cost of expressing address ADDR as var + symbol + offset (see the
   comment preceding this function).  ADDR is decomposed with
   get_inner_reference; the byte offset of the reference is added to
   *OFFSET, and *SYMBOL_PRESENT / *VAR_PRESENT classify the base decl.
   NOTE(review): fragment -- the leading operand(s) of the bail-out
   condition at original line 4504 and several braces are elided.  */
4489 split_address_cost (struct ivopts_data
*data
,
4490 tree addr
, bool *symbol_present
, bool *var_present
,
4491 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4494 HOST_WIDE_INT bitsize
;
4495 HOST_WIDE_INT bitpos
;
4498 int unsignedp
, reversep
, volatilep
;
4500 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4501 &unsignedp
, &reversep
, &volatilep
, false);
/* If the reference is not byte-aligned or its base is not a plain
   VAR_DECL, give up on the decomposition: treat it as an opaque
   variable and charge the generic spill cost.  */
4504 || bitpos
% BITS_PER_UNIT
!= 0
4506 || TREE_CODE (core
) != VAR_DECL
)
4508 *symbol_present
= false;
4509 *var_present
= true;
4510 fd_ivopts_data
= data
;
4512 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4514 return new_cost (target_spill_cost
[data
->speed
], 0);
/* Fold the byte offset of the reference into *OFFSET.  */
4517 *offset
+= bitpos
/ BITS_PER_UNIT
;
/* Static or external decls have link-time-constant addresses: report
   a symbol; anything else is a runtime variable.  */
4518 if (TREE_STATIC (core
)
4519 || DECL_EXTERNAL (core
))
4521 *symbol_present
= true;
4522 *var_present
= false;
4526 *symbol_present
= false;
4527 *var_present
= true;
4531 /* Estimates cost of expressing difference of addresses E1 - E2 as
4532 var + symbol + offset. The value of offset is added to OFFSET,
4533 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4534 part is missing. DEPENDS_ON is a set of the invariants the computation
/* Cost of expressing the difference of addresses E1 - E2 as
   var + symbol + offset (see the comment preceding this function).
   E1 must be an ADDR_EXPR.
   NOTE(review): fragment -- the bodies/returns of the
   constant-difference branch and several braces are elided.  */
4538 ptr_difference_cost (struct ivopts_data
*data
,
4539 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4540 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4542 HOST_WIDE_INT diff
= 0;
4543 aff_tree aff_e1
, aff_e2
;
4546 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
/* If the difference folds to a compile-time constant, no symbol or
   variable part remains.  */
4548 if (ptr_difference_const (e1
, e2
, &diff
))
4551 *symbol_present
= false;
4552 *var_present
= false;
/* E2 == 0: the difference is just the address E1; decompose it.  */
4556 if (integer_zerop (e2
))
4557 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4558 symbol_present
, var_present
, offset
, depends_on
);
4560 *symbol_present
= false;
4561 *var_present
= true;
/* General case: build E1 - E2 as an affine combination in a signed
   type and charge the cost of forcing the result into a variable.  */
4563 type
= signed_type_for (TREE_TYPE (e1
));
4564 tree_to_aff_combination (e1
, type
, &aff_e1
);
4565 tree_to_aff_combination (e2
, type
, &aff_e2
);
4566 aff_combination_scale (&aff_e2
, -1);
4567 aff_combination_add (&aff_e1
, &aff_e2
);
4569 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4572 /* Estimates cost of expressing difference E1 - E2 as
4573 var + symbol + offset. The value of offset is added to OFFSET,
4574 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4575 part is missing. DEPENDS_ON is a set of the invariants the computation
/* Cost of expressing the difference E1 - E2 as var + symbol + offset
   (see the comment preceding this function).  Constant offsets are
   stripped from both operands and folded into *OFFSET first.
   NOTE(review): fragment -- several returns and braces are elided.  */
4579 difference_cost (struct ivopts_data
*data
,
4580 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4581 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4583 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4584 unsigned HOST_WIDE_INT off1
, off2
;
4585 aff_tree aff_e1
, aff_e2
;
/* Fold the constant parts of both operands into *OFFSET.  */
4588 e1
= strip_offset (e1
, &off1
);
4589 e2
= strip_offset (e2
, &off2
);
4590 *offset
+= off1
- off2
;
/* Address difference: delegate to ptr_difference_cost.  */
4595 if (TREE_CODE (e1
) == ADDR_EXPR
)
4596 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4597 offset
, depends_on
);
4598 *symbol_present
= false;
/* Identical operands cancel: only the already-recorded offset
   remains.  */
4600 if (operand_equal_p (e1
, e2
, 0))
4602 *var_present
= false;
4606 *var_present
= true;
4608 if (integer_zerop (e2
))
4609 return force_var_cost (data
, e1
, depends_on
);
/* 0 - E2: cost of E2 plus a multiply-by-minus-one.  */
4611 if (integer_zerop (e1
))
4613 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4614 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
/* General case: build E1 - E2 as a signed affine combination and
   force it into a variable.  */
4618 type
= signed_type_for (TREE_TYPE (e1
));
4619 tree_to_aff_combination (e1
, type
, &aff_e1
);
4620 tree_to_aff_combination (e2
, type
, &aff_e2
);
4621 aff_combination_scale (&aff_e2
, -1);
4622 aff_combination_add (&aff_e1
, &aff_e2
);
4624 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4627 /* Returns true if AFF1 and AFF2 are identical. */
/* Returns true iff the affine combinations AFF1 and AFF2 are
   element-wise identical: same element count, and for each index the
   same coefficient and structurally equal value.
   NOTE(review): fragment -- the returns, and any comparison of the
   combinations' constant offsets, are not visible here.  */
4630 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4634 if (aff1
->n
!= aff2
->n
)
4637 for (i
= 0; i
< aff1
->n
; i
++)
/* Coefficients must match exactly...  */
4639 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
/* ...and the values must be structurally equal trees.  */
4642 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4648 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
/* Interns EXPR in DATA->inv_expr_tab, keyed by iterative_hash_expr,
   assigning a fresh id from DATA->inv_expr_id on first insertion.
   NOTE(review): fragment -- the early return for an already-existing
   slot and the final return of the id are not visible here.  */
4651 get_expr_id (struct ivopts_data
*data
, tree expr
)
4653 struct iv_inv_expr_ent ent
;
4654 struct iv_inv_expr_ent
**slot
;
4657 ent
.hash
= iterative_hash_expr (expr
, 0);
4658 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
/* Fresh entry: record the expression, its hash, and the next id.  */
4662 *slot
= XNEW (struct iv_inv_expr_ent
);
4663 (*slot
)->expr
= expr
;
4664 (*slot
)->hash
= ent
.hash
;
4665 (*slot
)->id
= data
->inv_expr_id
++;
4669 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4670 requires a new compiler generated temporary. Returns -1 otherwise.
4671 ADDRESS_P is a flag indicating if the expression is for address
4675 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4676 tree cbase
, HOST_WIDE_INT ratio
,
4679 aff_tree ubase_aff
, cbase_aff
;
4687 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4688 && (TREE_CODE (cbase
) == INTEGER_CST
))
4691 /* Strips the constant part. */
4692 if (TREE_CODE (ubase
) == PLUS_EXPR
4693 || TREE_CODE (ubase
) == MINUS_EXPR
4694 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4696 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4697 ubase
= TREE_OPERAND (ubase
, 0);
4700 /* Strips the constant part. */
4701 if (TREE_CODE (cbase
) == PLUS_EXPR
4702 || TREE_CODE (cbase
) == MINUS_EXPR
4703 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4705 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4706 cbase
= TREE_OPERAND (cbase
, 0);
4711 if (((TREE_CODE (ubase
) == SSA_NAME
)
4712 || (TREE_CODE (ubase
) == ADDR_EXPR
4713 && is_gimple_min_invariant (ubase
)))
4714 && (TREE_CODE (cbase
) == INTEGER_CST
))
4717 if (((TREE_CODE (cbase
) == SSA_NAME
)
4718 || (TREE_CODE (cbase
) == ADDR_EXPR
4719 && is_gimple_min_invariant (cbase
)))
4720 && (TREE_CODE (ubase
) == INTEGER_CST
))
4726 if (operand_equal_p (ubase
, cbase
, 0))
4729 if (TREE_CODE (ubase
) == ADDR_EXPR
4730 && TREE_CODE (cbase
) == ADDR_EXPR
)
4734 usym
= TREE_OPERAND (ubase
, 0);
4735 csym
= TREE_OPERAND (cbase
, 0);
4736 if (TREE_CODE (usym
) == ARRAY_REF
)
4738 tree ind
= TREE_OPERAND (usym
, 1);
4739 if (TREE_CODE (ind
) == INTEGER_CST
4740 && tree_fits_shwi_p (ind
)
4741 && tree_to_shwi (ind
) == 0)
4742 usym
= TREE_OPERAND (usym
, 0);
4744 if (TREE_CODE (csym
) == ARRAY_REF
)
4746 tree ind
= TREE_OPERAND (csym
, 1);
4747 if (TREE_CODE (ind
) == INTEGER_CST
4748 && tree_fits_shwi_p (ind
)
4749 && tree_to_shwi (ind
) == 0)
4750 csym
= TREE_OPERAND (csym
, 0);
4752 if (operand_equal_p (usym
, csym
, 0))
4755 /* Now do more complex comparison */
4756 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4757 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4758 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4762 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4763 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4765 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4766 aff_combination_add (&ubase_aff
, &cbase_aff
);
4767 expr
= aff_combination_to_tree (&ubase_aff
);
4768 return get_expr_id (data
, expr
);
4773 /* Determines the cost of the computation by that USE is expressed
4774 from induction variable CAND. If ADDRESS_P is true, we just need
4775 to create an address from it, otherwise we want to get it into
4776 register. A set of invariants we depend on is stored in
4777 DEPENDS_ON. AT is the statement at that the value is computed.
4778 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4779 addressing is likely. */
4782 get_computation_cost_at (struct ivopts_data
*data
,
4783 struct iv_use
*use
, struct iv_cand
*cand
,
4784 bool address_p
, bitmap
*depends_on
, gimple
*at
,
4788 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4790 tree utype
= TREE_TYPE (ubase
), ctype
;
4791 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4792 HOST_WIDE_INT ratio
, aratio
;
4793 bool var_present
, symbol_present
, stmt_is_after_inc
;
4796 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4797 machine_mode mem_mode
= (address_p
4798 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4804 /* Only consider real candidates. */
4806 return infinite_cost
;
4808 cbase
= cand
->iv
->base
;
4809 cstep
= cand
->iv
->step
;
4810 ctype
= TREE_TYPE (cbase
);
4812 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4814 /* We do not have a precision to express the values of use. */
4815 return infinite_cost
;
4819 || (use
->iv
->base_object
4820 && cand
->iv
->base_object
4821 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4822 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4824 /* Do not try to express address of an object with computation based
4825 on address of a different object. This may cause problems in rtl
4826 level alias analysis (that does not expect this to be happening,
4827 as this is illegal in C), and would be unlikely to be useful
4829 if (use
->iv
->base_object
4830 && cand
->iv
->base_object
4831 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4832 return infinite_cost
;
4835 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4837 /* TODO -- add direct handling of this case. */
4841 /* CSTEPI is removed from the offset in case statement is after the
4842 increment. If the step is not constant, we use zero instead.
4843 This is a bit imprecise (there is the extra addition), but
4844 redundancy elimination is likely to transform the code so that
4845 it uses value of the variable before increment anyway,
4846 so it is not that much unrealistic. */
4847 if (cst_and_fits_in_hwi (cstep
))
4848 cstepi
= int_cst_value (cstep
);
4852 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4853 return infinite_cost
;
4855 if (wi::fits_shwi_p (rat
))
4856 ratio
= rat
.to_shwi ();
4858 return infinite_cost
;
4861 ctype
= TREE_TYPE (cbase
);
4863 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4865 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4866 or ratio == 1, it is better to handle this like
4868 ubase - ratio * cbase + ratio * var
4870 (also holds in the case ratio == -1, TODO. */
4872 if (cst_and_fits_in_hwi (cbase
))
4874 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4875 cost
= difference_cost (data
,
4876 ubase
, build_int_cst (utype
, 0),
4877 &symbol_present
, &var_present
, &offset
,
4879 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4881 else if (ratio
== 1)
4883 tree real_cbase
= cbase
;
4885 /* Check to see if any adjustment is needed. */
4886 if (cstepi
== 0 && stmt_is_after_inc
)
4888 aff_tree real_cbase_aff
;
4891 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4893 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4895 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4896 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4899 cost
= difference_cost (data
,
4901 &symbol_present
, &var_present
, &offset
,
4903 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4906 && !POINTER_TYPE_P (ctype
)
4907 && multiplier_allowed_in_address_p
4909 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4911 if (cstepi
== 0 && stmt_is_after_inc
)
4913 if (POINTER_TYPE_P (ctype
))
4914 cbase
= fold_build2 (POINTER_PLUS_EXPR
, ctype
, cbase
, cstep
);
4916 cbase
= fold_build2 (PLUS_EXPR
, ctype
, cbase
, cstep
);
4919 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4920 cost
= difference_cost (data
,
4922 &symbol_present
, &var_present
, &offset
,
4924 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4928 cost
= force_var_cost (data
, cbase
, depends_on
);
4929 cost
= add_costs (cost
,
4930 difference_cost (data
,
4931 ubase
, build_int_cst (utype
, 0),
4932 &symbol_present
, &var_present
,
4933 &offset
, depends_on
));
4934 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4935 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4938 /* Set of invariants depended on by sub use has already been computed
4939 for the first use in the group. */
4943 if (depends_on
&& *depends_on
)
4944 bitmap_clear (*depends_on
);
4946 else if (inv_expr_id
)
4949 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4950 /* Clear depends on. */
4951 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4952 bitmap_clear (*depends_on
);
4955 /* If we are after the increment, the value of the candidate is higher by
4957 if (stmt_is_after_inc
)
4958 offset
-= ratio
* cstepi
;
4960 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4961 (symbol/var1/const parts may be omitted). If we are looking for an
4962 address, find the cost of addressing this. */
4964 return add_costs (cost
,
4965 get_address_cost (symbol_present
, var_present
,
4966 offset
, ratio
, cstepi
,
4968 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4969 speed
, stmt_is_after_inc
,
4972 /* Otherwise estimate the costs for computing the expression. */
4973 if (!symbol_present
&& !var_present
&& !offset
)
4976 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4980 /* Symbol + offset should be compile-time computable so consider that they
4981 are added once to the variable, if present. */
4982 if (var_present
&& (symbol_present
|| offset
))
4983 cost
.cost
+= adjust_setup_cost (data
,
4984 add_cost (speed
, TYPE_MODE (ctype
)));
4986 /* Having offset does not affect runtime cost in case it is added to
4987 symbol, but it increases complexity. */
4991 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4993 aratio
= ratio
> 0 ? ratio
: -ratio
;
4995 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
5000 *can_autoinc
= false;
5003 /* Just get the expression, expand it and measure the cost. */
5004 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
5007 return infinite_cost
;
5010 comp
= build_simple_mem_ref (comp
);
5012 return new_cost (computation_cost (comp
, speed
), 0);
5016 /* Determines the cost of the computation by that USE is expressed
5017 from induction variable CAND. If ADDRESS_P is true, we just need
5018 to create an address from it, otherwise we want to get it into
5019 register. A set of invariants we depend on is stored in
5020 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
5021 autoinc addressing is likely. */
/* Thin wrapper around get_computation_cost_at that evaluates the cost
   of expressing USE from CAND at USE's own statement (USE->stmt).
   See get_computation_cost_at for the parameter semantics.  */
5024 get_computation_cost (struct ivopts_data
*data
,
5025 struct iv_use
*use
, struct iv_cand
*cand
,
5026 bool address_p
, bitmap
*depends_on
,
5027 bool *can_autoinc
, int *inv_expr_id
)
5029 return get_computation_cost_at (data
,
5030 use
, cand
, address_p
, depends_on
, use
->stmt
,
5031 can_autoinc
, inv_expr_id
);
5034 /* Determines cost of basing replacement of USE on CAND in a generic
/* Determines cost of basing replacement of USE on CAND in a generic
   expression; records the result via set_use_iv_cost and returns true
   iff the cost is finite.
   NOTE(review): fragment -- trailing arguments of both set_use_iv_cost
   calls and the early "return true" are elided.  */
5038 determine_use_iv_cost_generic (struct ivopts_data
*data
,
5039 struct iv_use
*use
, struct iv_cand
*cand
)
5043 int inv_expr_id
= -1;
5045 /* The simple case first -- if we need to express value of the preserved
5046 original biv, the cost is 0. This also prevents us from counting the
5047 cost of increment twice -- once at this use and once in the cost of
5049 if (cand
->pos
== IP_ORIGINAL
5050 && cand
->incremented_at
== use
->stmt
)
5052 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
/* General case: compute the (non-address) computation cost and record
   it together with the invariants it depends on.  */
5057 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
5058 NULL
, &inv_expr_id
);
5060 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
5063 return !infinite_cost_p (cost
);
5066 /* Determines cost of basing replacement of USE on CAND in an address. */
/* Determines cost of basing replacement of address USE (together with
   the sub-uses chained through USE->next) on CAND; records the result
   via set_use_iv_cost and returns true iff the total cost is finite.
   NOTE(review): fragment -- some braces and trailing arguments of the
   set_use_iv_cost call are elided.  */
5069 determine_use_iv_cost_address (struct ivopts_data
*data
,
5070 struct iv_use
*use
, struct iv_cand
*cand
)
5074 int inv_expr_id
= -1;
5075 struct iv_use
*sub_use
;
5077 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5078 &can_autoinc
, &inv_expr_id
);
/* If CAND was created for this use's autoincrement opportunity, its
   step is folded into the addressing mode: refund the step cost.  */
5080 if (cand
->ainc_use
== use
)
5083 cost
.cost
-= cand
->cost_step
;
5084 /* If we generated the candidate solely for exploiting autoincrement
5085 opportunities, and it turns out it can't be used, set the cost to
5086 infinity to make sure we ignore it. */
5087 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
5088 cost
= infinite_cost
;
/* Accumulate the costs of all grouped sub-uses; stop early once the
   total is already infinite.  */
5090 for (sub_use
= use
->next
;
5091 sub_use
&& !infinite_cost_p (cost
);
5092 sub_use
= sub_use
->next
)
5094 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, NULL
,
5095 &can_autoinc
, NULL
);
5096 cost
= add_costs (cost
, sub_cost
);
5099 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
5102 return !infinite_cost_p (cost
);
5105 /* Computes value of candidate CAND at position AT in iteration NITER, and
5106 stores it to VAL. */
/* Computes the value of candidate CAND at position AT in iteration
   NITER and stores it as an affine combination through the (partially
   elided) VAL parameter: val = base + step * niter, plus one extra
   step when AT executes after CAND's increment.  Arithmetic is done in
   an unsigned type (sizetype for pointers) so wrap-around is defined.
   NOTE(review): fragment -- the tail of the parameter list and the
   "else" between original lines 5117/5118 are elided.  */
5109 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple
*at
, tree niter
,
5112 aff_tree step
, delta
, nit
;
5113 struct iv
*iv
= cand
->iv
;
5114 tree type
= TREE_TYPE (iv
->base
);
5115 tree steptype
= type
;
/* Pointers step in sizetype; integer ivs in the unsigned variant of
   their own type.  */
5116 if (POINTER_TYPE_P (type
))
5117 steptype
= sizetype
;
5118 steptype
= unsigned_type_for (type
);
5120 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
5121 aff_combination_convert (&step
, steptype
);
5122 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
5123 aff_combination_convert (&nit
, steptype
);
/* delta = step * niter.  */
5124 aff_combination_mult (&nit
, &step
, &delta
);
/* One extra step if AT executes after CAND's increment.  */
5125 if (stmt_after_increment (loop
, cand
, at
))
5126 aff_combination_add (&delta
, &step
);
/* val = base + delta, converting non-pointer bases to the unsigned
   step type first.  */
5128 tree_to_aff_combination (iv
->base
, type
, val
);
5129 if (!POINTER_TYPE_P (type
))
5130 aff_combination_convert (val
, steptype
);
5131 aff_combination_add (val
, &delta
);
5134 /* Returns period of induction variable iv. */
/* Returns period of induction variable iv: built as a low-bits mask of
   width TYPE_PRECISION - ctz (step), per the derivation in the comment
   below; requires a constant step.
   NOTE(review): fragment -- the final "return period;" is elided.  */
5137 iv_period (struct iv
*iv
)
5139 tree step
= iv
->step
, period
, type
;
5142 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
5144 type
= unsigned_type_for (TREE_TYPE (step
));
5145 /* Period of the iv is lcm (step, type_range)/step -1,
5146 i.e., N*type_range/step - 1. Since type range is power
5147 of two, N == (step >> num_of_ending_zeros_binary (step),
5148 so the final result is
5150 (type_range >> num_of_ending_zeros_binary (step)) - 1
5153 pow2div
= num_ending_zeros (step
);
5155 period
= build_low_bits_mask (type
,
5156 (TYPE_PRECISION (type
)
5157 - tree_to_uhwi (pow2div
)));
5162 /* Returns the comparison operator used when eliminating the iv USE. */
/* Returns the comparison operator used when eliminating the iv USE:
   EQ_EXPR when the loop-exit successor of USE's block is reached on
   the true edge of the condition, NE_EXPR otherwise.  */
5164 static enum tree_code
5165 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
5167 struct loop
*loop
= data
->current_loop
;
5171 ex_bb
= gimple_bb (use
->stmt
);
/* Pick whichever of the two successor edges leaves the loop.  */
5172 exit
= EDGE_SUCC (ex_bb
, 0);
5173 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5174 exit
= EDGE_SUCC (ex_bb
, 1);
5176 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
5179 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
5180 we only detect the situation that BASE = SOMETHING + OFFSET, where the
5181 calculation is performed in non-wrapping type.
5183 TODO: More generally, we could test for the situation that
5184 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
5185 This would require knowing the sign of OFFSET. */
/* See the comment preceding this function: proves that BASE - OFFSET
   cannot overflow by showing BASE = SOMETHING +/- OFFSET computed in a
   non-wrapping type.  BASE is unpacked either from the defining
   GIMPLE_ASSIGN of an SSA name or directly from a binary tree node,
   and the equality with OFFSET is checked via expanded affine
   combinations (using DATA->name_expansion_cache).
   NOTE(review): fragment -- the switch over CODE and its other case
   labels are elided; only the POINTER_PLUS_EXPR label survives, so the
   branch-to-tree-code mapping cannot be fully confirmed here.  */
5188 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
5190 enum tree_code code
;
5192 aff_tree aff_e1
, aff_e2
, aff_offset
;
/* Only a non-wrapping type lets us reason about overflow at all.  */
5194 if (!nowrap_type_p (TREE_TYPE (base
)))
5197 base
= expand_simple_operations (base
);
/* Extract a binary operation CODE with operands E1, E2 defining BASE,
   from its SSA definition statement or from the tree itself.  */
5199 if (TREE_CODE (base
) == SSA_NAME
)
5201 gimple
*stmt
= SSA_NAME_DEF_STMT (base
);
5203 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
5206 code
= gimple_assign_rhs_code (stmt
);
5207 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5210 e1
= gimple_assign_rhs1 (stmt
);
5211 e2
= gimple_assign_rhs2 (stmt
);
5215 code
= TREE_CODE (base
);
5216 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
5218 e1
= TREE_OPERAND (base
, 0);
5219 e2
= TREE_OPERAND (base
, 1);
5222 /* Use affine expansion as deeper inspection to prove the equality. */
5223 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
5224 &aff_e2
, &data
->name_expansion_cache
);
5225 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
5226 &aff_offset
, &data
->name_expansion_cache
);
/* Negate OFFSET once; the branches below test E1/E2 against it by
   checking whether the sum collapses to zero.  */
5227 aff_combination_scale (&aff_offset
, -1);
5231 aff_combination_add (&aff_e2
, &aff_offset
);
5232 if (aff_combination_zero_p (&aff_e2
))
5235 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
5236 &aff_e1
, &data
->name_expansion_cache
);
5237 aff_combination_add (&aff_e1
, &aff_offset
);
5238 return aff_combination_zero_p (&aff_e1
);
/* Pointer addition: presumably only E2 can carry the offset here --
   confirm against the unelided switch in the full source.  */
5240 case POINTER_PLUS_EXPR
:
5241 aff_combination_add (&aff_e2
, &aff_offset
);
5242 return aff_combination_zero_p (&aff_e2
);
5249 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
5250 comparison with CAND. NITER describes the number of iterations of
5251 the loops. If successful, the comparison in COMP_P is altered accordingly.
5253 We aim to handle the following situation:
5269 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
5270 We aim to optimize this to
5278 while (p < p_0 - a + b);
5280 This preserves the correctness, since the pointer arithmetics does not
5281 overflow. More precisely:
5283 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
5284 overflow in computing it or the values of p.
5285 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
5286 overflow. To prove this, we use the fact that p_0 = base + a. */
5289 iv_elimination_compare_lt (struct ivopts_data
*data
,
5290 struct iv_cand
*cand
, enum tree_code
*comp_p
,
5291 struct tree_niter_desc
*niter
)
5293 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
5294 struct aff_tree nit
, tmpa
, tmpb
;
5295 enum tree_code comp
;
5298 /* We need to know that the candidate induction variable does not overflow.
5299 While more complex analysis may be used to prove this, for now just
5300 check that the variable appears in the original program and that it
5301 is computed in a type that guarantees no overflows. */
5302 cand_type
= TREE_TYPE (cand
->iv
->base
);
5303 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
5306 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
5307 the calculation of the BOUND could overflow, making the comparison
5309 if (!data
->loop_single_exit_p
)
5312 /* We need to be able to decide whether candidate is increasing or decreasing
5313 in order to choose the right comparison operator. */
5314 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
5316 step
= int_cst_value (cand
->iv
->step
);
5318 /* Check that the number of iterations matches the expected pattern:
5319 a + 1 > b ? 0 : b - a - 1. */
5320 mbz
= niter
->may_be_zero
;
5321 if (TREE_CODE (mbz
) == GT_EXPR
)
5323 /* Handle a + 1 > b. */
5324 tree op0
= TREE_OPERAND (mbz
, 0);
5325 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
5327 a
= TREE_OPERAND (op0
, 0);
5328 b
= TREE_OPERAND (mbz
, 1);
5333 else if (TREE_CODE (mbz
) == LT_EXPR
)
5335 tree op1
= TREE_OPERAND (mbz
, 1);
5337 /* Handle b < a + 1. */
5338 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
5340 a
= TREE_OPERAND (op1
, 0);
5341 b
= TREE_OPERAND (mbz
, 0);
5349 /* Expected number of iterations is B - A - 1. Check that it matches
5350 the actual number, i.e., that B - A - NITER = 1. */
5351 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
5352 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
5353 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
5354 aff_combination_scale (&nit
, -1);
5355 aff_combination_scale (&tmpa
, -1);
5356 aff_combination_add (&tmpb
, &tmpa
);
5357 aff_combination_add (&tmpb
, &nit
);
5358 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
5361 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5363 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
5365 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
5366 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
5369 /* Determine the new comparison operator. */
5370 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5371 if (*comp_p
== NE_EXPR
)
5373 else if (*comp_p
== EQ_EXPR
)
5374 *comp_p
= invert_tree_comparison (comp
, false);
5381 /* Check whether it is possible to express the condition in USE by comparison
5382 of candidate CAND. If so, store the value compared with to BOUND, and the
5383 comparison operator to COMP. */
5386 may_eliminate_iv (struct ivopts_data
*data
,
5387 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5388 enum tree_code
*comp
)
5393 struct loop
*loop
= data
->current_loop
;
5395 struct tree_niter_desc
*desc
= NULL
;
5397 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5400 /* For now works only for exits that dominate the loop latch.
5401 TODO: extend to other conditions inside loop body. */
5402 ex_bb
= gimple_bb (use
->stmt
);
5403 if (use
->stmt
!= last_stmt (ex_bb
)
5404 || gimple_code (use
->stmt
) != GIMPLE_COND
5405 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5408 exit
= EDGE_SUCC (ex_bb
, 0);
5409 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5410 exit
= EDGE_SUCC (ex_bb
, 1);
5411 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5414 desc
= niter_for_exit (data
, exit
);
5418 /* Determine whether we can use the variable to test the exit condition.
5419 This is the case iff the period of the induction variable is greater
5420 than the number of iterations for which the exit condition is true. */
5421 period
= iv_period (cand
->iv
);
5423 /* If the number of iterations is constant, compare against it directly. */
5424 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5426 /* See cand_value_at. */
5427 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5429 if (!tree_int_cst_lt (desc
->niter
, period
))
5434 if (tree_int_cst_lt (period
, desc
->niter
))
5439 /* If not, and if this is the only possible exit of the loop, see whether
5440 we can get a conservative estimate on the number of iterations of the
5441 entire loop and compare against that instead. */
5444 widest_int period_value
, max_niter
;
5446 max_niter
= desc
->max
;
5447 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5449 period_value
= wi::to_widest (period
);
5450 if (wi::gtu_p (max_niter
, period_value
))
5452 /* See if we can take advantage of inferred loop bound information. */
5453 if (data
->loop_single_exit_p
)
5455 if (!max_loop_iterations (loop
, &max_niter
))
5457 /* The loop bound is already adjusted by adding 1. */
5458 if (wi::gtu_p (max_niter
, period_value
))
5466 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5468 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5469 aff_combination_to_tree (&bnd
));
5470 *comp
= iv_elimination_compare (data
, use
);
5472 /* It is unlikely that computing the number of iterations using division
5473 would be more profitable than keeping the original induction variable. */
5474 if (expression_expensive_p (*bound
))
5477 /* Sometimes, it is possible to handle the situation that the number of
5478 iterations may be zero unless additional assumtions by using <
5479 instead of != in the exit condition.
5481 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5482 base the exit condition on it. However, that is often too
5484 if (!integer_zerop (desc
->may_be_zero
))
5485 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5490 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5491 be copied, if it is used in the loop body and DATA->body_includes_call. */
5494 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5496 tree sbound
= bound
;
5497 STRIP_NOPS (sbound
);
5499 if (TREE_CODE (sbound
) == SSA_NAME
5500 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5501 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5502 && data
->body_includes_call
)
5503 return COSTS_N_INSNS (1);
5508 /* Determines cost of basing replacement of USE on CAND in a condition. */
5511 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5512 struct iv_use
*use
, struct iv_cand
*cand
)
5514 tree bound
= NULL_TREE
;
5516 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5517 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5519 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5520 tree
*control_var
, *bound_cst
;
5521 enum tree_code comp
= ERROR_MARK
;
5523 /* Only consider real candidates. */
5526 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5531 /* Try iv elimination. */
5532 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5534 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5535 if (elim_cost
.cost
== 0)
5536 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5537 else if (TREE_CODE (bound
) == INTEGER_CST
)
5539 /* If we replace a loop condition 'i < n' with 'p < base + n',
5540 depends_on_elim will have 'base' and 'n' set, which implies
5541 that both 'base' and 'n' will be live during the loop. More likely,
5542 'base + n' will be loop invariant, resulting in only one live value
5543 during the loop. So in that case we clear depends_on_elim and set
5544 elim_inv_expr_id instead. */
5545 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5547 elim_inv_expr_id
= get_expr_id (data
, bound
);
5548 bitmap_clear (depends_on_elim
);
5550 /* The bound is a loop invariant, so it will be only computed
5552 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5555 elim_cost
= infinite_cost
;
5557 /* Try expressing the original giv. If it is compared with an invariant,
5558 note that we cannot get rid of it. */
5559 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5563 /* When the condition is a comparison of the candidate IV against
5564 zero, prefer this IV.
5566 TODO: The constant that we're subtracting from the cost should
5567 be target-dependent. This information should be added to the
5568 target costs for each backend. */
5569 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5570 && integer_zerop (*bound_cst
)
5571 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5572 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5573 elim_cost
.cost
-= 1;
5575 express_cost
= get_computation_cost (data
, use
, cand
, false,
5576 &depends_on_express
, NULL
,
5577 &express_inv_expr_id
);
5578 fd_ivopts_data
= data
;
5579 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5581 /* Count the cost of the original bound as well. */
5582 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5583 if (bound_cost
.cost
== 0)
5584 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5585 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5586 bound_cost
.cost
= 0;
5587 express_cost
.cost
+= bound_cost
.cost
;
5589 /* Choose the better approach, preferring the eliminated IV. */
5590 if (compare_costs (elim_cost
, express_cost
) <= 0)
5593 depends_on
= depends_on_elim
;
5594 depends_on_elim
= NULL
;
5595 inv_expr_id
= elim_inv_expr_id
;
5599 cost
= express_cost
;
5600 depends_on
= depends_on_express
;
5601 depends_on_express
= NULL
;
5604 inv_expr_id
= express_inv_expr_id
;
5607 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5609 if (depends_on_elim
)
5610 BITMAP_FREE (depends_on_elim
);
5611 if (depends_on_express
)
5612 BITMAP_FREE (depends_on_express
);
5614 return !infinite_cost_p (cost
);
5617 /* Determines cost of basing replacement of USE on CAND. Returns false
5618 if USE cannot be based on CAND. */
5621 determine_use_iv_cost (struct ivopts_data
*data
,
5622 struct iv_use
*use
, struct iv_cand
*cand
)
5626 case USE_NONLINEAR_EXPR
:
5627 return determine_use_iv_cost_generic (data
, use
, cand
);
5630 return determine_use_iv_cost_address (data
, use
, cand
);
5633 return determine_use_iv_cost_condition (data
, use
, cand
);
5640 /* Return true if get_computation_cost indicates that autoincrement is
5641 a possibility for the pair of USE and CAND, false otherwise. */
5644 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5645 struct iv_cand
*cand
)
5651 if (use
->type
!= USE_ADDRESS
)
5654 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5655 &can_autoinc
, NULL
);
5657 BITMAP_FREE (depends_on
);
5659 return !infinite_cost_p (cost
) && can_autoinc
;
5662 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5663 use that allows autoincrement, and set their AINC_USE if possible. */
5666 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5670 for (i
= 0; i
< n_iv_cands (data
); i
++)
5672 struct iv_cand
*cand
= iv_cand (data
, i
);
5673 struct iv_use
*closest_before
= NULL
;
5674 struct iv_use
*closest_after
= NULL
;
5675 if (cand
->pos
!= IP_ORIGINAL
)
5678 for (j
= 0; j
< n_iv_uses (data
); j
++)
5680 struct iv_use
*use
= iv_use (data
, j
);
5681 unsigned uid
= gimple_uid (use
->stmt
);
5683 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5686 if (uid
< gimple_uid (cand
->incremented_at
)
5687 && (closest_before
== NULL
5688 || uid
> gimple_uid (closest_before
->stmt
)))
5689 closest_before
= use
;
5691 if (uid
> gimple_uid (cand
->incremented_at
)
5692 && (closest_after
== NULL
5693 || uid
< gimple_uid (closest_after
->stmt
)))
5694 closest_after
= use
;
5697 if (closest_before
!= NULL
5698 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5699 cand
->ainc_use
= closest_before
;
5700 else if (closest_after
!= NULL
5701 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5702 cand
->ainc_use
= closest_after
;
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_uses (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
5726 /* Determines costs of basing the use of the iv on an iv candidate. */
5729 determine_use_iv_costs (struct ivopts_data
*data
)
5733 struct iv_cand
*cand
;
5734 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5736 alloc_use_cost_map (data
);
5738 for (i
= 0; i
< n_iv_uses (data
); i
++)
5740 use
= iv_use (data
, i
);
5742 if (data
->consider_all_candidates
)
5744 for (j
= 0; j
< n_iv_cands (data
); j
++)
5746 cand
= iv_cand (data
, j
);
5747 determine_use_iv_cost (data
, use
, cand
);
5754 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5756 cand
= iv_cand (data
, j
);
5757 if (!determine_use_iv_cost (data
, use
, cand
))
5758 bitmap_set_bit (to_clear
, j
);
5761 /* Remove the candidates for that the cost is infinite from
5762 the list of related candidates. */
5763 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5764 bitmap_clear (to_clear
);
5768 BITMAP_FREE (to_clear
);
5770 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5772 fprintf (dump_file
, "Use-candidate costs:\n");
5774 for (i
= 0; i
< n_iv_uses (data
); i
++)
5776 use
= iv_use (data
, i
);
5778 fprintf (dump_file
, "Use %d:\n", i
);
5779 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5780 for (j
= 0; j
< use
->n_map_members
; j
++)
5782 if (!use
->cost_map
[j
].cand
5783 || infinite_cost_p (use
->cost_map
[j
].cost
))
5786 fprintf (dump_file
, " %d\t%d\t%d\t",
5787 use
->cost_map
[j
].cand
->id
,
5788 use
->cost_map
[j
].cost
.cost
,
5789 use
->cost_map
[j
].cost
.complexity
);
5790 if (use
->cost_map
[j
].depends_on
)
5791 bitmap_print (dump_file
,
5792 use
->cost_map
[j
].depends_on
, "","");
5793 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5794 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5795 fprintf (dump_file
, "\n");
5798 fprintf (dump_file
, "\n");
5800 fprintf (dump_file
, "\n");
5804 /* Determines cost of the candidate CAND. */
5807 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5809 comp_cost cost_base
;
5810 unsigned cost
, cost_step
;
5819 /* There are two costs associated with the candidate -- its increment
5820 and its initialization. The second is almost negligible for any loop
5821 that rolls enough, so we take it just very little into account. */
5823 base
= cand
->iv
->base
;
5824 cost_base
= force_var_cost (data
, base
, NULL
);
5825 /* It will be exceptional that the iv register happens to be initialized with
5826 the proper value at no cost. In general, there will at least be a regcopy
5828 if (cost_base
.cost
== 0)
5829 cost_base
.cost
= COSTS_N_INSNS (1);
5830 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5832 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5834 /* Prefer the original ivs unless we may gain something by replacing it.
5835 The reason is to make debugging simpler; so this is not relevant for
5836 artificial ivs created by other optimization passes. */
5837 if (cand
->pos
!= IP_ORIGINAL
5838 || !SSA_NAME_VAR (cand
->var_before
)
5839 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5842 /* Prefer not to insert statements into latch unless there are some
5843 already (so that we do not create unnecessary jumps). */
5844 if (cand
->pos
== IP_END
5845 && empty_block_p (ip_end_pos (data
->current_loop
)))
5849 cand
->cost_step
= cost_step
;
5852 /* Determines costs of computation of the candidates. */
5855 determine_iv_costs (struct ivopts_data
*data
)
5859 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5861 fprintf (dump_file
, "Candidate costs:\n");
5862 fprintf (dump_file
, " cand\tcost\n");
5865 for (i
= 0; i
< n_iv_cands (data
); i
++)
5867 struct iv_cand
*cand
= iv_cand (data
, i
);
5869 determine_iv_cost (data
, cand
);
5871 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5872 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5875 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5876 fprintf (dump_file
, "\n");
5879 /* Calculates cost for having SIZE induction variables. */
5882 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5884 /* We add size to the cost, so that we prefer eliminating ivs
5886 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5887 data
->body_includes_call
);
5890 /* For each size of the induction variable set determine the penalty. */
5893 determine_set_costs (struct ivopts_data
*data
)
5899 struct loop
*loop
= data
->current_loop
;
5902 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5904 fprintf (dump_file
, "Global costs:\n");
5905 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5906 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5907 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5908 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5912 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5915 op
= PHI_RESULT (phi
);
5917 if (virtual_operand_p (op
))
5920 if (get_iv (data
, op
))
5926 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5928 struct version_info
*info
= ver_info (data
, j
);
5930 if (info
->inv_id
&& info
->has_nonlin_use
)
5934 data
->regs_used
= n
;
5935 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5936 fprintf (dump_file
, " regs_used %d\n", n
);
5938 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5940 fprintf (dump_file
, " cost for size:\n");
5941 fprintf (dump_file
, " ivs\tcost\n");
5942 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5943 fprintf (dump_file
, " %d\t%d\n", j
,
5944 ivopts_global_cost_for_size (data
, j
));
5945 fprintf (dump_file
, "\n");
5949 /* Returns true if A is a cheaper cost pair than B. */
5952 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5962 cmp
= compare_costs (a
->cost
, b
->cost
);
5969 /* In case the costs are the same, prefer the cheaper candidate. */
5970 if (a
->cand
->cost
< b
->cand
->cost
)
5977 /* Returns candidate by that USE is expressed in IVS. */
5979 static struct cost_pair
*
5980 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5982 return ivs
->cand_for_use
[use
->id
];
5985 /* Computes the cost field of IVS structure. */
5988 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5990 comp_cost cost
= ivs
->cand_use_cost
;
5992 cost
.cost
+= ivs
->cand_cost
;
5994 cost
.cost
+= ivopts_global_cost_for_size (data
,
5995 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
6000 /* Remove invariants in set INVS to set IVS. */
6003 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
6011 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
6013 ivs
->n_invariant_uses
[iid
]--;
6014 if (ivs
->n_invariant_uses
[iid
] == 0)
6019 /* Set USE not to be expressed by any candidate in IVS. */
6022 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6025 unsigned uid
= use
->id
, cid
;
6026 struct cost_pair
*cp
;
6028 cp
= ivs
->cand_for_use
[uid
];
6034 ivs
->cand_for_use
[uid
] = NULL
;
6035 ivs
->n_cand_uses
[cid
]--;
6037 if (ivs
->n_cand_uses
[cid
] == 0)
6039 bitmap_clear_bit (ivs
->cands
, cid
);
6040 /* Do not count the pseudocandidates. */
6044 ivs
->cand_cost
-= cp
->cand
->cost
;
6046 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
6049 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
6051 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
6053 if (cp
->inv_expr_id
!= -1)
6055 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
6056 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
6057 ivs
->num_used_inv_expr
--;
6059 iv_ca_recount_cost (data
, ivs
);
6062 /* Add invariants in set INVS to set IVS. */
6065 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
6073 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
6075 ivs
->n_invariant_uses
[iid
]++;
6076 if (ivs
->n_invariant_uses
[iid
] == 1)
6081 /* Set cost pair for USE in set IVS to CP. */
6084 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6085 struct iv_use
*use
, struct cost_pair
*cp
)
6087 unsigned uid
= use
->id
, cid
;
6089 if (ivs
->cand_for_use
[uid
] == cp
)
6092 if (ivs
->cand_for_use
[uid
])
6093 iv_ca_set_no_cp (data
, ivs
, use
);
6100 ivs
->cand_for_use
[uid
] = cp
;
6101 ivs
->n_cand_uses
[cid
]++;
6102 if (ivs
->n_cand_uses
[cid
] == 1)
6104 bitmap_set_bit (ivs
->cands
, cid
);
6105 /* Do not count the pseudocandidates. */
6109 ivs
->cand_cost
+= cp
->cand
->cost
;
6111 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
6114 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
6115 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
6117 if (cp
->inv_expr_id
!= -1)
6119 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
6120 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
6121 ivs
->num_used_inv_expr
++;
6123 iv_ca_recount_cost (data
, ivs
);
6127 /* Extend set IVS by expressing USE by some of the candidates in it
6128 if possible. Consider all important candidates if candidates in
6129 set IVS don't give any result. */
6132 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6135 struct cost_pair
*best_cp
= NULL
, *cp
;
6138 struct iv_cand
*cand
;
6140 gcc_assert (ivs
->upto
>= use
->id
);
6144 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6146 cand
= iv_cand (data
, i
);
6147 cp
= get_use_iv_cost (data
, use
, cand
);
6148 if (cheaper_cost_pair (cp
, best_cp
))
6152 if (best_cp
== NULL
)
6154 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6156 cand
= iv_cand (data
, i
);
6157 cp
= get_use_iv_cost (data
, use
, cand
);
6158 if (cheaper_cost_pair (cp
, best_cp
))
6163 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
6166 /* Get cost for assignment IVS. */
6169 iv_ca_cost (struct iv_ca
*ivs
)
6171 /* This was a conditional expression but it triggered a bug in
6174 return infinite_cost
;
6179 /* Returns true if all dependences of CP are among invariants in IVS. */
6182 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
6187 if (!cp
->depends_on
)
6190 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
6192 if (ivs
->n_invariant_uses
[i
] == 0)
6199 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
6200 it before NEXT_CHANGE. */
6202 static struct iv_ca_delta
*
6203 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
6204 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
6206 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
6209 change
->old_cp
= old_cp
;
6210 change
->new_cp
= new_cp
;
6211 change
->next_change
= next_change
;
6216 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6219 static struct iv_ca_delta
*
6220 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
6222 struct iv_ca_delta
*last
;
6230 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
6232 last
->next_change
= l2
;
6237 /* Reverse the list of changes DELTA, forming the inverse to it. */
6239 static struct iv_ca_delta
*
6240 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
6242 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
6244 for (act
= delta
; act
; act
= next
)
6246 next
= act
->next_change
;
6247 act
->next_change
= prev
;
6250 std::swap (act
->old_cp
, act
->new_cp
);
6256 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6257 reverted instead. */
6260 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6261 struct iv_ca_delta
*delta
, bool forward
)
6263 struct cost_pair
*from
, *to
;
6264 struct iv_ca_delta
*act
;
6267 delta
= iv_ca_delta_reverse (delta
);
6269 for (act
= delta
; act
; act
= act
->next_change
)
6273 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
6274 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
6278 iv_ca_delta_reverse (delta
);
6281 /* Returns true if CAND is used in IVS. */
6284 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
6286 return ivs
->n_cand_uses
[cand
->id
] > 0;
6289 /* Returns number of induction variable candidates in the set IVS. */
6292 iv_ca_n_cands (struct iv_ca
*ivs
)
6294 return ivs
->n_cands
;
6297 /* Free the list of changes DELTA. */
6300 iv_ca_delta_free (struct iv_ca_delta
**delta
)
6302 struct iv_ca_delta
*act
, *next
;
6304 for (act
= *delta
; act
; act
= next
)
6306 next
= act
->next_change
;
6313 /* Allocates new iv candidates assignment. */
6315 static struct iv_ca
*
6316 iv_ca_new (struct ivopts_data
*data
)
6318 struct iv_ca
*nw
= XNEW (struct iv_ca
);
6322 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
6323 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
6324 nw
->cands
= BITMAP_ALLOC (NULL
);
6327 nw
->cand_use_cost
= no_cost
;
6329 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
6331 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
6332 nw
->num_used_inv_expr
= 0;
6337 /* Free memory occupied by the set IVS. */
6340 iv_ca_free (struct iv_ca
**ivs
)
6342 free ((*ivs
)->cand_for_use
);
6343 free ((*ivs
)->n_cand_uses
);
6344 BITMAP_FREE ((*ivs
)->cands
);
6345 free ((*ivs
)->n_invariant_uses
);
6346 free ((*ivs
)->used_inv_expr
);
6351 /* Dumps IVS to FILE. */
6354 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
6356 const char *pref
= " invariants ";
6358 comp_cost cost
= iv_ca_cost (ivs
);
6360 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
6361 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
6362 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
6363 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
6365 for (i
= 0; i
< ivs
->upto
; i
++)
6367 struct iv_use
*use
= iv_use (data
, i
);
6368 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
6370 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
6371 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
6373 fprintf (file
, " use:%d --> ??\n", use
->id
);
6376 for (i
= 1; i
<= data
->max_inv_id
; i
++)
6377 if (ivs
->n_invariant_uses
[i
])
6379 fprintf (file
, "%s%d", pref
, i
);
6382 fprintf (file
, "\n\n");
6385 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6386 new set, and store differences in DELTA. Number of induction variables
6387 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6388 the function will try to find a solution with mimimal iv candidates. */
6391 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6392 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6393 unsigned *n_ivs
, bool min_ncand
)
6398 struct cost_pair
*old_cp
, *new_cp
;
6401 for (i
= 0; i
< ivs
->upto
; i
++)
6403 use
= iv_use (data
, i
);
6404 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6407 && old_cp
->cand
== cand
)
6410 new_cp
= get_use_iv_cost (data
, use
, cand
);
6414 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6417 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6420 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6423 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6424 cost
= iv_ca_cost (ivs
);
6426 *n_ivs
= iv_ca_n_cands (ivs
);
6427 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6432 /* Try narrowing set IVS by removing CAND. Return the cost of
6433 the new set and store the differences in DELTA. START is
6434 the candidate with which we start narrowing. */
6437 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6438 struct iv_cand
*cand
, struct iv_cand
*start
,
6439 struct iv_ca_delta
**delta
)
6443 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6445 struct iv_cand
*cnd
;
6446 comp_cost cost
, best_cost
, acost
;
6449 for (i
= 0; i
< n_iv_uses (data
); i
++)
6451 use
= iv_use (data
, i
);
6453 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6454 if (old_cp
->cand
!= cand
)
6457 best_cost
= iv_ca_cost (ivs
);
6458 /* Start narrowing with START. */
6459 new_cp
= get_use_iv_cost (data
, use
, start
);
6461 if (data
->consider_all_candidates
)
6463 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6465 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6468 cnd
= iv_cand (data
, ci
);
6470 cp
= get_use_iv_cost (data
, use
, cnd
);
6474 iv_ca_set_cp (data
, ivs
, use
, cp
);
6475 acost
= iv_ca_cost (ivs
);
6477 if (compare_costs (acost
, best_cost
) < 0)
6486 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6488 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6491 cnd
= iv_cand (data
, ci
);
6493 cp
= get_use_iv_cost (data
, use
, cnd
);
6497 iv_ca_set_cp (data
, ivs
, use
, cp
);
6498 acost
= iv_ca_cost (ivs
);
6500 if (compare_costs (acost
, best_cost
) < 0)
6507 /* Restore to old cp for use. */
6508 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6512 iv_ca_delta_free (delta
);
6513 return infinite_cost
;
6516 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6519 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6520 cost
= iv_ca_cost (ivs
);
6521 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6526 /* Try optimizing the set of candidates IVS by removing candidates different
6527 from to EXCEPT_CAND from it. Return cost of the new set, and store
6528 differences in DELTA. */
6531 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6532 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6535 struct iv_ca_delta
*act_delta
, *best_delta
;
6537 comp_cost best_cost
, acost
;
6538 struct iv_cand
*cand
;
6541 best_cost
= iv_ca_cost (ivs
);
6543 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6545 cand
= iv_cand (data
, i
);
6547 if (cand
== except_cand
)
6550 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6552 if (compare_costs (acost
, best_cost
) < 0)
6555 iv_ca_delta_free (&best_delta
);
6556 best_delta
= act_delta
;
6559 iv_ca_delta_free (&act_delta
);
6568 /* Recurse to possibly remove other unnecessary ivs. */
6569 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6570 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6571 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6572 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6576 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6577 cheaper local cost for USE than BEST_CP. Return pointer to
6578 the corresponding cost_pair, otherwise just return BEST_CP. */
6580 static struct cost_pair
*
6581 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6582 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6583 struct cost_pair
*best_cp
)
6585 struct iv_cand
*cand
;
6586 struct cost_pair
*cp
;
6588 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6589 if (cand_idx
== old_cand
->id
)
6592 cand
= iv_cand (data
, cand_idx
);
6593 cp
= get_use_iv_cost (data
, use
, cand
);
6594 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6600 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6601 which are used by more than one iv uses. For each of those candidates,
6602 this function tries to represent iv uses under that candidate using
6603 other ones with lower local cost, then tries to prune the new set.
6604 If the new set has lower cost, It returns the new cost after recording
6605 candidate replacement in list DELTA. */
6608 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6609 struct iv_ca_delta
**delta
)
6611 bitmap_iterator bi
, bj
;
6612 unsigned int i
, j
, k
;
6614 struct iv_cand
*cand
;
6615 comp_cost orig_cost
, acost
;
6616 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6617 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6620 orig_cost
= iv_ca_cost (ivs
);
6622 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6624 if (ivs
->n_cand_uses
[i
] == 1
6625 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6628 cand
= iv_cand (data
, i
);
6631 /* Represent uses under current candidate using other ones with
6632 lower local cost. */
6633 for (j
= 0; j
< ivs
->upto
; j
++)
6635 use
= iv_use (data
, j
);
6636 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6638 if (old_cp
->cand
!= cand
)
6642 if (data
->consider_all_candidates
)
6643 for (k
= 0; k
< n_iv_cands (data
); k
++)
6644 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6645 old_cp
->cand
, best_cp
);
6647 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6648 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6649 old_cp
->cand
, best_cp
);
6651 if (best_cp
== old_cp
)
6654 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6656 /* No need for further prune. */
6660 /* Prune the new candidate set. */
6661 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6662 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6663 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6664 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6666 if (compare_costs (acost
, orig_cost
) < 0)
6672 iv_ca_delta_free (&act_delta
);
6678 /* Tries to extend the sets IVS in the best possible way in order
6679 to express the USE. If ORIGINALP is true, prefer candidates from
6680 the original set of IVs, otherwise favor important candidates not
6681 based on any memory object. */
6684 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6685 struct iv_use
*use
, bool originalp
)
6687 comp_cost best_cost
, act_cost
;
6690 struct iv_cand
*cand
;
6691 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6692 struct cost_pair
*cp
;
6694 iv_ca_add_use (data
, ivs
, use
);
6695 best_cost
= iv_ca_cost (ivs
);
6696 cp
= iv_ca_cand_for_use (ivs
, use
);
6699 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6700 iv_ca_set_no_cp (data
, ivs
, use
);
6703 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6704 first try important candidates not based on any memory object. Only if
6705 this fails, try the specific ones. Rationale -- in loops with many
6706 variables the best choice often is to use just one generic biv. If we
6707 added here many ivs specific to the uses, the optimization algorithm later
6708 would be likely to get stuck in a local minimum, thus causing us to create
6709 too many ivs. The approach from few ivs to more seems more likely to be
6710 successful -- starting from few ivs, replacing an expensive use by a
6711 specific iv should always be a win. */
6712 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, i
, bi
)
6714 cand
= iv_cand (data
, i
);
6716 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6719 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6722 if (iv_ca_cand_used_p (ivs
, cand
))
6725 cp
= get_use_iv_cost (data
, use
, cand
);
6729 iv_ca_set_cp (data
, ivs
, use
, cp
);
6730 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6732 iv_ca_set_no_cp (data
, ivs
, use
);
6733 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6735 if (compare_costs (act_cost
, best_cost
) < 0)
6737 best_cost
= act_cost
;
6739 iv_ca_delta_free (&best_delta
);
6740 best_delta
= act_delta
;
6743 iv_ca_delta_free (&act_delta
);
6746 if (infinite_cost_p (best_cost
))
6748 for (i
= 0; i
< use
->n_map_members
; i
++)
6750 cp
= use
->cost_map
+ i
;
6755 /* Already tried this. */
6756 if (cand
->important
)
6758 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6760 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6764 if (iv_ca_cand_used_p (ivs
, cand
))
6768 iv_ca_set_cp (data
, ivs
, use
, cp
);
6769 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6770 iv_ca_set_no_cp (data
, ivs
, use
);
6771 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6774 if (compare_costs (act_cost
, best_cost
) < 0)
6776 best_cost
= act_cost
;
6779 iv_ca_delta_free (&best_delta
);
6780 best_delta
= act_delta
;
6783 iv_ca_delta_free (&act_delta
);
6787 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6788 iv_ca_delta_free (&best_delta
);
6790 return !infinite_cost_p (best_cost
);
6793 /* Finds an initial assignment of candidates to uses. */
6795 static struct iv_ca
*
6796 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6798 struct iv_ca
*ivs
= iv_ca_new (data
);
6801 for (i
= 0; i
< n_iv_uses (data
); i
++)
6802 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6811 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6812 points to a bool variable, this function tries to break local
6813 optimal fixed-point by replacing candidates in IVS if it's true. */
6816 try_improve_iv_set (struct ivopts_data
*data
,
6817 struct iv_ca
*ivs
, bool *try_replace_p
)
6820 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6821 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6822 struct iv_cand
*cand
;
6824 /* Try extending the set of induction variables by one. */
6825 for (i
= 0; i
< n_iv_cands (data
); i
++)
6827 cand
= iv_cand (data
, i
);
6829 if (iv_ca_cand_used_p (ivs
, cand
))
6832 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6836 /* If we successfully added the candidate and the set is small enough,
6837 try optimizing it by removing other candidates. */
6838 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6840 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6841 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6842 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6843 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6846 if (compare_costs (acost
, best_cost
) < 0)
6849 iv_ca_delta_free (&best_delta
);
6850 best_delta
= act_delta
;
6853 iv_ca_delta_free (&act_delta
);
6858 /* Try removing the candidates from the set instead. */
6859 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6861 if (!best_delta
&& *try_replace_p
)
6863 *try_replace_p
= false;
6864 /* So far candidate selecting algorithm tends to choose fewer IVs
6865 so that it can handle cases in which loops have many variables
6866 but the best choice is often to use only one general biv. One
6867 weakness is it can't handle opposite cases, in which different
6868 candidates should be chosen with respect to each use. To solve
6869 the problem, we replace candidates in a manner described by the
6870 comments of iv_ca_replace, thus give general algorithm a chance
6871 to break local optimal fixed-point in these cases. */
6872 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6879 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6880 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6881 iv_ca_delta_free (&best_delta
);
6885 /* Attempts to find the optimal set of induction variables. We do simple
6886 greedy heuristic -- we try to replace at most one candidate in the selected
6887 solution and remove the unused ivs while this improves the cost. */
6889 static struct iv_ca
*
6890 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6893 bool try_replace_p
= true;
6895 /* Get the initial solution. */
6896 set
= get_initial_solution (data
, originalp
);
6899 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6900 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6904 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6906 fprintf (dump_file
, "Initial set of candidates:\n");
6907 iv_ca_dump (data
, dump_file
, set
);
6910 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6912 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6914 fprintf (dump_file
, "Improved to:\n");
6915 iv_ca_dump (data
, dump_file
, set
);
6922 static struct iv_ca
*
6923 find_optimal_iv_set (struct ivopts_data
*data
)
6926 struct iv_ca
*set
, *origset
;
6928 comp_cost cost
, origcost
;
6930 /* Determine the cost based on a strategy that starts with original IVs,
6931 and try again using a strategy that prefers candidates not based
6933 origset
= find_optimal_iv_set_1 (data
, true);
6934 set
= find_optimal_iv_set_1 (data
, false);
6936 if (!origset
&& !set
)
6939 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6940 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6942 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6944 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6945 origcost
.cost
, origcost
.complexity
);
6946 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6947 cost
.cost
, cost
.complexity
);
6950 /* Choose the one with the best cost. */
6951 if (compare_costs (origcost
, cost
) <= 0)
6958 iv_ca_free (&origset
);
6960 for (i
= 0; i
< n_iv_uses (data
); i
++)
6962 use
= iv_use (data
, i
);
6963 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6969 /* Creates a new induction variable corresponding to CAND. */
6972 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6974 gimple_stmt_iterator incr_pos
;
6984 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6988 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6996 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
7000 /* Mark that the iv is preserved. */
7001 name_info (data
, cand
->var_before
)->preserve_biv
= true;
7002 name_info (data
, cand
->var_after
)->preserve_biv
= true;
7004 /* Rewrite the increment so that it uses var_before directly. */
7005 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
7009 gimple_add_tmp_var (cand
->var_before
);
7011 base
= unshare_expr (cand
->iv
->base
);
7013 create_iv (base
, unshare_expr (cand
->iv
->step
),
7014 cand
->var_before
, data
->current_loop
,
7015 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
7018 /* Creates new induction variables described in SET. */
7021 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
7024 struct iv_cand
*cand
;
7027 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
7029 cand
= iv_cand (data
, i
);
7030 create_new_iv (data
, cand
);
7033 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7035 fprintf (dump_file
, "Selected IV set for loop %d",
7036 data
->current_loop
->num
);
7037 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7038 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7039 LOCATION_LINE (data
->loop_loc
));
7040 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
7041 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
7043 cand
= iv_cand (data
, i
);
7044 dump_cand (dump_file
, cand
);
7046 fprintf (dump_file
, "\n");
7050 /* Rewrites USE (definition of iv used in a nonlinear expression)
7051 using candidate CAND. */
7054 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
7055 struct iv_use
*use
, struct iv_cand
*cand
)
7060 gimple_stmt_iterator bsi
;
7062 /* An important special case -- if we are asked to express value of
7063 the original iv by itself, just exit; there is no need to
7064 introduce a new computation (that might also need casting the
7065 variable to unsigned and back). */
7066 if (cand
->pos
== IP_ORIGINAL
7067 && cand
->incremented_at
== use
->stmt
)
7069 enum tree_code stmt_code
;
7071 gcc_assert (is_gimple_assign (use
->stmt
));
7072 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
7074 /* Check whether we may leave the computation unchanged.
7075 This is the case only if it does not rely on other
7076 computations in the loop -- otherwise, the computation
7077 we rely upon may be removed in remove_unused_ivs,
7078 thus leading to ICE. */
7079 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
7080 if (stmt_code
== PLUS_EXPR
7081 || stmt_code
== MINUS_EXPR
7082 || stmt_code
== POINTER_PLUS_EXPR
)
7084 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
7085 op
= gimple_assign_rhs2 (use
->stmt
);
7086 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
7087 op
= gimple_assign_rhs1 (use
->stmt
);
7094 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
7098 comp
= get_computation (data
->current_loop
, use
, cand
);
7099 gcc_assert (comp
!= NULL_TREE
);
7101 switch (gimple_code (use
->stmt
))
7104 tgt
= PHI_RESULT (use
->stmt
);
7106 /* If we should keep the biv, do not replace it. */
7107 if (name_info (data
, tgt
)->preserve_biv
)
7110 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
7114 tgt
= gimple_assign_lhs (use
->stmt
);
7115 bsi
= gsi_for_stmt (use
->stmt
);
7122 if (!valid_gimple_rhs_p (comp
)
7123 || (gimple_code (use
->stmt
) != GIMPLE_PHI
7124 /* We can't allow re-allocating the stmt as it might be pointed
7126 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
7127 >= gimple_num_ops (gsi_stmt (bsi
)))))
7129 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
7130 true, GSI_SAME_STMT
);
7131 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
7133 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
7134 /* As this isn't a plain copy we have to reset alignment
7136 if (SSA_NAME_PTR_INFO (comp
))
7137 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
7141 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
7143 ass
= gimple_build_assign (tgt
, comp
);
7144 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
7146 bsi
= gsi_for_stmt (use
->stmt
);
7147 remove_phi_node (&bsi
, false);
7151 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
7152 use
->stmt
= gsi_stmt (bsi
);
7156 /* Performs a peephole optimization to reorder the iv update statement with
7157 a mem ref to enable instruction combining in later phases. The mem ref uses
7158 the iv value before the update, so the reordering transformation requires
7159 adjustment of the offset. CAND is the selected IV_CAND.
7163 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7171 directly propagating t over to (1) will introduce overlapping live range
7172 thus increase register pressure. This peephole transform it into:
7176 t = MEM_REF (base, iv2, 8, 8);
7183 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
7186 gimple
*iv_update
, *stmt
;
7188 gimple_stmt_iterator gsi
, gsi_iv
;
7190 if (cand
->pos
!= IP_NORMAL
)
7193 var_after
= cand
->var_after
;
7194 iv_update
= SSA_NAME_DEF_STMT (var_after
);
7196 bb
= gimple_bb (iv_update
);
7197 gsi
= gsi_last_nondebug_bb (bb
);
7198 stmt
= gsi_stmt (gsi
);
7200 /* Only handle conditional statement for now. */
7201 if (gimple_code (stmt
) != GIMPLE_COND
)
7204 gsi_prev_nondebug (&gsi
);
7205 stmt
= gsi_stmt (gsi
);
7206 if (stmt
!= iv_update
)
7209 gsi_prev_nondebug (&gsi
);
7210 if (gsi_end_p (gsi
))
7213 stmt
= gsi_stmt (gsi
);
7214 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7217 if (stmt
!= use
->stmt
)
7220 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
7223 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7225 fprintf (dump_file
, "Reordering \n");
7226 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
7227 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
7228 fprintf (dump_file
, "\n");
7231 gsi
= gsi_for_stmt (use
->stmt
);
7232 gsi_iv
= gsi_for_stmt (iv_update
);
7233 gsi_move_before (&gsi_iv
, &gsi
);
7235 cand
->pos
= IP_BEFORE_USE
;
7236 cand
->incremented_at
= use
->stmt
;
7239 /* Rewrites USE (address that is an iv) using candidate CAND. */
7242 rewrite_use_address_1 (struct ivopts_data
*data
,
7243 struct iv_use
*use
, struct iv_cand
*cand
)
7246 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7247 tree base_hint
= NULL_TREE
;
7251 adjust_iv_update_pos (cand
, use
);
7252 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
7254 unshare_aff_combination (&aff
);
7256 /* To avoid undefined overflow problems, all IV candidates use unsigned
7257 integer types. The drawback is that this makes it impossible for
7258 create_mem_ref to distinguish an IV that is based on a memory object
7259 from one that represents simply an offset.
7261 To work around this problem, we pass a hint to create_mem_ref that
7262 indicates which variable (if any) in aff is an IV based on a memory
7263 object. Note that we only consider the candidate. If this is not
7264 based on an object, the base of the reference is in some subexpression
7265 of the use -- but these will use pointer types, so they are recognized
7266 by the create_mem_ref heuristics anyway. */
7267 if (cand
->iv
->base_object
)
7268 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7270 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7271 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
7272 reference_alias_ptr_type (*use
->op_p
),
7273 iv
, base_hint
, data
->speed
);
7274 copy_ref_info (ref
, *use
->op_p
);
7278 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
7279 first use of a group, rewrites sub uses in the group too. */
7282 rewrite_use_address (struct ivopts_data
*data
,
7283 struct iv_use
*use
, struct iv_cand
*cand
)
7285 struct iv_use
*next
;
7287 gcc_assert (use
->sub_id
== 0);
7288 rewrite_use_address_1 (data
, use
, cand
);
7289 update_stmt (use
->stmt
);
7291 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
7293 rewrite_use_address_1 (data
, next
, cand
);
7294 update_stmt (next
->stmt
);
7300 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7304 rewrite_use_compare (struct ivopts_data
*data
,
7305 struct iv_use
*use
, struct iv_cand
*cand
)
7307 tree comp
, *var_p
, op
, bound
;
7308 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7309 enum tree_code compare
;
7310 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
7316 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7317 tree var_type
= TREE_TYPE (var
);
7320 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7322 fprintf (dump_file
, "Replacing exit test: ");
7323 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7326 bound
= unshare_expr (fold_convert (var_type
, bound
));
7327 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7329 gsi_insert_seq_on_edge_immediate (
7330 loop_preheader_edge (data
->current_loop
),
7333 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7334 gimple_cond_set_lhs (cond_stmt
, var
);
7335 gimple_cond_set_code (cond_stmt
, compare
);
7336 gimple_cond_set_rhs (cond_stmt
, op
);
7340 /* The induction variable elimination failed; just express the original
7342 comp
= get_computation (data
->current_loop
, use
, cand
);
7343 gcc_assert (comp
!= NULL_TREE
);
7345 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
7348 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
7349 true, GSI_SAME_STMT
);
7352 /* Rewrites USE using candidate CAND. */
7355 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
7359 case USE_NONLINEAR_EXPR
:
7360 rewrite_use_nonlinear_expr (data
, use
, cand
);
7364 rewrite_use_address (data
, use
, cand
);
7368 rewrite_use_compare (data
, use
, cand
);
7375 update_stmt (use
->stmt
);
7378 /* Rewrite the uses using the selected induction variables. */
7381 rewrite_uses (struct ivopts_data
*data
)
7384 struct iv_cand
*cand
;
7387 for (i
= 0; i
< n_iv_uses (data
); i
++)
7389 use
= iv_use (data
, i
);
7390 cand
= use
->selected
;
7393 rewrite_use (data
, use
, cand
);
7397 /* Removes the ivs that are not used after rewriting. */
7400 remove_unused_ivs (struct ivopts_data
*data
)
7404 bitmap toremove
= BITMAP_ALLOC (NULL
);
7406 /* Figure out an order in which to release SSA DEFs so that we don't
7407 release something that we'd have to propagate into a debug stmt
7409 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7411 struct version_info
*info
;
7413 info
= ver_info (data
, j
);
7415 && !integer_zerop (info
->iv
->step
)
7417 && !info
->iv
->have_use_for
7418 && !info
->preserve_biv
)
7420 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7422 tree def
= info
->iv
->ssa_name
;
7424 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7426 imm_use_iterator imm_iter
;
7427 use_operand_p use_p
;
7431 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7433 if (!gimple_debug_bind_p (stmt
))
7436 /* We just want to determine whether to do nothing
7437 (count == 0), to substitute the computed
7438 expression into a single use of the SSA DEF by
7439 itself (count == 1), or to use a debug temp
7440 because the SSA DEF is used multiple times or as
7441 part of a larger expression (count > 1). */
7443 if (gimple_debug_bind_get_value (stmt
) != def
)
7447 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7453 struct iv_use dummy_use
;
7454 struct iv_cand
*best_cand
= NULL
, *cand
;
7455 unsigned i
, best_pref
= 0, cand_pref
;
7457 memset (&dummy_use
, 0, sizeof (dummy_use
));
7458 dummy_use
.iv
= info
->iv
;
7459 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7461 cand
= iv_use (data
, i
)->selected
;
7462 if (cand
== best_cand
)
7464 cand_pref
= operand_equal_p (cand
->iv
->step
,
7468 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7469 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7472 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7474 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7477 best_pref
= cand_pref
;
7484 tree comp
= get_computation_at (data
->current_loop
,
7485 &dummy_use
, best_cand
,
7486 SSA_NAME_DEF_STMT (def
));
7492 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7493 DECL_ARTIFICIAL (vexpr
) = 1;
7494 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7495 if (SSA_NAME_VAR (def
))
7496 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7498 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7500 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7501 gimple_stmt_iterator gsi
;
7503 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7504 gsi
= gsi_after_labels (gimple_bb
7505 (SSA_NAME_DEF_STMT (def
)));
7507 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7509 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7513 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7515 if (!gimple_debug_bind_p (stmt
))
7518 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7519 SET_USE (use_p
, comp
);
7527 release_defs_bitset (toremove
);
7529 BITMAP_FREE (toremove
);
7532 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7533 for hash_map::traverse. */
7536 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7542 /* Frees data allocated by the optimization of a single loop. */
7545 free_loop_data (struct ivopts_data
*data
)
7553 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7554 delete data
->niters
;
7555 data
->niters
= NULL
;
7558 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7560 struct version_info
*info
;
7562 info
= ver_info (data
, i
);
7564 info
->has_nonlin_use
= false;
7565 info
->preserve_biv
= false;
7568 bitmap_clear (data
->relevant
);
7569 bitmap_clear (data
->important_candidates
);
7571 for (i
= 0; i
< n_iv_uses (data
); i
++)
7573 struct iv_use
*use
= iv_use (data
, i
);
7574 struct iv_use
*pre
= use
, *sub
= use
->next
;
7578 gcc_assert (sub
->related_cands
== NULL
);
7579 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7586 BITMAP_FREE (use
->related_cands
);
7587 for (j
= 0; j
< use
->n_map_members
; j
++)
7588 if (use
->cost_map
[j
].depends_on
)
7589 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7590 free (use
->cost_map
);
7593 data
->iv_uses
.truncate (0);
7595 for (i
= 0; i
< n_iv_cands (data
); i
++)
7597 struct iv_cand
*cand
= iv_cand (data
, i
);
7599 if (cand
->depends_on
)
7600 BITMAP_FREE (cand
->depends_on
);
7603 data
->iv_candidates
.truncate (0);
7605 if (data
->version_info_size
< num_ssa_names
)
7607 data
->version_info_size
= 2 * num_ssa_names
;
7608 free (data
->version_info
);
7609 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7612 data
->max_inv_id
= 0;
7614 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7615 SET_DECL_RTL (obj
, NULL_RTX
);
7617 decl_rtl_to_reset
.truncate (0);
7619 data
->inv_expr_tab
->empty ();
7620 data
->inv_expr_id
= 0;
7622 data
->iv_common_cand_tab
->empty ();
7623 data
->iv_common_cands
.truncate (0);
7626 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7630 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7632 free_loop_data (data
);
7633 free (data
->version_info
);
7634 BITMAP_FREE (data
->relevant
);
7635 BITMAP_FREE (data
->important_candidates
);
7637 decl_rtl_to_reset
.release ();
7638 data
->iv_uses
.release ();
7639 data
->iv_candidates
.release ();
7640 delete data
->inv_expr_tab
;
7641 data
->inv_expr_tab
= NULL
;
7642 free_affine_expand_cache (&data
->name_expansion_cache
);
7643 delete data
->iv_common_cand_tab
;
7644 data
->iv_common_cand_tab
= NULL
;
7645 data
->iv_common_cands
.release ();
7646 obstack_free (&data
->iv_obstack
, NULL
);
7649 /* Returns true if the loop body BODY includes any function calls. */
7652 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7654 gimple_stmt_iterator gsi
;
7657 for (i
= 0; i
< num_nodes
; i
++)
7658 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7660 gimple
*stmt
= gsi_stmt (gsi
);
7661 if (is_gimple_call (stmt
)
7662 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7668 /* Optimizes the LOOP. Returns true if anything changed. */
7671 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7673 bool changed
= false;
7674 struct iv_ca
*iv_ca
;
7675 edge exit
= single_dom_exit (loop
);
7678 gcc_assert (!data
->niters
);
7679 data
->current_loop
= loop
;
7680 data
->loop_loc
= find_loop_location (loop
);
7681 data
->speed
= optimize_loop_for_speed_p (loop
);
7683 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7685 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7686 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7687 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7688 LOCATION_LINE (data
->loop_loc
));
7689 fprintf (dump_file
, "\n");
7693 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7694 exit
->src
->index
, exit
->dest
->index
);
7695 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7696 fprintf (dump_file
, "\n");
7699 fprintf (dump_file
, "\n");
7702 body
= get_loop_body (loop
);
7703 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7704 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7707 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7709 /* For each ssa name determines whether it behaves as an induction variable
7711 if (!find_induction_variables (data
))
7714 /* Finds interesting uses (item 1). */
7715 find_interesting_uses (data
);
7716 group_address_uses (data
);
7717 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7720 /* Finds candidates for the induction variables (item 2). */
7721 find_iv_candidates (data
);
7723 /* Calculates the costs (item 3, part 1). */
7724 determine_iv_costs (data
);
7725 determine_use_iv_costs (data
);
7726 determine_set_costs (data
);
7728 /* Find the optimal set of induction variables (item 3, part 2). */
7729 iv_ca
= find_optimal_iv_set (data
);
7734 /* Create the new induction variables (item 4, part 1). */
7735 create_new_ivs (data
, iv_ca
);
7736 iv_ca_free (&iv_ca
);
7738 /* Rewrite the uses (item 4, part 2). */
7739 rewrite_uses (data
);
7741 /* Remove the ivs that are unused after rewriting. */
7742 remove_unused_ivs (data
);
7744 /* We have changed the structure of induction variables; it might happen
7745 that definitions in the scev database refer to some of them that were
7750 free_loop_data (data
);
7755 /* Main entry point. Optimizes induction variables in loops. */
7758 tree_ssa_iv_optimize (void)
7761 struct ivopts_data data
;
7763 tree_ssa_iv_optimize_init (&data
);
7765 /* Optimize the loops starting with the innermost ones. */
7766 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7768 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7769 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7771 tree_ssa_iv_optimize_loop (&data
, loop
);
7774 tree_ssa_iv_optimize_finalize (&data
);