1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
4 This file is part of GCC.
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
27 1) The interesting uses of induction variables are found. This includes
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
33 2) Candidates for the induction variables are found. This includes
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
56 4) The trees are transformed to use the new variables, the dead code is
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
66 #include "coretypes.h"
74 #include "fold-const.h"
75 #include "stor-layout.h"
77 #include "gimple-pretty-print.h"
78 #include "internal-fn.h"
81 #include "gimple-iterator.h"
82 #include "gimplify-me.h"
85 #include "tree-ssa-loop-ivopts.h"
86 #include "tree-ssa-loop-manip.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
90 #include "insn-config.h"
100 #include "tree-ssa.h"
102 #include "tree-pass.h"
103 #include "tree-chrec.h"
104 #include "tree-scalar-evolution.h"
106 #include "langhooks.h"
107 #include "tree-affine.h"
109 #include "tree-inline.h"
110 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113 #include "tree-vectorizer.h"
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
120 /* The infinite cost. */
121 #define INFTY 10000000
123 #define AVG_LOOP_NITER(LOOP) 5
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop
*loop
)
132 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
134 return AVG_LOOP_NITER (loop
);
139 /* Representation of the induction variable. */
142 tree base
; /* Initial value of the iv. */
143 tree base_object
; /* A memory object to that the induction variable points. */
144 tree step
; /* Step of the iv (constant only). */
145 tree ssa_name
; /* The ssa name with the value. */
146 unsigned use_id
; /* The identifier in the use if it is the case. */
147 bool biv_p
; /* Is it a biv? */
148 bool have_use_for
; /* Do we already have a use for it? */
149 bool no_overflow
; /* True if the iv doesn't overflow. */
152 /* Per-ssa version information (induction variable descriptions, etc.). */
155 tree name
; /* The ssa name. */
156 struct iv
*iv
; /* Induction variable description. */
157 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
158 an expression that is not an induction variable. */
159 bool preserve_biv
; /* For the original biv, whether to preserve it. */
160 unsigned inv_id
; /* Id of an invariant. */
166 USE_NONLINEAR_EXPR
, /* Use in a nonlinear expression. */
167 USE_ADDRESS
, /* Use in an address. */
168 USE_COMPARE
/* Use is a compare. */
171 /* Cost of a computation. */
174 int cost
; /* The runtime cost. */
175 unsigned complexity
; /* The estimate of the complexity of the code for
176 the computation (in no concrete units --
177 complexity field should be larger for more
178 complex expressions and addressing modes). */
181 static const comp_cost no_cost
= {0, 0};
182 static const comp_cost infinite_cost
= {INFTY
, INFTY
};
184 /* The candidate - cost pair. */
187 struct iv_cand
*cand
; /* The candidate. */
188 comp_cost cost
; /* The cost. */
189 bitmap depends_on
; /* The list of invariants that have to be
191 tree value
; /* For final value elimination, the expression for
192 the final value of the iv. For iv elimination,
193 the new bound to compare with. */
194 enum tree_code comp
; /* For iv elimination, the comparison. */
195 int inv_expr_id
; /* Loop invariant expression id. */
201 unsigned id
; /* The id of the use. */
202 unsigned sub_id
; /* The id of the sub use. */
203 enum use_type type
; /* Type of the use. */
204 struct iv
*iv
; /* The induction variable it is based on. */
205 gimple stmt
; /* Statement in that it occurs. */
206 tree
*op_p
; /* The place where it occurs. */
207 bitmap related_cands
; /* The set of "related" iv candidates, plus the common
210 unsigned n_map_members
; /* Number of candidates in the cost_map list. */
211 struct cost_pair
*cost_map
;
212 /* The costs wrto the iv candidates. */
214 struct iv_cand
*selected
;
215 /* The selected candidate. */
217 struct iv_use
*next
; /* The next sub use. */
218 tree addr_base
; /* Base address with const offset stripped. */
219 unsigned HOST_WIDE_INT addr_offset
;
220 /* Const offset stripped from base address. */
223 /* The position where the iv is computed. */
226 IP_NORMAL
, /* At the end, just before the exit condition. */
227 IP_END
, /* At the end of the latch block. */
228 IP_BEFORE_USE
, /* Immediately before a specific use. */
229 IP_AFTER_USE
, /* Immediately after a specific use. */
230 IP_ORIGINAL
/* The original biv. */
233 /* The induction variable candidate. */
236 unsigned id
; /* The number of the candidate. */
237 bool important
; /* Whether this is an "important" candidate, i.e. such
238 that it should be considered by all uses. */
239 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
240 gimple incremented_at
;/* For original biv, the statement where it is
242 tree var_before
; /* The variable used for it before increment. */
243 tree var_after
; /* The variable used for it after increment. */
244 struct iv
*iv
; /* The value of the candidate. NULL for
245 "pseudocandidate" used to indicate the possibility
246 to replace the final value of an iv by direct
247 computation of the value. */
248 unsigned cost
; /* Cost of the candidate. */
249 unsigned cost_step
; /* Cost of the candidate's increment operation. */
250 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
251 where it is incremented. */
252 bitmap depends_on
; /* The list of invariants that are used in step of the
256 /* Loop invariant expression hashtable entry. */
257 struct iv_inv_expr_ent
264 /* The data used by the induction variable optimizations. */
266 typedef struct iv_use
*iv_use_p
;
268 typedef struct iv_cand
*iv_cand_p
;
270 /* Hashtable helpers. */
272 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
274 static inline hashval_t
hash (const iv_inv_expr_ent
*);
275 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
278 /* Hash function for loop invariant expressions. */
281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
286 /* Hash table equality function for expressions. */
289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
290 const iv_inv_expr_ent
*expr2
)
292 return expr1
->hash
== expr2
->hash
293 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
298 /* The currently optimized loop. */
299 struct loop
*current_loop
;
300 source_location loop_loc
;
302 /* Numbers of iterations for all exits of the current loop. */
303 hash_map
<edge
, tree_niter_desc
*> *niters
;
305 /* Number of registers used in it. */
308 /* The size of version_info array allocated. */
309 unsigned version_info_size
;
311 /* The array of information for the ssa names. */
312 struct version_info
*version_info
;
314 /* The hashtable of loop invariant expressions created
316 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
318 /* Loop invariant expression id. */
321 /* The bitmap of indices in version_info whose value was changed. */
324 /* The uses of induction variables. */
325 vec
<iv_use_p
> iv_uses
;
327 /* The candidates. */
328 vec
<iv_cand_p
> iv_candidates
;
330 /* A bitmap of important candidates. */
331 bitmap important_candidates
;
333 /* Cache used by tree_to_aff_combination_expand. */
334 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
336 /* The maximum invariant id. */
339 /* Obstack for iv structure. */
340 struct obstack iv_obstack
;
342 /* Whether to consider just related and important candidates when replacing a
344 bool consider_all_candidates
;
346 /* Are we optimizing for speed? */
349 /* Whether the loop body includes any function calls. */
350 bool body_includes_call
;
352 /* Whether the loop body can only be exited via single exit. */
353 bool loop_single_exit_p
;
356 /* An assignment of iv candidates to uses. */
360 /* The number of uses covered by the assignment. */
363 /* Number of uses that cannot be expressed by the candidates in the set. */
366 /* Candidate assigned to a use, together with the related costs. */
367 struct cost_pair
**cand_for_use
;
369 /* Number of times each candidate is used. */
370 unsigned *n_cand_uses
;
372 /* The candidates used. */
375 /* The number of candidates in the set. */
378 /* Total number of registers needed. */
381 /* Total cost of expressing uses. */
382 comp_cost cand_use_cost
;
384 /* Total cost of candidates. */
387 /* Number of times each invariant is used. */
388 unsigned *n_invariant_uses
;
390 /* The array holding the number of uses of each loop
391 invariant expressions created by ivopt. */
392 unsigned *used_inv_expr
;
394 /* The number of created loop invariants. */
395 unsigned num_used_inv_expr
;
397 /* Total cost of the assignment. */
401 /* Difference of two iv candidate assignments. */
408 /* An old assignment (for rollback purposes). */
409 struct cost_pair
*old_cp
;
411 /* A new assignment. */
412 struct cost_pair
*new_cp
;
414 /* Next change in the list. */
415 struct iv_ca_delta
*next_change
;
418 /* Bound on number of candidates below that all candidates are considered. */
420 #define CONSIDER_ALL_CANDIDATES_BOUND \
421 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
423 /* If there are more iv occurrences, we just give up (it is quite unlikely that
424 optimizing such a loop would help, and it would take ages). */
426 #define MAX_CONSIDERED_USES \
427 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
429 /* If there are at most this number of ivs in the set, try removing unnecessary
430 ivs from the set always. */
432 #define ALWAYS_PRUNE_CAND_SET_BOUND \
433 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
435 /* The list of trees for that the decl_rtl field must be reset is stored
438 static vec
<tree
> decl_rtl_to_reset
;
440 static comp_cost
force_expr_to_var_cost (tree
, bool);
442 /* Number of uses recorded in DATA. */
444 static inline unsigned
445 n_iv_uses (struct ivopts_data
*data
)
447 return data
->iv_uses
.length ();
450 /* Ith use recorded in DATA. */
452 static inline struct iv_use
*
453 iv_use (struct ivopts_data
*data
, unsigned i
)
455 return data
->iv_uses
[i
];
458 /* Number of candidates recorded in DATA. */
460 static inline unsigned
461 n_iv_cands (struct ivopts_data
*data
)
463 return data
->iv_candidates
.length ();
466 /* Ith candidate recorded in DATA. */
468 static inline struct iv_cand
*
469 iv_cand (struct ivopts_data
*data
, unsigned i
)
471 return data
->iv_candidates
[i
];
474 /* The single loop exit if it dominates the latch, NULL otherwise. */
477 single_dom_exit (struct loop
*loop
)
479 edge exit
= single_exit (loop
);
484 if (!just_once_each_iteration_p (loop
, exit
->src
))
490 /* Dumps information about the induction variable IV to FILE. */
493 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
)
495 if (iv
->ssa_name
&& dump_name
)
497 fprintf (file
, "ssa name ");
498 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
499 fprintf (file
, "\n");
502 fprintf (file
, " type ");
503 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
504 fprintf (file
, "\n");
508 fprintf (file
, " base ");
509 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
510 fprintf (file
, "\n");
512 fprintf (file
, " step ");
513 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
514 fprintf (file
, "\n");
518 fprintf (file
, " invariant ");
519 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
520 fprintf (file
, "\n");
525 fprintf (file
, " base object ");
526 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
527 fprintf (file
, "\n");
531 fprintf (file
, " is a biv\n");
534 /* Dumps information about the USE to FILE. */
537 dump_use (FILE *file
, struct iv_use
*use
)
539 fprintf (file
, "use %d", use
->id
);
541 fprintf (file
, ".%d", use
->sub_id
);
543 fprintf (file
, "\n");
547 case USE_NONLINEAR_EXPR
:
548 fprintf (file
, " generic\n");
552 fprintf (file
, " address\n");
556 fprintf (file
, " compare\n");
563 fprintf (file
, " in statement ");
564 print_gimple_stmt (file
, use
->stmt
, 0, 0);
565 fprintf (file
, "\n");
567 fprintf (file
, " at position ");
569 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
570 fprintf (file
, "\n");
572 dump_iv (file
, use
->iv
, false);
574 if (use
->related_cands
)
576 fprintf (file
, " related candidates ");
577 dump_bitmap (file
, use
->related_cands
);
581 /* Dumps information about the uses to FILE. */
584 dump_uses (FILE *file
, struct ivopts_data
*data
)
589 for (i
= 0; i
< n_iv_uses (data
); i
++)
591 use
= iv_use (data
, i
);
594 dump_use (file
, use
);
598 fprintf (file
, "\n");
602 /* Dumps information about induction variable candidate CAND to FILE. */
605 dump_cand (FILE *file
, struct iv_cand
*cand
)
607 struct iv
*iv
= cand
->iv
;
609 fprintf (file
, "candidate %d%s\n",
610 cand
->id
, cand
->important
? " (important)" : "");
612 if (cand
->depends_on
)
614 fprintf (file
, " depends on ");
615 dump_bitmap (file
, cand
->depends_on
);
620 fprintf (file
, " final value replacement\n");
624 if (cand
->var_before
)
626 fprintf (file
, " var_before ");
627 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
628 fprintf (file
, "\n");
632 fprintf (file
, " var_after ");
633 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
634 fprintf (file
, "\n");
640 fprintf (file
, " incremented before exit test\n");
644 fprintf (file
, " incremented before use %d\n", cand
->ainc_use
->id
);
648 fprintf (file
, " incremented after use %d\n", cand
->ainc_use
->id
);
652 fprintf (file
, " incremented at end\n");
656 fprintf (file
, " original biv\n");
660 dump_iv (file
, iv
, false);
663 /* Returns the info for ssa version VER. */
665 static inline struct version_info
*
666 ver_info (struct ivopts_data
*data
, unsigned ver
)
668 return data
->version_info
+ ver
;
671 /* Returns the info for ssa name NAME. */
673 static inline struct version_info
*
674 name_info (struct ivopts_data
*data
, tree name
)
676 return ver_info (data
, SSA_NAME_VERSION (name
));
679 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
683 stmt_after_ip_normal_pos (struct loop
*loop
, gimple stmt
)
685 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
689 if (sbb
== loop
->latch
)
695 return stmt
== last_stmt (bb
);
698 /* Returns true if STMT if after the place where the original induction
699 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
700 if the positions are identical. */
703 stmt_after_inc_pos (struct iv_cand
*cand
, gimple stmt
, bool true_if_equal
)
705 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
706 basic_block stmt_bb
= gimple_bb (stmt
);
708 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
711 if (stmt_bb
!= cand_bb
)
715 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
717 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
720 /* Returns true if STMT if after the place where the induction variable
721 CAND is incremented in LOOP. */
724 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
732 return stmt_after_ip_normal_pos (loop
, stmt
);
736 return stmt_after_inc_pos (cand
, stmt
, false);
739 return stmt_after_inc_pos (cand
, stmt
, true);
746 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
749 abnormal_ssa_name_p (tree exp
)
754 if (TREE_CODE (exp
) != SSA_NAME
)
757 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
760 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
761 abnormal phi node. Callback for for_each_index. */
764 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
765 void *data ATTRIBUTE_UNUSED
)
767 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
769 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
771 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
775 return !abnormal_ssa_name_p (*index
);
778 /* Returns true if EXPR contains a ssa name that occurs in an
779 abnormal phi node. */
782 contains_abnormal_ssa_name_p (tree expr
)
785 enum tree_code_class codeclass
;
790 code
= TREE_CODE (expr
);
791 codeclass
= TREE_CODE_CLASS (code
);
793 if (code
== SSA_NAME
)
794 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
796 if (code
== INTEGER_CST
797 || is_gimple_min_invariant (expr
))
800 if (code
== ADDR_EXPR
)
801 return !for_each_index (&TREE_OPERAND (expr
, 0),
802 idx_contains_abnormal_ssa_name_p
,
805 if (code
== COND_EXPR
)
806 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
807 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
808 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
814 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
819 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
831 /* Returns the structure describing number of iterations determined from
832 EXIT of DATA->current_loop, or NULL if something goes wrong. */
834 static struct tree_niter_desc
*
835 niter_for_exit (struct ivopts_data
*data
, edge exit
)
837 struct tree_niter_desc
*desc
;
838 tree_niter_desc
**slot
;
842 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
846 slot
= data
->niters
->get (exit
);
850 /* Try to determine number of iterations. We cannot safely work with ssa
851 names that appear in phi nodes on abnormal edges, so that we do not
852 create overlapping life ranges for them (PR 27283). */
853 desc
= XNEW (struct tree_niter_desc
);
854 if (!number_of_iterations_exit (data
->current_loop
,
856 || contains_abnormal_ssa_name_p (desc
->niter
))
861 data
->niters
->put (exit
, desc
);
869 /* Returns the structure describing number of iterations determined from
870 single dominating exit of DATA->current_loop, or NULL if something
873 static struct tree_niter_desc
*
874 niter_for_single_dom_exit (struct ivopts_data
*data
)
876 edge exit
= single_dom_exit (data
->current_loop
);
881 return niter_for_exit (data
, exit
);
884 /* Initializes data structures used by the iv optimization pass, stored
888 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
890 data
->version_info_size
= 2 * num_ssa_names
;
891 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
892 data
->relevant
= BITMAP_ALLOC (NULL
);
893 data
->important_candidates
= BITMAP_ALLOC (NULL
);
894 data
->max_inv_id
= 0;
896 data
->iv_uses
.create (20);
897 data
->iv_candidates
.create (20);
898 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
899 data
->inv_expr_id
= 0;
900 data
->name_expansion_cache
= NULL
;
901 decl_rtl_to_reset
.create (20);
902 gcc_obstack_init (&data
->iv_obstack
);
905 /* Returns a memory object to that EXPR points. In case we are able to
906 determine that it does not point to any such object, NULL is returned. */
909 determine_base_object (tree expr
)
911 enum tree_code code
= TREE_CODE (expr
);
914 /* If this is a pointer casted to any type, we need to determine
915 the base object for the pointer; so handle conversions before
916 throwing away non-pointer expressions. */
917 if (CONVERT_EXPR_P (expr
))
918 return determine_base_object (TREE_OPERAND (expr
, 0));
920 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
929 obj
= TREE_OPERAND (expr
, 0);
930 base
= get_base_address (obj
);
935 if (TREE_CODE (base
) == MEM_REF
)
936 return determine_base_object (TREE_OPERAND (base
, 0));
938 return fold_convert (ptr_type_node
,
939 build_fold_addr_expr (base
));
941 case POINTER_PLUS_EXPR
:
942 return determine_base_object (TREE_OPERAND (expr
, 0));
946 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
950 return fold_convert (ptr_type_node
, expr
);
954 /* Return true if address expression with non-DECL_P operand appears
958 contain_complex_addr_expr (tree expr
)
963 switch (TREE_CODE (expr
))
965 case POINTER_PLUS_EXPR
:
968 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
969 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
973 return (!DECL_P (TREE_OPERAND (expr
, 0)));
982 /* Allocates an induction variable with given initial value BASE and step STEP
983 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
986 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
987 bool no_overflow
= false)
990 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
992 gcc_assert (step
!= NULL_TREE
);
994 /* Lower address expression in base except ones with DECL_P as operand.
996 1) More accurate cost can be computed for address expressions;
997 2) Duplicate candidates won't be created for bases in different
998 forms, like &a[0] and &a. */
1000 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1001 || contain_complex_addr_expr (expr
))
1004 tree_to_aff_combination (expr
, TREE_TYPE (base
), &comb
);
1005 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1009 iv
->base_object
= determine_base_object (base
);
1012 iv
->have_use_for
= false;
1014 iv
->ssa_name
= NULL_TREE
;
1015 iv
->no_overflow
= no_overflow
;
1020 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1021 doesn't overflow. */
1024 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1027 struct version_info
*info
= name_info (data
, iv
);
1029 gcc_assert (!info
->iv
);
1031 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1032 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1033 info
->iv
->ssa_name
= iv
;
1036 /* Finds induction variable declaration for VAR. */
1039 get_iv (struct ivopts_data
*data
, tree var
)
1042 tree type
= TREE_TYPE (var
);
1044 if (!POINTER_TYPE_P (type
)
1045 && !INTEGRAL_TYPE_P (type
))
1048 if (!name_info (data
, var
)->iv
)
1050 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1053 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1054 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1057 return name_info (data
, var
)->iv
;
1060 /* Return the first non-invariant ssa var found in EXPR. */
1063 extract_single_var_from_expr (tree expr
)
1067 enum tree_code code
;
1069 if (!expr
|| is_gimple_min_invariant (expr
))
1072 code
= TREE_CODE (expr
);
1073 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1075 n
= TREE_OPERAND_LENGTH (expr
);
1076 for (i
= 0; i
< n
; i
++)
1078 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1084 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1087 /* Finds basic ivs. */
1090 find_bivs (struct ivopts_data
*data
)
1094 tree step
, type
, base
, stop
;
1096 struct loop
*loop
= data
->current_loop
;
1099 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1103 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1106 if (virtual_operand_p (PHI_RESULT (phi
)))
1109 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1112 if (integer_zerop (iv
.step
))
1116 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1117 /* Stop expanding iv base at the first ssa var referred by iv step.
1118 Ideally we should stop at any ssa var, because that's expensive
1119 and unusual to happen, we just do it on the first one.
1121 See PR64705 for the rationale. */
1122 stop
= extract_single_var_from_expr (step
);
1123 base
= expand_simple_operations (base
, stop
);
1124 if (contains_abnormal_ssa_name_p (base
)
1125 || contains_abnormal_ssa_name_p (step
))
1128 type
= TREE_TYPE (PHI_RESULT (phi
));
1129 base
= fold_convert (type
, base
);
1132 if (POINTER_TYPE_P (type
))
1133 step
= convert_to_ptrofftype (step
);
1135 step
= fold_convert (type
, step
);
1138 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1145 /* Marks basic ivs. */
1148 mark_bivs (struct ivopts_data
*data
)
1153 struct iv
*iv
, *incr_iv
;
1154 struct loop
*loop
= data
->current_loop
;
1155 basic_block incr_bb
;
1158 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1162 iv
= get_iv (data
, PHI_RESULT (phi
));
1166 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1167 def
= SSA_NAME_DEF_STMT (var
);
1168 /* Don't mark iv peeled from other one as biv. */
1170 && gimple_code (def
) == GIMPLE_PHI
1171 && gimple_bb (def
) == loop
->header
)
1174 incr_iv
= get_iv (data
, var
);
1178 /* If the increment is in the subloop, ignore it. */
1179 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1180 if (incr_bb
->loop_father
!= data
->current_loop
1181 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1185 incr_iv
->biv_p
= true;
1189 /* Checks whether STMT defines a linear induction variable and stores its
1190 parameters to IV. */
1193 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple stmt
, affine_iv
*iv
)
1196 struct loop
*loop
= data
->current_loop
;
1198 iv
->base
= NULL_TREE
;
1199 iv
->step
= NULL_TREE
;
1201 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1204 lhs
= gimple_assign_lhs (stmt
);
1205 if (TREE_CODE (lhs
) != SSA_NAME
)
1208 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1211 /* Stop expanding iv base at the first ssa var referred by iv step.
1212 Ideally we should stop at any ssa var, because that's expensive
1213 and unusual to happen, we just do it on the first one.
1215 See PR64705 for the rationale. */
1216 stop
= extract_single_var_from_expr (iv
->step
);
1217 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1218 if (contains_abnormal_ssa_name_p (iv
->base
)
1219 || contains_abnormal_ssa_name_p (iv
->step
))
1222 /* If STMT could throw, then do not consider STMT as defining a GIV.
1223 While this will suppress optimizations, we can not safely delete this
1224 GIV and associated statements, even if it appears it is not used. */
1225 if (stmt_could_throw_p (stmt
))
1231 /* Finds general ivs in statement STMT. */
1234 find_givs_in_stmt (struct ivopts_data
*data
, gimple stmt
)
1238 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1241 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1244 /* Finds general ivs in basic block BB. */
1247 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1249 gimple_stmt_iterator bsi
;
1251 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1252 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1255 /* Finds general ivs. */
1258 find_givs (struct ivopts_data
*data
)
1260 struct loop
*loop
= data
->current_loop
;
1261 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1264 for (i
= 0; i
< loop
->num_nodes
; i
++)
1265 find_givs_in_bb (data
, body
[i
]);
1269 /* For each ssa name defined in LOOP determines whether it is an induction
1270 variable and if so, its initial value and step. */
1273 find_induction_variables (struct ivopts_data
*data
)
1278 if (!find_bivs (data
))
1284 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1286 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1290 fprintf (dump_file
, " number of iterations ");
1291 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1292 if (!integer_zerop (niter
->may_be_zero
))
1294 fprintf (dump_file
, "; zero if ");
1295 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1297 fprintf (dump_file
, "\n\n");
1300 fprintf (dump_file
, "Induction variables:\n\n");
1302 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1304 if (ver_info (data
, i
)->iv
)
1305 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true);
1312 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1313 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1314 is the const offset stripped from IV base. For uses of other types,
1315 ADDR_BASE and ADDR_OFFSET are zero by default. */
1317 static struct iv_use
*
1318 record_use (struct ivopts_data
*data
, tree
*use_p
, struct iv
*iv
,
1319 gimple stmt
, enum use_type use_type
, tree addr_base
= NULL
,
1320 unsigned HOST_WIDE_INT addr_offset
= 0)
1322 struct iv_use
*use
= XCNEW (struct iv_use
);
1324 use
->id
= n_iv_uses (data
);
1326 use
->type
= use_type
;
1330 use
->related_cands
= BITMAP_ALLOC (NULL
);
1332 use
->addr_base
= addr_base
;
1333 use
->addr_offset
= addr_offset
;
1335 data
->iv_uses
.safe_push (use
);
1340 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1341 The sub use is recorded under the one whose use id is ID_GROUP. */
1343 static struct iv_use
*
1344 record_sub_use (struct ivopts_data
*data
, tree
*use_p
,
1345 struct iv
*iv
, gimple stmt
, enum use_type use_type
,
1346 tree addr_base
, unsigned HOST_WIDE_INT addr_offset
,
1347 unsigned int id_group
)
1349 struct iv_use
*use
= XCNEW (struct iv_use
);
1350 struct iv_use
*group
= iv_use (data
, id_group
);
1352 use
->id
= group
->id
;
1354 use
->type
= use_type
;
1358 use
->related_cands
= NULL
;
1359 use
->addr_base
= addr_base
;
1360 use
->addr_offset
= addr_offset
;
1362 /* Sub use list is maintained in offset ascending order. */
1363 if (addr_offset
<= group
->addr_offset
)
1365 use
->related_cands
= group
->related_cands
;
1366 group
->related_cands
= NULL
;
1368 data
->iv_uses
[id_group
] = use
;
1376 group
= group
->next
;
1378 while (group
&& addr_offset
> group
->addr_offset
);
1379 use
->next
= pre
->next
;
1386 /* Checks whether OP is a loop-level invariant and if so, records it.
1387 NONLINEAR_USE is true if the invariant is used in a way we do not
1388 handle specially. */
1391 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1394 struct version_info
*info
;
1396 if (TREE_CODE (op
) != SSA_NAME
1397 || virtual_operand_p (op
))
1400 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1402 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1405 info
= name_info (data
, op
);
1407 info
->has_nonlin_use
|= nonlinear_use
;
1409 info
->inv_id
= ++data
->max_inv_id
;
1410 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1413 /* Checks whether the use OP is interesting and if so, records it. */
1415 static struct iv_use
*
1416 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1422 if (TREE_CODE (op
) != SSA_NAME
)
1425 iv
= get_iv (data
, op
);
1429 if (iv
->have_use_for
)
1431 use
= iv_use (data
, iv
->use_id
);
1433 gcc_assert (use
->type
== USE_NONLINEAR_EXPR
);
1437 if (integer_zerop (iv
->step
))
1439 record_invariant (data
, op
, true);
1442 iv
->have_use_for
= true;
1444 stmt
= SSA_NAME_DEF_STMT (op
);
1445 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
1446 || is_gimple_assign (stmt
));
1448 use
= record_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
);
1449 iv
->use_id
= use
->id
;
1454 /* Given a condition in statement STMT, checks whether it is a compare
1455 of an induction variable and an invariant. If this is the case,
1456 CONTROL_VAR is set to location of the iv, BOUND to the location of
1457 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1458 induction variable descriptions, and true is returned. If this is not
1459 the case, CONTROL_VAR and BOUND are set to the arguments of the
1460 condition and false is returned. */
1463 extract_cond_operands (struct ivopts_data
*data
, gimple stmt
,
1464 tree
**control_var
, tree
**bound
,
1465 struct iv
**iv_var
, struct iv
**iv_bound
)
1467 /* The objects returned when COND has constant operands. */
1468 static struct iv const_iv
;
1470 tree
*op0
= &zero
, *op1
= &zero
;
1471 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1474 if (gimple_code (stmt
) == GIMPLE_COND
)
1476 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1477 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1478 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1482 op0
= gimple_assign_rhs1_ptr (stmt
);
1483 op1
= gimple_assign_rhs2_ptr (stmt
);
1486 zero
= integer_zero_node
;
1487 const_iv
.step
= integer_zero_node
;
1489 if (TREE_CODE (*op0
) == SSA_NAME
)
1490 iv0
= get_iv (data
, *op0
);
1491 if (TREE_CODE (*op1
) == SSA_NAME
)
1492 iv1
= get_iv (data
, *op1
);
1494 /* Exactly one of the compared values must be an iv, and the other one must
1499 if (integer_zerop (iv0
->step
))
1501 /* Control variable may be on the other side. */
1502 std::swap (op0
, op1
);
1503 std::swap (iv0
, iv1
);
1505 ret
= !integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
);
1520 /* Checks whether the condition in STMT is interesting and if so,
1524 find_interesting_uses_cond (struct ivopts_data
*data
, gimple stmt
)
1526 tree
*var_p
, *bound_p
;
1529 if (!extract_cond_operands (data
, stmt
, &var_p
, &bound_p
, &var_iv
, NULL
))
1531 find_interesting_uses_op (data
, *var_p
);
1532 find_interesting_uses_op (data
, *bound_p
);
1536 record_use (data
, NULL
, var_iv
, stmt
, USE_COMPARE
);
1539 /* Returns the outermost loop EXPR is obviously invariant in
1540 relative to the loop LOOP, i.e. if all its operands are defined
1541 outside of the returned loop. Returns NULL if EXPR is not
1542 even obviously invariant in LOOP. */
1545 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1550 if (is_gimple_min_invariant (expr
))
1551 return current_loops
->tree_root
;
1553 if (TREE_CODE (expr
) == SSA_NAME
)
1555 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1558 if (flow_bb_inside_loop_p (loop
, def_bb
))
1560 return superloop_at_depth (loop
,
1561 loop_depth (def_bb
->loop_father
) + 1);
1564 return current_loops
->tree_root
;
1570 unsigned maxdepth
= 0;
1571 len
= TREE_OPERAND_LENGTH (expr
);
1572 for (i
= 0; i
< len
; i
++)
1574 struct loop
*ivloop
;
1575 if (!TREE_OPERAND (expr
, i
))
1578 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1581 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1584 return superloop_at_depth (loop
, maxdepth
);
1587 /* Returns true if expression EXPR is obviously invariant in LOOP,
1588 i.e. if all its operands are defined outside of the LOOP. LOOP
1589 should not be the function body. */
1592 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1597 gcc_assert (loop_depth (loop
) > 0);
1599 if (is_gimple_min_invariant (expr
))
1602 if (TREE_CODE (expr
) == SSA_NAME
)
1604 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1606 && flow_bb_inside_loop_p (loop
, def_bb
))
1615 len
= TREE_OPERAND_LENGTH (expr
);
1616 for (i
= 0; i
< len
; i
++)
1617 if (TREE_OPERAND (expr
, i
)
1618 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1624 /* Cumulates the steps of indices into DATA and replaces their values with the
1625 initial ones. Returns false when the value of the index cannot be determined.
1626 Callback for for_each_index. */
1628 struct ifs_ivopts_data
1630 struct ivopts_data
*ivopts_data
;
1636 idx_find_step (tree base
, tree
*idx
, void *data
)
1638 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
1640 bool use_overflow_semantics
= false;
1641 tree step
, iv_base
, iv_step
, lbound
, off
;
1642 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
1644 /* If base is a component ref, require that the offset of the reference
1646 if (TREE_CODE (base
) == COMPONENT_REF
)
1648 off
= component_ref_field_offset (base
);
1649 return expr_invariant_in_loop_p (loop
, off
);
1652 /* If base is array, first check whether we will be able to move the
1653 reference out of the loop (in order to take its address in strength
1654 reduction). In order for this to work we need both lower bound
1655 and step to be loop invariants. */
1656 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1658 /* Moreover, for a range, the size needs to be invariant as well. */
1659 if (TREE_CODE (base
) == ARRAY_RANGE_REF
1660 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
1663 step
= array_ref_element_size (base
);
1664 lbound
= array_ref_low_bound (base
);
1666 if (!expr_invariant_in_loop_p (loop
, step
)
1667 || !expr_invariant_in_loop_p (loop
, lbound
))
1671 if (TREE_CODE (*idx
) != SSA_NAME
)
1674 iv
= get_iv (dta
->ivopts_data
, *idx
);
1678 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1679 *&x[0], which is not folded and does not trigger the
1680 ARRAY_REF path below. */
1683 if (integer_zerop (iv
->step
))
1686 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1688 step
= array_ref_element_size (base
);
1690 /* We only handle addresses whose step is an integer constant. */
1691 if (TREE_CODE (step
) != INTEGER_CST
)
1695 /* The step for pointer arithmetics already is 1 byte. */
1696 step
= size_one_node
;
1700 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
1701 use_overflow_semantics
= true;
1703 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
1704 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
1705 use_overflow_semantics
))
1707 /* The index might wrap. */
1711 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
1712 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
1717 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1718 object is passed to it in DATA. */
1721 idx_record_use (tree base
, tree
*idx
,
1724 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
1725 find_interesting_uses_op (data
, *idx
);
1726 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
1728 find_interesting_uses_op (data
, array_ref_element_size (base
));
1729 find_interesting_uses_op (data
, array_ref_low_bound (base
));
1734 /* If we can prove that TOP = cst * BOT for some constant cst,
1735 store cst to MUL and return true. Otherwise return false.
1736 The returned value is always sign-extended, regardless of the
1737 signedness of TOP and BOT. */
1740 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
1743 enum tree_code code
;
1744 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
1745 widest_int res
, p0
, p1
;
1750 if (operand_equal_p (top
, bot
, 0))
1756 code
= TREE_CODE (top
);
1760 mby
= TREE_OPERAND (top
, 1);
1761 if (TREE_CODE (mby
) != INTEGER_CST
)
1764 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
1767 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
1772 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
1773 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
1776 if (code
== MINUS_EXPR
)
1778 *mul
= wi::sext (p0
+ p1
, precision
);
1782 if (TREE_CODE (bot
) != INTEGER_CST
)
1785 p0
= widest_int::from (top
, SIGNED
);
1786 p1
= widest_int::from (bot
, SIGNED
);
1789 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
1797 /* Return true if memory reference REF with step STEP may be unaligned. */
1800 may_be_unaligned_p (tree ref
, tree step
)
1802 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1803 thus they are not misaligned. */
1804 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
1807 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
1808 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
1809 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
1811 unsigned HOST_WIDE_INT bitpos
;
1812 unsigned int ref_align
;
1813 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
1814 if (ref_align
< align
1815 || (bitpos
% align
) != 0
1816 || (bitpos
% BITS_PER_UNIT
) != 0)
1819 unsigned int trailing_zeros
= tree_ctz (step
);
1820 if (trailing_zeros
< HOST_BITS_PER_INT
1821 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
1827 /* Return true if EXPR may be non-addressable. */
1830 may_be_nonaddressable_p (tree expr
)
1832 switch (TREE_CODE (expr
))
1834 case TARGET_MEM_REF
:
1835 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1836 target, thus they are always addressable. */
1840 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
1841 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1843 case VIEW_CONVERT_EXPR
:
1844 /* This kind of view-conversions may wrap non-addressable objects
1845 and make them look addressable. After some processing the
1846 non-addressability may be uncovered again, causing ADDR_EXPRs
1847 of inappropriate objects to be built. */
1848 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
1849 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
1852 /* ... fall through ... */
1855 case ARRAY_RANGE_REF
:
1856 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
1869 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
);
1871 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1872 If there is an existing use which has same stripped iv base and step,
1873 this function records this one as a sub use to that; otherwise records
1874 it as a normal one. */
1876 static struct iv_use
*
1877 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1878 struct iv
*iv
, gimple stmt
, enum use_type use_type
)
1883 unsigned HOST_WIDE_INT addr_offset
;
1885 /* Only support sub use for address type uses, that is, with base
1887 if (!iv
->base_object
)
1888 return record_use (data
, use_p
, iv
, stmt
, use_type
);
1890 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1891 for (i
= 0; i
< n_iv_uses (data
); i
++)
1893 use
= iv_use (data
, i
);
1894 if (use
->type
!= USE_ADDRESS
|| !use
->iv
->base_object
)
1897 /* Check if it has the same stripped base and step. */
1898 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1899 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1900 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1904 if (i
== n_iv_uses (data
))
1905 return record_use (data
, use_p
, iv
, stmt
,
1906 use_type
, addr_base
, addr_offset
);
1908 return record_sub_use (data
, use_p
, iv
, stmt
,
1909 use_type
, addr_base
, addr_offset
, i
);
1912 /* Finds addresses in *OP_P inside STMT. */
1915 find_interesting_uses_address (struct ivopts_data
*data
, gimple stmt
, tree
*op_p
)
1917 tree base
= *op_p
, step
= size_zero_node
;
1919 struct ifs_ivopts_data ifs_ivopts_data
;
1921 /* Do not play with volatile memory references. A bit too conservative,
1922 perhaps, but safe. */
1923 if (gimple_has_volatile_ops (stmt
))
1926 /* Ignore bitfields for now. Not really something terribly complicated
1928 if (TREE_CODE (base
) == BIT_FIELD_REF
)
1931 base
= unshare_expr (base
);
1933 if (TREE_CODE (base
) == TARGET_MEM_REF
)
1935 tree type
= build_pointer_type (TREE_TYPE (base
));
1939 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
1941 civ
= get_iv (data
, TMR_BASE (base
));
1945 TMR_BASE (base
) = civ
->base
;
1948 if (TMR_INDEX2 (base
)
1949 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
1951 civ
= get_iv (data
, TMR_INDEX2 (base
));
1955 TMR_INDEX2 (base
) = civ
->base
;
1958 if (TMR_INDEX (base
)
1959 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
1961 civ
= get_iv (data
, TMR_INDEX (base
));
1965 TMR_INDEX (base
) = civ
->base
;
1970 if (TMR_STEP (base
))
1971 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
1973 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
1977 if (integer_zerop (step
))
1979 base
= tree_mem_ref_addr (type
, base
);
1983 ifs_ivopts_data
.ivopts_data
= data
;
1984 ifs_ivopts_data
.stmt
= stmt
;
1985 ifs_ivopts_data
.step
= size_zero_node
;
1986 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
1987 || integer_zerop (ifs_ivopts_data
.step
))
1989 step
= ifs_ivopts_data
.step
;
1991 /* Check that the base expression is addressable. This needs
1992 to be done after substituting bases of IVs into it. */
1993 if (may_be_nonaddressable_p (base
))
1996 /* Moreover, on strict alignment platforms, check that it is
1997 sufficiently aligned. */
1998 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2001 base
= build_fold_addr_expr (base
);
2003 /* Substituting bases of IVs into the base expression might
2004 have caused folding opportunities. */
2005 if (TREE_CODE (base
) == ADDR_EXPR
)
2007 tree
*ref
= &TREE_OPERAND (base
, 0);
2008 while (handled_component_p (*ref
))
2009 ref
= &TREE_OPERAND (*ref
, 0);
2010 if (TREE_CODE (*ref
) == MEM_REF
)
2012 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2013 TREE_OPERAND (*ref
, 0),
2014 TREE_OPERAND (*ref
, 1));
2021 civ
= alloc_iv (data
, base
, step
);
2022 record_group_use (data
, op_p
, civ
, stmt
, USE_ADDRESS
);
2026 for_each_index (op_p
, idx_record_use
, data
);
2029 /* Finds and records invariants used in STMT. */
2032 find_invariants_stmt (struct ivopts_data
*data
, gimple stmt
)
2035 use_operand_p use_p
;
2038 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2040 op
= USE_FROM_PTR (use_p
);
2041 record_invariant (data
, op
, false);
2045 /* Finds interesting uses of induction variables in the statement STMT. */
2048 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple stmt
)
2051 tree op
, *lhs
, *rhs
;
2053 use_operand_p use_p
;
2054 enum tree_code code
;
2056 find_invariants_stmt (data
, stmt
);
2058 if (gimple_code (stmt
) == GIMPLE_COND
)
2060 find_interesting_uses_cond (data
, stmt
);
2064 if (is_gimple_assign (stmt
))
2066 lhs
= gimple_assign_lhs_ptr (stmt
);
2067 rhs
= gimple_assign_rhs1_ptr (stmt
);
2069 if (TREE_CODE (*lhs
) == SSA_NAME
)
2071 /* If the statement defines an induction variable, the uses are not
2072 interesting by themselves. */
2074 iv
= get_iv (data
, *lhs
);
2076 if (iv
&& !integer_zerop (iv
->step
))
2080 code
= gimple_assign_rhs_code (stmt
);
2081 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2082 && (REFERENCE_CLASS_P (*rhs
)
2083 || is_gimple_val (*rhs
)))
2085 if (REFERENCE_CLASS_P (*rhs
))
2086 find_interesting_uses_address (data
, stmt
, rhs
);
2088 find_interesting_uses_op (data
, *rhs
);
2090 if (REFERENCE_CLASS_P (*lhs
))
2091 find_interesting_uses_address (data
, stmt
, lhs
);
2094 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2096 find_interesting_uses_cond (data
, stmt
);
2100 /* TODO -- we should also handle address uses of type
2102 memory = call (whatever);
2109 if (gimple_code (stmt
) == GIMPLE_PHI
2110 && gimple_bb (stmt
) == data
->current_loop
->header
)
2112 iv
= get_iv (data
, PHI_RESULT (stmt
));
2114 if (iv
&& !integer_zerop (iv
->step
))
2118 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2120 op
= USE_FROM_PTR (use_p
);
2122 if (TREE_CODE (op
) != SSA_NAME
)
2125 iv
= get_iv (data
, op
);
2129 find_interesting_uses_op (data
, op
);
2133 /* Finds interesting uses of induction variables outside of loops
2134 on loop exit edge EXIT. */
2137 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2143 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2146 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2147 if (!virtual_operand_p (def
))
2148 find_interesting_uses_op (data
, def
);
2152 /* Finds uses of the induction variables that are interesting. */
2155 find_interesting_uses (struct ivopts_data
*data
)
2158 gimple_stmt_iterator bsi
;
2159 basic_block
*body
= get_loop_body (data
->current_loop
);
2161 struct version_info
*info
;
2164 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2165 fprintf (dump_file
, "Uses:\n\n");
2167 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2172 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2173 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2174 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2175 find_interesting_uses_outside (data
, e
);
2177 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2178 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2179 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2180 if (!is_gimple_debug (gsi_stmt (bsi
)))
2181 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2184 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2188 fprintf (dump_file
, "\n");
2190 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2192 info
= ver_info (data
, i
);
2195 fprintf (dump_file
, " ");
2196 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
2197 fprintf (dump_file
, " is invariant (%d)%s\n",
2198 info
->inv_id
, info
->has_nonlin_use
? "" : ", eliminable");
2202 fprintf (dump_file
, "\n");
2208 /* Compute maximum offset of [base + offset] addressing mode
2209 for memory reference represented by USE. */
2211 static HOST_WIDE_INT
2212 compute_max_addr_offset (struct iv_use
*use
)
2216 HOST_WIDE_INT i
, off
;
2217 unsigned list_index
, num
;
2219 machine_mode mem_mode
, addr_mode
;
2220 static vec
<HOST_WIDE_INT
> max_offset_list
;
2222 as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2223 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2225 num
= max_offset_list
.length ();
2226 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2227 if (list_index
>= num
)
2229 max_offset_list
.safe_grow (list_index
+ MAX_MACHINE_MODE
);
2230 for (; num
< max_offset_list
.length (); num
++)
2231 max_offset_list
[num
] = -1;
2234 off
= max_offset_list
[list_index
];
2238 addr_mode
= targetm
.addr_space
.address_mode (as
);
2239 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2240 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2242 width
= GET_MODE_BITSIZE (addr_mode
) - 1;
2243 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
2244 width
= HOST_BITS_PER_WIDE_INT
- 1;
2246 for (i
= width
; i
> 0; i
--)
2248 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
2249 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2250 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2253 /* For some strict-alignment targets, the offset must be naturally
2254 aligned. Try an aligned offset if mem_mode is not QImode. */
2255 off
= ((unsigned HOST_WIDE_INT
) 1 << i
);
2256 if (off
> GET_MODE_SIZE (mem_mode
) && mem_mode
!= QImode
)
2258 off
-= GET_MODE_SIZE (mem_mode
);
2259 XEXP (addr
, 1) = gen_int_mode (off
, addr_mode
);
2260 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
2267 max_offset_list
[list_index
] = off
;
2271 /* Check if all small groups should be split. Return true if and
2274 1) At least one groups contain two uses with different offsets.
2275 2) No group contains more than two uses with different offsets.
2277 Return false otherwise. We want to split such groups because:
2279 1) Small groups don't have much benefit and may interfer with
2280 general candidate selection.
2281 2) Size for problem with only small groups is usually small and
2282 general algorithm can handle it well.
2284 TODO -- Above claim may not hold when auto increment is supported. */
2287 split_all_small_groups (struct ivopts_data
*data
)
2289 bool split_p
= false;
2290 unsigned int i
, n
, distinct
;
2291 struct iv_use
*pre
, *use
;
2293 n
= n_iv_uses (data
);
2294 for (i
= 0; i
< n
; i
++)
2296 use
= iv_use (data
, i
);
2301 gcc_assert (use
->type
== USE_ADDRESS
);
2302 for (pre
= use
, use
= use
->next
; use
; pre
= use
, use
= use
->next
)
2304 if (pre
->addr_offset
!= use
->addr_offset
)
2317 /* For each group of address type uses, this function further groups
2318 these uses according to the maximum offset supported by target's
2319 [base + offset] addressing mode. */
2322 group_address_uses (struct ivopts_data
*data
)
2324 HOST_WIDE_INT max_offset
= -1;
2325 unsigned int i
, n
, sub_id
;
2326 struct iv_use
*pre
, *use
;
2327 unsigned HOST_WIDE_INT addr_offset_first
;
2329 /* Reset max offset to split all small groups. */
2330 if (split_all_small_groups (data
))
2333 n
= n_iv_uses (data
);
2334 for (i
= 0; i
< n
; i
++)
2336 use
= iv_use (data
, i
);
2340 gcc_assert (use
->type
== USE_ADDRESS
);
2341 if (max_offset
!= 0)
2342 max_offset
= compute_max_addr_offset (use
);
2347 addr_offset_first
= use
->addr_offset
;
2348 /* Only uses with offset that can fit in offset part against
2349 the first use can be grouped together. */
2350 for (pre
= use
, use
= use
->next
;
2351 use
&& (use
->addr_offset
- addr_offset_first
2352 <= (unsigned HOST_WIDE_INT
) max_offset
);
2353 pre
= use
, use
= use
->next
)
2356 use
->sub_id
= ++sub_id
;
2359 /* Break the list and create new group. */
2363 use
->id
= n_iv_uses (data
);
2364 use
->related_cands
= BITMAP_ALLOC (NULL
);
2365 data
->iv_uses
.safe_push (use
);
2370 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2371 dump_uses (dump_file
, data
);
2374 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2375 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2376 we are at the top-level of the processed address. */
2379 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2380 HOST_WIDE_INT
*offset
)
2382 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2383 enum tree_code code
;
2384 tree type
, orig_type
= TREE_TYPE (expr
);
2385 HOST_WIDE_INT off0
, off1
, st
;
2386 tree orig_expr
= expr
;
2390 type
= TREE_TYPE (expr
);
2391 code
= TREE_CODE (expr
);
2397 if (!cst_and_fits_in_hwi (expr
)
2398 || integer_zerop (expr
))
2401 *offset
= int_cst_value (expr
);
2402 return build_int_cst (orig_type
, 0);
2404 case POINTER_PLUS_EXPR
:
2407 op0
= TREE_OPERAND (expr
, 0);
2408 op1
= TREE_OPERAND (expr
, 1);
2410 op0
= strip_offset_1 (op0
, false, false, &off0
);
2411 op1
= strip_offset_1 (op1
, false, false, &off1
);
2413 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2414 if (op0
== TREE_OPERAND (expr
, 0)
2415 && op1
== TREE_OPERAND (expr
, 1))
2418 if (integer_zerop (op1
))
2420 else if (integer_zerop (op0
))
2422 if (code
== MINUS_EXPR
)
2423 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2428 expr
= fold_build2 (code
, type
, op0
, op1
);
2430 return fold_convert (orig_type
, expr
);
2433 op1
= TREE_OPERAND (expr
, 1);
2434 if (!cst_and_fits_in_hwi (op1
))
2437 op0
= TREE_OPERAND (expr
, 0);
2438 op0
= strip_offset_1 (op0
, false, false, &off0
);
2439 if (op0
== TREE_OPERAND (expr
, 0))
2442 *offset
= off0
* int_cst_value (op1
);
2443 if (integer_zerop (op0
))
2446 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2448 return fold_convert (orig_type
, expr
);
2451 case ARRAY_RANGE_REF
:
2455 step
= array_ref_element_size (expr
);
2456 if (!cst_and_fits_in_hwi (step
))
2459 st
= int_cst_value (step
);
2460 op1
= TREE_OPERAND (expr
, 1);
2461 op1
= strip_offset_1 (op1
, false, false, &off1
);
2462 *offset
= off1
* st
;
2465 && integer_zerop (op1
))
2467 /* Strip the component reference completely. */
2468 op0
= TREE_OPERAND (expr
, 0);
2469 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2482 tmp
= component_ref_field_offset (expr
);
2483 field
= TREE_OPERAND (expr
, 1);
2485 && cst_and_fits_in_hwi (tmp
)
2486 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2488 HOST_WIDE_INT boffset
, abs_off
;
2490 /* Strip the component reference completely. */
2491 op0
= TREE_OPERAND (expr
, 0);
2492 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2493 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2494 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2498 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2505 op0
= TREE_OPERAND (expr
, 0);
2506 op0
= strip_offset_1 (op0
, true, true, &off0
);
2509 if (op0
== TREE_OPERAND (expr
, 0))
2512 expr
= build_fold_addr_expr (op0
);
2513 return fold_convert (orig_type
, expr
);
2516 /* ??? Offset operand? */
2517 inside_addr
= false;
2524 /* Default handling of expressions for that we want to recurse into
2525 the first operand. */
2526 op0
= TREE_OPERAND (expr
, 0);
2527 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2530 if (op0
== TREE_OPERAND (expr
, 0)
2531 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2534 expr
= copy_node (expr
);
2535 TREE_OPERAND (expr
, 0) = op0
;
2537 TREE_OPERAND (expr
, 1) = op1
;
2539 /* Inside address, we might strip the top level component references,
2540 thus changing type of the expression. Handling of ADDR_EXPR
2542 expr
= fold_convert (orig_type
, expr
);
2547 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2550 strip_offset (tree expr
, unsigned HOST_WIDE_INT
*offset
)
2553 tree core
= strip_offset_1 (expr
, false, false, &off
);
2558 /* Returns variant of TYPE that can be used as base for different uses.
2559 We return unsigned type with the same precision, which avoids problems
2563 generic_type_for (tree type
)
2565 if (POINTER_TYPE_P (type
))
2566 return unsigned_type_for (type
);
2568 if (TYPE_UNSIGNED (type
))
2571 return unsigned_type_for (type
);
2574 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2575 the bitmap to that we should store it. */
2577 static struct ivopts_data
*fd_ivopts_data
;
2579 find_depends (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2581 bitmap
*depends_on
= (bitmap
*) data
;
2582 struct version_info
*info
;
2584 if (TREE_CODE (*expr_p
) != SSA_NAME
)
2586 info
= name_info (fd_ivopts_data
, *expr_p
);
2588 if (!info
->inv_id
|| info
->has_nonlin_use
)
2592 *depends_on
= BITMAP_ALLOC (NULL
);
2593 bitmap_set_bit (*depends_on
, info
->inv_id
);
2598 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2599 position to POS. If USE is not NULL, the candidate is set as related to
2600 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2601 replacement of the final value of the iv by a direct computation. */
2603 static struct iv_cand
*
2604 add_candidate_1 (struct ivopts_data
*data
,
2605 tree base
, tree step
, bool important
, enum iv_position pos
,
2606 struct iv_use
*use
, gimple incremented_at
)
2609 struct iv_cand
*cand
= NULL
;
2610 tree type
, orig_type
;
2612 /* For non-original variables, make sure their values are computed in a type
2613 that does not invoke undefined behavior on overflows (since in general,
2614 we cannot prove that these induction variables are non-wrapping). */
2615 if (pos
!= IP_ORIGINAL
)
2617 orig_type
= TREE_TYPE (base
);
2618 type
= generic_type_for (orig_type
);
2619 if (type
!= orig_type
)
2621 base
= fold_convert (type
, base
);
2622 step
= fold_convert (type
, step
);
2626 for (i
= 0; i
< n_iv_cands (data
); i
++)
2628 cand
= iv_cand (data
, i
);
2630 if (cand
->pos
!= pos
)
2633 if (cand
->incremented_at
!= incremented_at
2634 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2635 && cand
->ainc_use
!= use
))
2649 if (operand_equal_p (base
, cand
->iv
->base
, 0)
2650 && operand_equal_p (step
, cand
->iv
->step
, 0)
2651 && (TYPE_PRECISION (TREE_TYPE (base
))
2652 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
2656 if (i
== n_iv_cands (data
))
2658 cand
= XCNEW (struct iv_cand
);
2664 cand
->iv
= alloc_iv (data
, base
, step
);
2667 if (pos
!= IP_ORIGINAL
&& cand
->iv
)
2669 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
2670 cand
->var_after
= cand
->var_before
;
2672 cand
->important
= important
;
2673 cand
->incremented_at
= incremented_at
;
2674 data
->iv_candidates
.safe_push (cand
);
2677 && TREE_CODE (step
) != INTEGER_CST
)
2679 fd_ivopts_data
= data
;
2680 walk_tree (&step
, find_depends
, &cand
->depends_on
, NULL
);
2683 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
2684 cand
->ainc_use
= use
;
2686 cand
->ainc_use
= NULL
;
2688 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2689 dump_cand (dump_file
, cand
);
2692 if (important
&& !cand
->important
)
2694 cand
->important
= true;
2695 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2696 fprintf (dump_file
, "Candidate %d is important\n", cand
->id
);
2701 bitmap_set_bit (use
->related_cands
, i
);
2702 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2703 fprintf (dump_file
, "Candidate %d is related to use %d\n",
2710 /* Returns true if incrementing the induction variable at the end of the LOOP
2713 The purpose is to avoid splitting latch edge with a biv increment, thus
2714 creating a jump, possibly confusing other optimization passes and leaving
2715 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2716 is not available (so we do not have a better alternative), or if the latch
2717 edge is already nonempty. */
2720 allow_ip_end_pos_p (struct loop
*loop
)
2722 if (!ip_normal_pos (loop
))
2725 if (!empty_block_p (ip_end_pos (loop
)))
2731 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2732 Important field is set to IMPORTANT. */
2735 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
2736 bool important
, struct iv_use
*use
)
2738 basic_block use_bb
= gimple_bb (use
->stmt
);
2739 machine_mode mem_mode
;
2740 unsigned HOST_WIDE_INT cstepi
;
2742 /* If we insert the increment in any position other than the standard
2743 ones, we must ensure that it is incremented once per iteration.
2744 It must not be in an inner nested loop, or one side of an if
2746 if (use_bb
->loop_father
!= data
->current_loop
2747 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
2748 || stmt_could_throw_p (use
->stmt
)
2749 || !cst_and_fits_in_hwi (step
))
2752 cstepi
= int_cst_value (step
);
2754 mem_mode
= TYPE_MODE (TREE_TYPE (*use
->op_p
));
2755 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
2756 || USE_STORE_PRE_INCREMENT (mem_mode
))
2757 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2758 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
2759 || USE_STORE_PRE_DECREMENT (mem_mode
))
2760 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2762 enum tree_code code
= MINUS_EXPR
;
2764 tree new_step
= step
;
2766 if (POINTER_TYPE_P (TREE_TYPE (base
)))
2768 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
2769 code
= POINTER_PLUS_EXPR
;
2772 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
2773 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
2774 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
2777 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
2778 || USE_STORE_POST_INCREMENT (mem_mode
))
2779 && GET_MODE_SIZE (mem_mode
) == cstepi
)
2780 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
2781 || USE_STORE_POST_DECREMENT (mem_mode
))
2782 && GET_MODE_SIZE (mem_mode
) == -cstepi
))
2784 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
2789 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2790 position to POS. If USE is not NULL, the candidate is set as related to
2791 it. The candidate computation is scheduled before exit condition and at
2795 add_candidate (struct ivopts_data
*data
,
2796 tree base
, tree step
, bool important
, struct iv_use
*use
)
2798 gcc_assert (use
== NULL
|| use
->sub_id
== 0);
2800 if (ip_normal_pos (data
->current_loop
))
2801 add_candidate_1 (data
, base
, step
, important
, IP_NORMAL
, use
, NULL
);
2802 if (ip_end_pos (data
->current_loop
)
2803 && allow_ip_end_pos_p (data
->current_loop
))
2804 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
);
2807 /* Adds standard iv candidates. */
2810 add_standard_iv_candidates (struct ivopts_data
*data
)
2812 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
2814 /* The same for a double-integer type if it is still fast enough. */
2816 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
2817 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
2818 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
2819 build_int_cst (long_integer_type_node
, 1), true, NULL
);
2821 /* The same for a double-integer type if it is still fast enough. */
2823 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
2824 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
2825 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
2826 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
2830 /* Adds candidates bases on the old induction variable IV. */
2833 add_iv_candidate_for_biv (struct ivopts_data
*data
, struct iv
*iv
)
2837 struct iv_cand
*cand
;
2839 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
2841 /* The same, but with initial value zero. */
2842 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
2843 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
2845 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
2846 iv
->step
, true, NULL
);
2848 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
2849 if (gimple_code (phi
) == GIMPLE_PHI
)
2851 /* Additionally record the possibility of leaving the original iv
2853 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
2854 /* Don't add candidate if it's from another PHI node because
2855 it's an affine iv appearing in the form of PEELED_CHREC. */
2856 phi
= SSA_NAME_DEF_STMT (def
);
2857 if (gimple_code (phi
) != GIMPLE_PHI
)
2859 cand
= add_candidate_1 (data
,
2860 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
2861 SSA_NAME_DEF_STMT (def
));
2862 cand
->var_before
= iv
->ssa_name
;
2863 cand
->var_after
= def
;
2866 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
2870 /* Adds candidates based on the old induction variables. */
2873 add_iv_candidate_for_bivs (struct ivopts_data
*data
)
2879 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2881 iv
= ver_info (data
, i
)->iv
;
2882 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
2883 add_iv_candidate_for_biv (data
, iv
);
2887 /* Adds candidates based on the value of USE's iv. */
2890 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
2892 unsigned HOST_WIDE_INT offset
;
2895 struct iv
*iv
= use
->iv
;
2897 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
2899 /* The same, but with initial value zero. Make such variable important,
2900 since it is generic enough so that possibly many uses may be based
2902 basetype
= TREE_TYPE (iv
->base
);
2903 if (POINTER_TYPE_P (basetype
))
2904 basetype
= sizetype
;
2905 add_candidate (data
, build_int_cst (basetype
, 0), iv
->step
, true, use
);
2907 /* Third, try removing the constant offset. Make sure to even
2908 add a candidate for &a[0] vs. (T *)&a. */
2909 base
= strip_offset (iv
->base
, &offset
);
2910 if (offset
|| base
!= iv
->base
)
2911 add_candidate (data
, base
, iv
->step
, false, use
);
2913 /* At last, add auto-incremental candidates. Make such variables
2914 important since other iv uses with same base object may be based
2916 if (use
!= NULL
&& use
->type
== USE_ADDRESS
)
2917 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
2920 /* Adds candidates based on the uses. */
2923 add_iv_candidate_for_uses (struct ivopts_data
*data
)
2927 for (i
= 0; i
< n_iv_uses (data
); i
++)
2929 struct iv_use
*use
= iv_use (data
, i
);
2936 case USE_NONLINEAR_EXPR
:
2939 /* Just add the ivs based on the value of the iv used here. */
2940 add_iv_candidate_for_use (data
, use
);
2949 /* Record important candidates and add them to related_cands bitmaps
2953 record_important_candidates (struct ivopts_data
*data
)
2958 for (i
= 0; i
< n_iv_cands (data
); i
++)
2960 struct iv_cand
*cand
= iv_cand (data
, i
);
2962 if (cand
->important
)
2963 bitmap_set_bit (data
->important_candidates
, i
);
2966 data
->consider_all_candidates
= (n_iv_cands (data
)
2967 <= CONSIDER_ALL_CANDIDATES_BOUND
);
2969 if (data
->consider_all_candidates
)
2971 /* We will not need "related_cands" bitmaps in this case,
2972 so release them to decrease peak memory consumption. */
2973 for (i
= 0; i
< n_iv_uses (data
); i
++)
2975 use
= iv_use (data
, i
);
2976 BITMAP_FREE (use
->related_cands
);
2981 /* Add important candidates to the related_cands bitmaps. */
2982 for (i
= 0; i
< n_iv_uses (data
); i
++)
2983 bitmap_ior_into (iv_use (data
, i
)->related_cands
,
2984 data
->important_candidates
);
2988 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2989 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2990 we allocate a simple list to every use. */
2993 alloc_use_cost_map (struct ivopts_data
*data
)
2995 unsigned i
, size
, s
;
2997 for (i
= 0; i
< n_iv_uses (data
); i
++)
2999 struct iv_use
*use
= iv_use (data
, i
);
3001 if (data
->consider_all_candidates
)
3002 size
= n_iv_cands (data
);
3005 s
= bitmap_count_bits (use
->related_cands
);
3007 /* Round up to the power of two, so that moduling by it is fast. */
3008 size
= s
? (1 << ceil_log2 (s
)) : 1;
3011 use
->n_map_members
= size
;
3012 use
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3016 /* Returns description of computation cost of expression whose runtime
3017 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3020 new_cost (unsigned runtime
, unsigned complexity
)
3024 cost
.cost
= runtime
;
3025 cost
.complexity
= complexity
;
3030 /* Returns true if COST is infinite. */
3033 infinite_cost_p (comp_cost cost
)
3035 return cost
.cost
== INFTY
;
3038 /* Adds costs COST1 and COST2. */
3041 add_costs (comp_cost cost1
, comp_cost cost2
)
3043 if (infinite_cost_p (cost1
) || infinite_cost_p (cost2
))
3044 return infinite_cost
;
3046 cost1
.cost
+= cost2
.cost
;
3047 cost1
.complexity
+= cost2
.complexity
;
3051 /* Subtracts costs COST1 and COST2. */
3054 sub_costs (comp_cost cost1
, comp_cost cost2
)
3056 cost1
.cost
-= cost2
.cost
;
3057 cost1
.complexity
-= cost2
.complexity
;
3062 /* Returns a negative number if COST1 < COST2, a positive number if
3063 COST1 > COST2, and 0 if COST1 = COST2. */
3066 compare_costs (comp_cost cost1
, comp_cost cost2
)
3068 if (cost1
.cost
== cost2
.cost
)
3069 return cost1
.complexity
- cost2
.complexity
;
3071 return cost1
.cost
- cost2
.cost
;
3074 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3075 on invariants DEPENDS_ON and that the value used in expressing it
3076 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3079 set_use_iv_cost (struct ivopts_data
*data
,
3080 struct iv_use
*use
, struct iv_cand
*cand
,
3081 comp_cost cost
, bitmap depends_on
, tree value
,
3082 enum tree_code comp
, int inv_expr_id
)
3086 if (infinite_cost_p (cost
))
3088 BITMAP_FREE (depends_on
);
3092 if (data
->consider_all_candidates
)
3094 use
->cost_map
[cand
->id
].cand
= cand
;
3095 use
->cost_map
[cand
->id
].cost
= cost
;
3096 use
->cost_map
[cand
->id
].depends_on
= depends_on
;
3097 use
->cost_map
[cand
->id
].value
= value
;
3098 use
->cost_map
[cand
->id
].comp
= comp
;
3099 use
->cost_map
[cand
->id
].inv_expr_id
= inv_expr_id
;
3103 /* n_map_members is a power of two, so this computes modulo. */
3104 s
= cand
->id
& (use
->n_map_members
- 1);
3105 for (i
= s
; i
< use
->n_map_members
; i
++)
3106 if (!use
->cost_map
[i
].cand
)
3108 for (i
= 0; i
< s
; i
++)
3109 if (!use
->cost_map
[i
].cand
)
3115 use
->cost_map
[i
].cand
= cand
;
3116 use
->cost_map
[i
].cost
= cost
;
3117 use
->cost_map
[i
].depends_on
= depends_on
;
3118 use
->cost_map
[i
].value
= value
;
3119 use
->cost_map
[i
].comp
= comp
;
3120 use
->cost_map
[i
].inv_expr_id
= inv_expr_id
;
3123 /* Gets cost of (USE, CANDIDATE) pair. */
3125 static struct cost_pair
*
3126 get_use_iv_cost (struct ivopts_data
*data
, struct iv_use
*use
,
3127 struct iv_cand
*cand
)
3130 struct cost_pair
*ret
;
3135 if (data
->consider_all_candidates
)
3137 ret
= use
->cost_map
+ cand
->id
;
3144 /* n_map_members is a power of two, so this computes modulo. */
3145 s
= cand
->id
& (use
->n_map_members
- 1);
3146 for (i
= s
; i
< use
->n_map_members
; i
++)
3147 if (use
->cost_map
[i
].cand
== cand
)
3148 return use
->cost_map
+ i
;
3149 else if (use
->cost_map
[i
].cand
== NULL
)
3151 for (i
= 0; i
< s
; i
++)
3152 if (use
->cost_map
[i
].cand
== cand
)
3153 return use
->cost_map
+ i
;
3154 else if (use
->cost_map
[i
].cand
== NULL
)
3160 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3162 produce_memory_decl_rtl (tree obj
, int *regno
)
3164 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3165 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3169 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3171 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3172 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3173 SET_SYMBOL_REF_DECL (x
, obj
);
3174 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3175 set_mem_addr_space (x
, as
);
3176 targetm
.encode_section_info (obj
, x
, true);
3180 x
= gen_raw_REG (address_mode
, (*regno
)++);
3181 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3182 set_mem_addr_space (x
, as
);
3188 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3189 walk_tree. DATA contains the actual fake register number. */
3192 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3194 tree obj
= NULL_TREE
;
3196 int *regno
= (int *) data
;
3198 switch (TREE_CODE (*expr_p
))
3201 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3202 handled_component_p (*expr_p
);
3203 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3206 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3207 x
= produce_memory_decl_rtl (obj
, regno
);
3212 obj
= SSA_NAME_VAR (*expr_p
);
3213 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3216 if (!DECL_RTL_SET_P (obj
))
3217 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3226 if (DECL_RTL_SET_P (obj
))
3229 if (DECL_MODE (obj
) == BLKmode
)
3230 x
= produce_memory_decl_rtl (obj
, regno
);
3232 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3242 decl_rtl_to_reset
.safe_push (obj
);
3243 SET_DECL_RTL (obj
, x
);
3249 /* Determines cost of the computation of EXPR. */
3252 computation_cost (tree expr
, bool speed
)
3256 tree type
= TREE_TYPE (expr
);
3258 /* Avoid using hard regs in ways which may be unsupported. */
3259 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3260 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3261 enum node_frequency real_frequency
= node
->frequency
;
3263 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3264 crtl
->maybe_hot_insn_p
= speed
;
3265 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3267 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3270 default_rtl_profile ();
3271 node
->frequency
= real_frequency
;
3273 cost
= seq_cost (seq
, speed
);
3275 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3276 TYPE_ADDR_SPACE (type
), speed
);
3277 else if (!REG_P (rslt
))
3278 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3283 /* Returns variable containing the value of candidate CAND at statement AT. */
3286 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple stmt
)
3288 if (stmt_after_increment (loop
, cand
, stmt
))
3289 return cand
->var_after
;
3291 return cand
->var_before
;
3294 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3295 same precision that is at least as wide as the precision of TYPE, stores
3296 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3300 determine_common_wider_type (tree
*a
, tree
*b
)
3302 tree wider_type
= NULL
;
3304 tree atype
= TREE_TYPE (*a
);
3306 if (CONVERT_EXPR_P (*a
))
3308 suba
= TREE_OPERAND (*a
, 0);
3309 wider_type
= TREE_TYPE (suba
);
3310 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3316 if (CONVERT_EXPR_P (*b
))
3318 subb
= TREE_OPERAND (*b
, 0);
3319 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3330 /* Determines the expression by that USE is expressed from induction variable
3331 CAND at statement AT in LOOP. The expression is stored in a decomposed
3332 form into AFF. Returns false if USE cannot be expressed using CAND. */
3335 get_computation_aff (struct loop
*loop
,
3336 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
,
3337 struct aff_tree
*aff
)
3339 tree ubase
= use
->iv
->base
;
3340 tree ustep
= use
->iv
->step
;
3341 tree cbase
= cand
->iv
->base
;
3342 tree cstep
= cand
->iv
->step
, cstep_common
;
3343 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3344 tree common_type
, var
;
3346 aff_tree cbase_aff
, var_aff
;
3349 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3351 /* We do not have a precision to express the values of use. */
3355 var
= var_at_stmt (loop
, cand
, at
);
3356 uutype
= unsigned_type_for (utype
);
3358 /* If the conversion is not noop, perform it. */
3359 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3361 cstep
= fold_convert (uutype
, cstep
);
3362 cbase
= fold_convert (uutype
, cbase
);
3363 var
= fold_convert (uutype
, var
);
3366 if (!constant_multiple_of (ustep
, cstep
, &rat
))
3369 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3370 type, we achieve better folding by computing their difference in this
3371 wider type, and cast the result to UUTYPE. We do not need to worry about
3372 overflows, as all the arithmetics will in the end be performed in UUTYPE
3374 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3376 /* use = ubase - ratio * cbase + ratio * var. */
3377 tree_to_aff_combination (ubase
, common_type
, aff
);
3378 tree_to_aff_combination (cbase
, common_type
, &cbase_aff
);
3379 tree_to_aff_combination (var
, uutype
, &var_aff
);
3381 /* We need to shift the value if we are after the increment. */
3382 if (stmt_after_increment (loop
, cand
, at
))
3386 if (common_type
!= uutype
)
3387 cstep_common
= fold_convert (common_type
, cstep
);
3389 cstep_common
= cstep
;
3391 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3392 aff_combination_add (&cbase_aff
, &cstep_aff
);
3395 aff_combination_scale (&cbase_aff
, -rat
);
3396 aff_combination_add (aff
, &cbase_aff
);
3397 if (common_type
!= uutype
)
3398 aff_combination_convert (aff
, uutype
);
3400 aff_combination_scale (&var_aff
, rat
);
3401 aff_combination_add (aff
, &var_aff
);
3406 /* Return the type of USE. */
3409 get_use_type (struct iv_use
*use
)
3411 tree base_type
= TREE_TYPE (use
->iv
->base
);
3414 if (use
->type
== USE_ADDRESS
)
3416 /* The base_type may be a void pointer. Create a pointer type based on
3417 the mem_ref instead. */
3418 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3419 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3420 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
3428 /* Determines the expression by that USE is expressed from induction variable
3429 CAND at statement AT in LOOP. The computation is unshared. */
3432 get_computation_at (struct loop
*loop
,
3433 struct iv_use
*use
, struct iv_cand
*cand
, gimple at
)
3436 tree type
= get_use_type (use
);
3438 if (!get_computation_aff (loop
, use
, cand
, at
, &aff
))
3440 unshare_aff_combination (&aff
);
3441 return fold_convert (type
, aff_combination_to_tree (&aff
));
3444 /* Determines the expression by that USE is expressed from induction variable
3445 CAND in LOOP. The computation is unshared. */
3448 get_computation (struct loop
*loop
, struct iv_use
*use
, struct iv_cand
*cand
)
3450 return get_computation_at (loop
, use
, cand
, use
->stmt
);
3453 /* Adjust the cost COST for being in loop setup rather than loop body.
3454 If we're optimizing for space, the loop setup overhead is constant;
3455 if we're optimizing for speed, amortize it over the per-iteration cost. */
3457 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
)
3461 else if (optimize_loop_for_speed_p (data
->current_loop
))
3462 return cost
/ avg_loop_niter (data
->current_loop
);
3467 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3468 validity for a memory reference accessing memory of mode MODE in
3469 address space AS. */
3473 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio
, machine_mode mode
,
3476 #define MAX_RATIO 128
3477 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mode
;
3478 static vec
<sbitmap
> valid_mult_list
;
3481 if (data_index
>= valid_mult_list
.length ())
3482 valid_mult_list
.safe_grow_cleared (data_index
+ 1);
3484 valid_mult
= valid_mult_list
[data_index
];
3487 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3488 rtx reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3489 rtx reg2
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3493 valid_mult
= sbitmap_alloc (2 * MAX_RATIO
+ 1);
3494 bitmap_clear (valid_mult
);
3495 scaled
= gen_rtx_fmt_ee (MULT
, address_mode
, reg1
, NULL_RTX
);
3496 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, scaled
, reg2
);
3497 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3499 XEXP (scaled
, 1) = gen_int_mode (i
, address_mode
);
3500 if (memory_address_addr_space_p (mode
, addr
, as
)
3501 || memory_address_addr_space_p (mode
, scaled
, as
))
3502 bitmap_set_bit (valid_mult
, i
+ MAX_RATIO
);
3505 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3507 fprintf (dump_file
, " allowed multipliers:");
3508 for (i
= -MAX_RATIO
; i
<= MAX_RATIO
; i
++)
3509 if (bitmap_bit_p (valid_mult
, i
+ MAX_RATIO
))
3510 fprintf (dump_file
, " %d", (int) i
);
3511 fprintf (dump_file
, "\n");
3512 fprintf (dump_file
, "\n");
3515 valid_mult_list
[data_index
] = valid_mult
;
3518 if (ratio
> MAX_RATIO
|| ratio
< -MAX_RATIO
)
3521 return bitmap_bit_p (valid_mult
, ratio
+ MAX_RATIO
);
3524 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3525 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3526 variable is omitted. Compute the cost for a memory reference that accesses
3527 a memory location of mode MEM_MODE in address space AS.
3529 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3530 size of MEM_MODE / RATIO) is available. To make this determination, we
3531 look at the size of the increment to be made, which is given in CSTEP.
3532 CSTEP may be zero if the step is unknown.
3533 STMT_AFTER_INC is true iff the statement we're looking at is after the
3534 increment of the original biv.
3536 TODO -- there must be some better way. This all is quite crude. */
3540 AINC_PRE_INC
, /* Pre increment. */
3541 AINC_PRE_DEC
, /* Pre decrement. */
3542 AINC_POST_INC
, /* Post increment. */
3543 AINC_POST_DEC
, /* Post decrement. */
3544 AINC_NONE
/* Also the number of auto increment types. */
3547 typedef struct address_cost_data_s
3549 HOST_WIDE_INT min_offset
, max_offset
;
3550 unsigned costs
[2][2][2][2];
3551 unsigned ainc_costs
[AINC_NONE
];
3552 } *address_cost_data
;
3556 get_address_cost (bool symbol_present
, bool var_present
,
3557 unsigned HOST_WIDE_INT offset
, HOST_WIDE_INT ratio
,
3558 HOST_WIDE_INT cstep
, machine_mode mem_mode
,
3559 addr_space_t as
, bool speed
,
3560 bool stmt_after_inc
, bool *may_autoinc
)
3562 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3563 static vec
<address_cost_data
> address_cost_data_list
;
3564 unsigned int data_index
= (int) as
* MAX_MACHINE_MODE
+ (int) mem_mode
;
3565 address_cost_data data
;
3566 static bool has_preinc
[MAX_MACHINE_MODE
], has_postinc
[MAX_MACHINE_MODE
];
3567 static bool has_predec
[MAX_MACHINE_MODE
], has_postdec
[MAX_MACHINE_MODE
];
3568 unsigned cost
, acost
, complexity
;
3569 enum ainc_type autoinc_type
;
3570 bool offset_p
, ratio_p
, autoinc
;
3571 HOST_WIDE_INT s_offset
, autoinc_offset
, msize
;
3572 unsigned HOST_WIDE_INT mask
;
3575 if (data_index
>= address_cost_data_list
.length ())
3576 address_cost_data_list
.safe_grow_cleared (data_index
+ 1);
3578 data
= address_cost_data_list
[data_index
];
3582 HOST_WIDE_INT rat
, off
= 0;
3583 int old_cse_not_expected
, width
;
3584 unsigned sym_p
, var_p
, off_p
, rat_p
, add_c
;
3589 data
= (address_cost_data
) xcalloc (1, sizeof (*data
));
3591 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3593 width
= GET_MODE_BITSIZE (address_mode
) - 1;
3594 if (width
> (HOST_BITS_PER_WIDE_INT
- 1))
3595 width
= HOST_BITS_PER_WIDE_INT
- 1;
3596 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, reg1
, NULL_RTX
);
3598 for (i
= width
; i
>= 0; i
--)
3600 off
= -((unsigned HOST_WIDE_INT
) 1 << i
);
3601 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3602 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3605 data
->min_offset
= (i
== -1? 0 : off
);
3607 for (i
= width
; i
>= 0; i
--)
3609 off
= ((unsigned HOST_WIDE_INT
) 1 << i
) - 1;
3610 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3611 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3613 /* For some strict-alignment targets, the offset must be naturally
3614 aligned. Try an aligned offset if mem_mode is not QImode. */
3615 off
= mem_mode
!= QImode
3616 ? ((unsigned HOST_WIDE_INT
) 1 << i
)
3617 - GET_MODE_SIZE (mem_mode
)
3621 XEXP (addr
, 1) = gen_int_mode (off
, address_mode
);
3622 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
3628 data
->max_offset
= off
;
3630 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3632 fprintf (dump_file
, "get_address_cost:\n");
3633 fprintf (dump_file
, " min offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3634 GET_MODE_NAME (mem_mode
),
3636 fprintf (dump_file
, " max offset %s " HOST_WIDE_INT_PRINT_DEC
"\n",
3637 GET_MODE_NAME (mem_mode
),
3642 for (i
= 2; i
<= MAX_RATIO
; i
++)
3643 if (multiplier_allowed_in_address_p (i
, mem_mode
, as
))
3649 /* Compute the cost of various addressing modes. */
3651 reg0
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 1);
3652 reg1
= gen_raw_REG (address_mode
, LAST_VIRTUAL_REGISTER
+ 2);
3654 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
3655 || USE_STORE_PRE_DECREMENT (mem_mode
))
3657 addr
= gen_rtx_PRE_DEC (address_mode
, reg0
);
3658 has_predec
[mem_mode
]
3659 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3661 if (has_predec
[mem_mode
])
3662 data
->ainc_costs
[AINC_PRE_DEC
]
3663 = address_cost (addr
, mem_mode
, as
, speed
);
3665 if (USE_LOAD_POST_DECREMENT (mem_mode
)
3666 || USE_STORE_POST_DECREMENT (mem_mode
))
3668 addr
= gen_rtx_POST_DEC (address_mode
, reg0
);
3669 has_postdec
[mem_mode
]
3670 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3672 if (has_postdec
[mem_mode
])
3673 data
->ainc_costs
[AINC_POST_DEC
]
3674 = address_cost (addr
, mem_mode
, as
, speed
);
3676 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
3677 || USE_STORE_PRE_DECREMENT (mem_mode
))
3679 addr
= gen_rtx_PRE_INC (address_mode
, reg0
);
3680 has_preinc
[mem_mode
]
3681 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3683 if (has_preinc
[mem_mode
])
3684 data
->ainc_costs
[AINC_PRE_INC
]
3685 = address_cost (addr
, mem_mode
, as
, speed
);
3687 if (USE_LOAD_POST_INCREMENT (mem_mode
)
3688 || USE_STORE_POST_INCREMENT (mem_mode
))
3690 addr
= gen_rtx_POST_INC (address_mode
, reg0
);
3691 has_postinc
[mem_mode
]
3692 = memory_address_addr_space_p (mem_mode
, addr
, as
);
3694 if (has_postinc
[mem_mode
])
3695 data
->ainc_costs
[AINC_POST_INC
]
3696 = address_cost (addr
, mem_mode
, as
, speed
);
3698 for (i
= 0; i
< 16; i
++)
3701 var_p
= (i
>> 1) & 1;
3702 off_p
= (i
>> 2) & 1;
3703 rat_p
= (i
>> 3) & 1;
3707 addr
= gen_rtx_fmt_ee (MULT
, address_mode
, addr
,
3708 gen_int_mode (rat
, address_mode
));
3711 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, reg1
);
3715 base
= gen_rtx_SYMBOL_REF (address_mode
, ggc_strdup (""));
3716 /* ??? We can run into trouble with some backends by presenting
3717 it with symbols which haven't been properly passed through
3718 targetm.encode_section_info. By setting the local bit, we
3719 enhance the probability of things working. */
3720 SYMBOL_REF_FLAGS (base
) = SYMBOL_FLAG_LOCAL
;
3723 base
= gen_rtx_fmt_e (CONST
, address_mode
,
3725 (PLUS
, address_mode
, base
,
3726 gen_int_mode (off
, address_mode
)));
3729 base
= gen_int_mode (off
, address_mode
);
3734 addr
= gen_rtx_fmt_ee (PLUS
, address_mode
, addr
, base
);
3737 /* To avoid splitting addressing modes, pretend that no cse will
3739 old_cse_not_expected
= cse_not_expected
;
3740 cse_not_expected
= true;
3741 addr
= memory_address_addr_space (mem_mode
, addr
, as
);
3742 cse_not_expected
= old_cse_not_expected
;
3746 acost
= seq_cost (seq
, speed
);
3747 acost
+= address_cost (addr
, mem_mode
, as
, speed
);
3751 data
->costs
[sym_p
][var_p
][off_p
][rat_p
] = acost
;
3754 /* On some targets, it is quite expensive to load symbol to a register,
3755 which makes addresses that contain symbols look much more expensive.
3756 However, the symbol will have to be loaded in any case before the
3757 loop (and quite likely we have it in register already), so it does not
3758 make much sense to penalize them too heavily. So make some final
3759 tweaks for the SYMBOL_PRESENT modes:
3761 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3762 var is cheaper, use this mode with small penalty.
3763 If VAR_PRESENT is true, try whether the mode with
3764 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3765 if this is the case, use it. */
3766 add_c
= add_cost (speed
, address_mode
);
3767 for (i
= 0; i
< 8; i
++)
3770 off_p
= (i
>> 1) & 1;
3771 rat_p
= (i
>> 2) & 1;
3773 acost
= data
->costs
[0][1][off_p
][rat_p
] + 1;
3777 if (acost
< data
->costs
[1][var_p
][off_p
][rat_p
])
3778 data
->costs
[1][var_p
][off_p
][rat_p
] = acost
;
3781 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3783 fprintf (dump_file
, "Address costs:\n");
3785 for (i
= 0; i
< 16; i
++)
3788 var_p
= (i
>> 1) & 1;
3789 off_p
= (i
>> 2) & 1;
3790 rat_p
= (i
>> 3) & 1;
3792 fprintf (dump_file
, " ");
3794 fprintf (dump_file
, "sym + ");
3796 fprintf (dump_file
, "var + ");
3798 fprintf (dump_file
, "cst + ");
3800 fprintf (dump_file
, "rat * ");
3802 acost
= data
->costs
[sym_p
][var_p
][off_p
][rat_p
];
3803 fprintf (dump_file
, "index costs %d\n", acost
);
3805 if (has_predec
[mem_mode
] || has_postdec
[mem_mode
]
3806 || has_preinc
[mem_mode
] || has_postinc
[mem_mode
])
3807 fprintf (dump_file
, " May include autoinc/dec\n");
3808 fprintf (dump_file
, "\n");
3811 address_cost_data_list
[data_index
] = data
;
3814 bits
= GET_MODE_BITSIZE (address_mode
);
3815 mask
= ~(~(unsigned HOST_WIDE_INT
) 0 << (bits
- 1) << 1);
3817 if ((offset
>> (bits
- 1) & 1))
3822 autoinc_type
= AINC_NONE
;
3823 msize
= GET_MODE_SIZE (mem_mode
);
3824 autoinc_offset
= offset
;
3826 autoinc_offset
+= ratio
* cstep
;
3827 if (symbol_present
|| var_present
|| ratio
!= 1)
3831 if (has_postinc
[mem_mode
] && autoinc_offset
== 0
3833 autoinc_type
= AINC_POST_INC
;
3834 else if (has_postdec
[mem_mode
] && autoinc_offset
== 0
3836 autoinc_type
= AINC_POST_DEC
;
3837 else if (has_preinc
[mem_mode
] && autoinc_offset
== msize
3839 autoinc_type
= AINC_PRE_INC
;
3840 else if (has_predec
[mem_mode
] && autoinc_offset
== -msize
3842 autoinc_type
= AINC_PRE_DEC
;
3844 if (autoinc_type
!= AINC_NONE
)
3849 offset_p
= (s_offset
!= 0
3850 && data
->min_offset
<= s_offset
3851 && s_offset
<= data
->max_offset
);
3852 ratio_p
= (ratio
!= 1
3853 && multiplier_allowed_in_address_p (ratio
, mem_mode
, as
));
3855 if (ratio
!= 1 && !ratio_p
)
3856 cost
+= mult_by_coeff_cost (ratio
, address_mode
, speed
);
3858 if (s_offset
&& !offset_p
&& !symbol_present
)
3859 cost
+= add_cost (speed
, address_mode
);
3862 *may_autoinc
= autoinc
;
3864 acost
= data
->ainc_costs
[autoinc_type
];
3866 acost
= data
->costs
[symbol_present
][var_present
][offset_p
][ratio_p
];
3867 complexity
= (symbol_present
!= 0) + (var_present
!= 0) + offset_p
+ ratio_p
;
3868 return new_cost (cost
+ acost
, complexity
);
3871 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3872 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3873 calculating the operands of EXPR. Returns true if successful, and returns
3874 the cost in COST. */
3877 get_shiftadd_cost (tree expr
, machine_mode mode
, comp_cost cost0
,
3878 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
3881 tree op1
= TREE_OPERAND (expr
, 1);
3882 tree cst
= TREE_OPERAND (mult
, 1);
3883 tree multop
= TREE_OPERAND (mult
, 0);
3884 int m
= exact_log2 (int_cst_value (cst
));
3885 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
3886 int as_cost
, sa_cost
;
3889 if (!(m
>= 0 && m
< maxm
))
3893 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
3895 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
3897 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3898 use that in preference to a shift insn followed by an add insn. */
3899 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
3900 ? shiftadd_cost (speed
, mode
, m
)
3902 ? shiftsub1_cost (speed
, mode
, m
)
3903 : shiftsub0_cost (speed
, mode
, m
)));
3905 res
= new_cost (MIN (as_cost
, sa_cost
), 0);
3906 res
= add_costs (res
, mult_in_op1
? cost0
: cost1
);
3908 STRIP_NOPS (multop
);
3909 if (!is_gimple_val (multop
))
3910 res
= add_costs (res
, force_expr_to_var_cost (multop
, speed
));
3916 /* Estimates cost of forcing expression EXPR into a variable. */
3919 force_expr_to_var_cost (tree expr
, bool speed
)
3921 static bool costs_initialized
= false;
3922 static unsigned integer_cost
[2];
3923 static unsigned symbol_cost
[2];
3924 static unsigned address_cost
[2];
3926 comp_cost cost0
, cost1
, cost
;
3929 if (!costs_initialized
)
3931 tree type
= build_pointer_type (integer_type_node
);
3936 var
= create_tmp_var_raw (integer_type_node
, "test_var");
3937 TREE_STATIC (var
) = 1;
3938 x
= produce_memory_decl_rtl (var
, NULL
);
3939 SET_DECL_RTL (var
, x
);
3941 addr
= build1 (ADDR_EXPR
, type
, var
);
3944 for (i
= 0; i
< 2; i
++)
3946 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
3949 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
3952 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
3953 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3955 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
3956 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
3957 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
3958 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
3959 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
3960 fprintf (dump_file
, "\n");
3964 costs_initialized
= true;
3969 if (SSA_VAR_P (expr
))
3972 if (is_gimple_min_invariant (expr
))
3974 if (TREE_CODE (expr
) == INTEGER_CST
)
3975 return new_cost (integer_cost
[speed
], 0);
3977 if (TREE_CODE (expr
) == ADDR_EXPR
)
3979 tree obj
= TREE_OPERAND (expr
, 0);
3981 if (TREE_CODE (obj
) == VAR_DECL
3982 || TREE_CODE (obj
) == PARM_DECL
3983 || TREE_CODE (obj
) == RESULT_DECL
)
3984 return new_cost (symbol_cost
[speed
], 0);
3987 return new_cost (address_cost
[speed
], 0);
3990 switch (TREE_CODE (expr
))
3992 case POINTER_PLUS_EXPR
:
3996 op0
= TREE_OPERAND (expr
, 0);
3997 op1
= TREE_OPERAND (expr
, 1);
4004 op0
= TREE_OPERAND (expr
, 0);
4010 /* Just an arbitrary value, FIXME. */
4011 return new_cost (target_spill_cost
[speed
], 0);
4014 if (op0
== NULL_TREE
4015 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4018 cost0
= force_expr_to_var_cost (op0
, speed
);
4020 if (op1
== NULL_TREE
4021 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4024 cost1
= force_expr_to_var_cost (op1
, speed
);
4026 mode
= TYPE_MODE (TREE_TYPE (expr
));
4027 switch (TREE_CODE (expr
))
4029 case POINTER_PLUS_EXPR
:
4033 cost
= new_cost (add_cost (speed
, mode
), 0);
4034 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4036 tree mult
= NULL_TREE
;
4038 if (TREE_CODE (op1
) == MULT_EXPR
)
4040 else if (TREE_CODE (op0
) == MULT_EXPR
)
4043 if (mult
!= NULL_TREE
4044 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4045 && get_shiftadd_cost (expr
, mode
, cost0
, cost1
, mult
,
4053 tree inner_mode
, outer_mode
;
4054 outer_mode
= TREE_TYPE (expr
);
4055 inner_mode
= TREE_TYPE (op0
);
4056 cost
= new_cost (convert_cost (TYPE_MODE (outer_mode
),
4057 TYPE_MODE (inner_mode
), speed
), 0);
4062 if (cst_and_fits_in_hwi (op0
))
4063 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op0
),
4065 else if (cst_and_fits_in_hwi (op1
))
4066 cost
= new_cost (mult_by_coeff_cost (int_cst_value (op1
),
4069 return new_cost (target_spill_cost
[speed
], 0);
4076 cost
= add_costs (cost
, cost0
);
4077 cost
= add_costs (cost
, cost1
);
4079 /* Bound the cost by target_spill_cost. The parts of complicated
4080 computations often are either loop invariant or at least can
4081 be shared between several iv uses, so letting this grow without
4082 limits would not give reasonable results. */
4083 if (cost
.cost
> (int) target_spill_cost
[speed
])
4084 cost
.cost
= target_spill_cost
[speed
];
4089 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4090 invariants the computation depends on. */
4093 force_var_cost (struct ivopts_data
*data
,
4094 tree expr
, bitmap
*depends_on
)
4098 fd_ivopts_data
= data
;
4099 walk_tree (&expr
, find_depends
, depends_on
, NULL
);
4102 return force_expr_to_var_cost (expr
, data
->speed
);
4105 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4106 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4107 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4108 invariants the computation depends on. */
4111 split_address_cost (struct ivopts_data
*data
,
4112 tree addr
, bool *symbol_present
, bool *var_present
,
4113 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4116 HOST_WIDE_INT bitsize
;
4117 HOST_WIDE_INT bitpos
;
4120 int unsignedp
, volatilep
;
4122 core
= get_inner_reference (addr
, &bitsize
, &bitpos
, &toffset
, &mode
,
4123 &unsignedp
, &volatilep
, false);
4126 || bitpos
% BITS_PER_UNIT
!= 0
4127 || TREE_CODE (core
) != VAR_DECL
)
4129 *symbol_present
= false;
4130 *var_present
= true;
4131 fd_ivopts_data
= data
;
4132 walk_tree (&addr
, find_depends
, depends_on
, NULL
);
4133 return new_cost (target_spill_cost
[data
->speed
], 0);
4136 *offset
+= bitpos
/ BITS_PER_UNIT
;
4137 if (TREE_STATIC (core
)
4138 || DECL_EXTERNAL (core
))
4140 *symbol_present
= true;
4141 *var_present
= false;
4145 *symbol_present
= false;
4146 *var_present
= true;
4150 /* Estimates cost of expressing difference of addresses E1 - E2 as
4151 var + symbol + offset. The value of offset is added to OFFSET,
4152 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4153 part is missing. DEPENDS_ON is a set of the invariants the computation
4157 ptr_difference_cost (struct ivopts_data
*data
,
4158 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4159 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4161 HOST_WIDE_INT diff
= 0;
4162 aff_tree aff_e1
, aff_e2
;
4165 gcc_assert (TREE_CODE (e1
) == ADDR_EXPR
);
4167 if (ptr_difference_const (e1
, e2
, &diff
))
4170 *symbol_present
= false;
4171 *var_present
= false;
4175 if (integer_zerop (e2
))
4176 return split_address_cost (data
, TREE_OPERAND (e1
, 0),
4177 symbol_present
, var_present
, offset
, depends_on
);
4179 *symbol_present
= false;
4180 *var_present
= true;
4182 type
= signed_type_for (TREE_TYPE (e1
));
4183 tree_to_aff_combination (e1
, type
, &aff_e1
);
4184 tree_to_aff_combination (e2
, type
, &aff_e2
);
4185 aff_combination_scale (&aff_e2
, -1);
4186 aff_combination_add (&aff_e1
, &aff_e2
);
4188 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4191 /* Estimates cost of expressing difference E1 - E2 as
4192 var + symbol + offset. The value of offset is added to OFFSET,
4193 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4194 part is missing. DEPENDS_ON is a set of the invariants the computation
4198 difference_cost (struct ivopts_data
*data
,
4199 tree e1
, tree e2
, bool *symbol_present
, bool *var_present
,
4200 unsigned HOST_WIDE_INT
*offset
, bitmap
*depends_on
)
4202 machine_mode mode
= TYPE_MODE (TREE_TYPE (e1
));
4203 unsigned HOST_WIDE_INT off1
, off2
;
4204 aff_tree aff_e1
, aff_e2
;
4207 e1
= strip_offset (e1
, &off1
);
4208 e2
= strip_offset (e2
, &off2
);
4209 *offset
+= off1
- off2
;
4214 if (TREE_CODE (e1
) == ADDR_EXPR
)
4215 return ptr_difference_cost (data
, e1
, e2
, symbol_present
, var_present
,
4216 offset
, depends_on
);
4217 *symbol_present
= false;
4219 if (operand_equal_p (e1
, e2
, 0))
4221 *var_present
= false;
4225 *var_present
= true;
4227 if (integer_zerop (e2
))
4228 return force_var_cost (data
, e1
, depends_on
);
4230 if (integer_zerop (e1
))
4232 comp_cost cost
= force_var_cost (data
, e2
, depends_on
);
4233 cost
.cost
+= mult_by_coeff_cost (-1, mode
, data
->speed
);
4237 type
= signed_type_for (TREE_TYPE (e1
));
4238 tree_to_aff_combination (e1
, type
, &aff_e1
);
4239 tree_to_aff_combination (e2
, type
, &aff_e2
);
4240 aff_combination_scale (&aff_e2
, -1);
4241 aff_combination_add (&aff_e1
, &aff_e2
);
4243 return force_var_cost (data
, aff_combination_to_tree (&aff_e1
), depends_on
);
4246 /* Returns true if AFF1 and AFF2 are identical. */
4249 compare_aff_trees (aff_tree
*aff1
, aff_tree
*aff2
)
4253 if (aff1
->n
!= aff2
->n
)
4256 for (i
= 0; i
< aff1
->n
; i
++)
4258 if (aff1
->elts
[i
].coef
!= aff2
->elts
[i
].coef
)
4261 if (!operand_equal_p (aff1
->elts
[i
].val
, aff2
->elts
[i
].val
, 0))
4267 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4270 get_expr_id (struct ivopts_data
*data
, tree expr
)
4272 struct iv_inv_expr_ent ent
;
4273 struct iv_inv_expr_ent
**slot
;
4276 ent
.hash
= iterative_hash_expr (expr
, 0);
4277 slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
4281 *slot
= XNEW (struct iv_inv_expr_ent
);
4282 (*slot
)->expr
= expr
;
4283 (*slot
)->hash
= ent
.hash
;
4284 (*slot
)->id
= data
->inv_expr_id
++;
4288 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4289 requires a new compiler generated temporary. Returns -1 otherwise.
4290 ADDRESS_P is a flag indicating if the expression is for address
4294 get_loop_invariant_expr_id (struct ivopts_data
*data
, tree ubase
,
4295 tree cbase
, HOST_WIDE_INT ratio
,
4298 aff_tree ubase_aff
, cbase_aff
;
4306 if ((TREE_CODE (ubase
) == INTEGER_CST
)
4307 && (TREE_CODE (cbase
) == INTEGER_CST
))
4310 /* Strips the constant part. */
4311 if (TREE_CODE (ubase
) == PLUS_EXPR
4312 || TREE_CODE (ubase
) == MINUS_EXPR
4313 || TREE_CODE (ubase
) == POINTER_PLUS_EXPR
)
4315 if (TREE_CODE (TREE_OPERAND (ubase
, 1)) == INTEGER_CST
)
4316 ubase
= TREE_OPERAND (ubase
, 0);
4319 /* Strips the constant part. */
4320 if (TREE_CODE (cbase
) == PLUS_EXPR
4321 || TREE_CODE (cbase
) == MINUS_EXPR
4322 || TREE_CODE (cbase
) == POINTER_PLUS_EXPR
)
4324 if (TREE_CODE (TREE_OPERAND (cbase
, 1)) == INTEGER_CST
)
4325 cbase
= TREE_OPERAND (cbase
, 0);
4330 if (((TREE_CODE (ubase
) == SSA_NAME
)
4331 || (TREE_CODE (ubase
) == ADDR_EXPR
4332 && is_gimple_min_invariant (ubase
)))
4333 && (TREE_CODE (cbase
) == INTEGER_CST
))
4336 if (((TREE_CODE (cbase
) == SSA_NAME
)
4337 || (TREE_CODE (cbase
) == ADDR_EXPR
4338 && is_gimple_min_invariant (cbase
)))
4339 && (TREE_CODE (ubase
) == INTEGER_CST
))
4345 if (operand_equal_p (ubase
, cbase
, 0))
4348 if (TREE_CODE (ubase
) == ADDR_EXPR
4349 && TREE_CODE (cbase
) == ADDR_EXPR
)
4353 usym
= TREE_OPERAND (ubase
, 0);
4354 csym
= TREE_OPERAND (cbase
, 0);
4355 if (TREE_CODE (usym
) == ARRAY_REF
)
4357 tree ind
= TREE_OPERAND (usym
, 1);
4358 if (TREE_CODE (ind
) == INTEGER_CST
4359 && tree_fits_shwi_p (ind
)
4360 && tree_to_shwi (ind
) == 0)
4361 usym
= TREE_OPERAND (usym
, 0);
4363 if (TREE_CODE (csym
) == ARRAY_REF
)
4365 tree ind
= TREE_OPERAND (csym
, 1);
4366 if (TREE_CODE (ind
) == INTEGER_CST
4367 && tree_fits_shwi_p (ind
)
4368 && tree_to_shwi (ind
) == 0)
4369 csym
= TREE_OPERAND (csym
, 0);
4371 if (operand_equal_p (usym
, csym
, 0))
4374 /* Now do more complex comparison */
4375 tree_to_aff_combination (ubase
, TREE_TYPE (ubase
), &ubase_aff
);
4376 tree_to_aff_combination (cbase
, TREE_TYPE (cbase
), &cbase_aff
);
4377 if (compare_aff_trees (&ubase_aff
, &cbase_aff
))
4381 tree_to_aff_combination (ub
, TREE_TYPE (ub
), &ubase_aff
);
4382 tree_to_aff_combination (cb
, TREE_TYPE (cb
), &cbase_aff
);
4384 aff_combination_scale (&cbase_aff
, -1 * ratio
);
4385 aff_combination_add (&ubase_aff
, &cbase_aff
);
4386 expr
= aff_combination_to_tree (&ubase_aff
);
4387 return get_expr_id (data
, expr
);
4392 /* Determines the cost of the computation by that USE is expressed
4393 from induction variable CAND. If ADDRESS_P is true, we just need
4394 to create an address from it, otherwise we want to get it into
4395 register. A set of invariants we depend on is stored in
4396 DEPENDS_ON. AT is the statement at that the value is computed.
4397 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4398 addressing is likely. */
4401 get_computation_cost_at (struct ivopts_data
*data
,
4402 struct iv_use
*use
, struct iv_cand
*cand
,
4403 bool address_p
, bitmap
*depends_on
, gimple at
,
4407 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
4409 tree utype
= TREE_TYPE (ubase
), ctype
;
4410 unsigned HOST_WIDE_INT cstepi
, offset
= 0;
4411 HOST_WIDE_INT ratio
, aratio
;
4412 bool var_present
, symbol_present
, stmt_is_after_inc
;
4415 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4416 machine_mode mem_mode
= (address_p
4417 ? TYPE_MODE (TREE_TYPE (*use
->op_p
))
4422 /* Only consider real candidates. */
4424 return infinite_cost
;
4426 cbase
= cand
->iv
->base
;
4427 cstep
= cand
->iv
->step
;
4428 ctype
= TREE_TYPE (cbase
);
4430 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4432 /* We do not have a precision to express the values of use. */
4433 return infinite_cost
;
4437 || (use
->iv
->base_object
4438 && cand
->iv
->base_object
4439 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4440 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4442 /* Do not try to express address of an object with computation based
4443 on address of a different object. This may cause problems in rtl
4444 level alias analysis (that does not expect this to be happening,
4445 as this is illegal in C), and would be unlikely to be useful
4447 if (use
->iv
->base_object
4448 && cand
->iv
->base_object
4449 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4450 return infinite_cost
;
4453 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4455 /* TODO -- add direct handling of this case. */
4459 /* CSTEPI is removed from the offset in case statement is after the
4460 increment. If the step is not constant, we use zero instead.
4461 This is a bit imprecise (there is the extra addition), but
4462 redundancy elimination is likely to transform the code so that
4463 it uses value of the variable before increment anyway,
4464 so it is not that much unrealistic. */
4465 if (cst_and_fits_in_hwi (cstep
))
4466 cstepi
= int_cst_value (cstep
);
4470 if (!constant_multiple_of (ustep
, cstep
, &rat
))
4471 return infinite_cost
;
4473 if (wi::fits_shwi_p (rat
))
4474 ratio
= rat
.to_shwi ();
4476 return infinite_cost
;
4479 ctype
= TREE_TYPE (cbase
);
4481 stmt_is_after_inc
= stmt_after_increment (data
->current_loop
, cand
, at
);
4483 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4484 or ratio == 1, it is better to handle this like
4486 ubase - ratio * cbase + ratio * var
4488 (also holds in the case ratio == -1, TODO. */
4490 if (cst_and_fits_in_hwi (cbase
))
4492 offset
= - ratio
* (unsigned HOST_WIDE_INT
) int_cst_value (cbase
);
4493 cost
= difference_cost (data
,
4494 ubase
, build_int_cst (utype
, 0),
4495 &symbol_present
, &var_present
, &offset
,
4497 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4499 else if (ratio
== 1)
4501 tree real_cbase
= cbase
;
4503 /* Check to see if any adjustment is needed. */
4504 if (cstepi
== 0 && stmt_is_after_inc
)
4506 aff_tree real_cbase_aff
;
4509 tree_to_aff_combination (cbase
, TREE_TYPE (real_cbase
),
4511 tree_to_aff_combination (cstep
, TREE_TYPE (cstep
), &cstep_aff
);
4513 aff_combination_add (&real_cbase_aff
, &cstep_aff
);
4514 real_cbase
= aff_combination_to_tree (&real_cbase_aff
);
4517 cost
= difference_cost (data
,
4519 &symbol_present
, &var_present
, &offset
,
4521 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4524 && !POINTER_TYPE_P (ctype
)
4525 && multiplier_allowed_in_address_p
4527 TYPE_ADDR_SPACE (TREE_TYPE (utype
))))
4530 = fold_build2 (MULT_EXPR
, ctype
, cbase
, build_int_cst (ctype
, ratio
));
4531 cost
= difference_cost (data
,
4533 &symbol_present
, &var_present
, &offset
,
4535 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4539 cost
= force_var_cost (data
, cbase
, depends_on
);
4540 cost
= add_costs (cost
,
4541 difference_cost (data
,
4542 ubase
, build_int_cst (utype
, 0),
4543 &symbol_present
, &var_present
,
4544 &offset
, depends_on
));
4545 cost
.cost
/= avg_loop_niter (data
->current_loop
);
4546 cost
.cost
+= add_cost (data
->speed
, TYPE_MODE (ctype
));
4549 /* Set of invariants depended on by sub use has already been computed
4550 for the first use in the group. */
4554 if (depends_on
&& *depends_on
)
4555 bitmap_clear (*depends_on
);
4557 else if (inv_expr_id
)
4560 get_loop_invariant_expr_id (data
, ubase
, cbase
, ratio
, address_p
);
4561 /* Clear depends on. */
4562 if (*inv_expr_id
!= -1 && depends_on
&& *depends_on
)
4563 bitmap_clear (*depends_on
);
4566 /* If we are after the increment, the value of the candidate is higher by
4568 if (stmt_is_after_inc
)
4569 offset
-= ratio
* cstepi
;
4571 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4572 (symbol/var1/const parts may be omitted). If we are looking for an
4573 address, find the cost of addressing this. */
4575 return add_costs (cost
,
4576 get_address_cost (symbol_present
, var_present
,
4577 offset
, ratio
, cstepi
,
4579 TYPE_ADDR_SPACE (TREE_TYPE (utype
)),
4580 speed
, stmt_is_after_inc
,
4583 /* Otherwise estimate the costs for computing the expression. */
4584 if (!symbol_present
&& !var_present
&& !offset
)
4587 cost
.cost
+= mult_by_coeff_cost (ratio
, TYPE_MODE (ctype
), speed
);
4591 /* Symbol + offset should be compile-time computable so consider that they
4592 are added once to the variable, if present. */
4593 if (var_present
&& (symbol_present
|| offset
))
4594 cost
.cost
+= adjust_setup_cost (data
,
4595 add_cost (speed
, TYPE_MODE (ctype
)));
4597 /* Having offset does not affect runtime cost in case it is added to
4598 symbol, but it increases complexity. */
4602 cost
.cost
+= add_cost (speed
, TYPE_MODE (ctype
));
4604 aratio
= ratio
> 0 ? ratio
: -ratio
;
4606 cost
.cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (ctype
), speed
);
4611 *can_autoinc
= false;
4614 /* Just get the expression, expand it and measure the cost. */
4615 tree comp
= get_computation_at (data
->current_loop
, use
, cand
, at
);
4618 return infinite_cost
;
4621 comp
= build_simple_mem_ref (comp
);
4623 return new_cost (computation_cost (comp
, speed
), 0);
4627 /* Determines the cost of the computation by that USE is expressed
4628 from induction variable CAND. If ADDRESS_P is true, we just need
4629 to create an address from it, otherwise we want to get it into
4630 register. A set of invariants we depend on is stored in
4631 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4632 autoinc addressing is likely. */
4635 get_computation_cost (struct ivopts_data
*data
,
4636 struct iv_use
*use
, struct iv_cand
*cand
,
4637 bool address_p
, bitmap
*depends_on
,
4638 bool *can_autoinc
, int *inv_expr_id
)
4640 return get_computation_cost_at (data
,
4641 use
, cand
, address_p
, depends_on
, use
->stmt
,
4642 can_autoinc
, inv_expr_id
);
4645 /* Determines cost of basing replacement of USE on CAND in a generic
4649 determine_use_iv_cost_generic (struct ivopts_data
*data
,
4650 struct iv_use
*use
, struct iv_cand
*cand
)
4654 int inv_expr_id
= -1;
4656 /* The simple case first -- if we need to express value of the preserved
4657 original biv, the cost is 0. This also prevents us from counting the
4658 cost of increment twice -- once at this use and once in the cost of
4660 if (cand
->pos
== IP_ORIGINAL
4661 && cand
->incremented_at
== use
->stmt
)
4663 set_use_iv_cost (data
, use
, cand
, no_cost
, NULL
, NULL_TREE
,
4668 cost
= get_computation_cost (data
, use
, cand
, false, &depends_on
,
4669 NULL
, &inv_expr_id
);
4671 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4674 return !infinite_cost_p (cost
);
4677 /* Determines cost of basing replacement of USE on CAND in an address. */
4680 determine_use_iv_cost_address (struct ivopts_data
*data
,
4681 struct iv_use
*use
, struct iv_cand
*cand
)
4685 int inv_expr_id
= -1;
4686 struct iv_use
*sub_use
;
4688 comp_cost cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
4689 &can_autoinc
, &inv_expr_id
);
4691 if (cand
->ainc_use
== use
)
4694 cost
.cost
-= cand
->cost_step
;
4695 /* If we generated the candidate solely for exploiting autoincrement
4696 opportunities, and it turns out it can't be used, set the cost to
4697 infinity to make sure we ignore it. */
4698 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4699 cost
= infinite_cost
;
4701 for (sub_use
= use
->next
;
4702 sub_use
&& !infinite_cost_p (cost
);
4703 sub_use
= sub_use
->next
)
4705 sub_cost
= get_computation_cost (data
, sub_use
, cand
, true, &depends_on
,
4706 &can_autoinc
, &inv_expr_id
);
4707 cost
= add_costs (cost
, sub_cost
);
4710 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, NULL_TREE
, ERROR_MARK
,
4713 return !infinite_cost_p (cost
);
4716 /* Computes value of candidate CAND at position AT in iteration NITER, and
4717 stores it to VAL. */
4720 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple at
, tree niter
,
4723 aff_tree step
, delta
, nit
;
4724 struct iv
*iv
= cand
->iv
;
4725 tree type
= TREE_TYPE (iv
->base
);
4726 tree steptype
= type
;
4727 if (POINTER_TYPE_P (type
))
4728 steptype
= sizetype
;
4729 steptype
= unsigned_type_for (type
);
4731 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4732 aff_combination_convert (&step
, steptype
);
4733 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4734 aff_combination_convert (&nit
, steptype
);
4735 aff_combination_mult (&nit
, &step
, &delta
);
4736 if (stmt_after_increment (loop
, cand
, at
))
4737 aff_combination_add (&delta
, &step
);
4739 tree_to_aff_combination (iv
->base
, type
, val
);
4740 if (!POINTER_TYPE_P (type
))
4741 aff_combination_convert (val
, steptype
);
4742 aff_combination_add (val
, &delta
);
4745 /* Returns period of induction variable iv. */
4748 iv_period (struct iv
*iv
)
4750 tree step
= iv
->step
, period
, type
;
4753 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4755 type
= unsigned_type_for (TREE_TYPE (step
));
4756 /* Period of the iv is lcm (step, type_range)/step -1,
4757 i.e., N*type_range/step - 1. Since type range is power
4758 of two, N == (step >> num_of_ending_zeros_binary (step),
4759 so the final result is
4761 (type_range >> num_of_ending_zeros_binary (step)) - 1
4764 pow2div
= num_ending_zeros (step
);
4766 period
= build_low_bits_mask (type
,
4767 (TYPE_PRECISION (type
)
4768 - tree_to_uhwi (pow2div
)));
4773 /* Returns the comparison operator used when eliminating the iv USE. */
4775 static enum tree_code
4776 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4778 struct loop
*loop
= data
->current_loop
;
4782 ex_bb
= gimple_bb (use
->stmt
);
4783 exit
= EDGE_SUCC (ex_bb
, 0);
4784 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4785 exit
= EDGE_SUCC (ex_bb
, 1);
4787 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4790 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4791 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4792 calculation is performed in non-wrapping type.
4794 TODO: More generally, we could test for the situation that
4795 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4796 This would require knowing the sign of OFFSET. */
4799 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4801 enum tree_code code
;
4803 aff_tree aff_e1
, aff_e2
, aff_offset
;
4805 if (!nowrap_type_p (TREE_TYPE (base
)))
4808 base
= expand_simple_operations (base
);
4810 if (TREE_CODE (base
) == SSA_NAME
)
4812 gimple stmt
= SSA_NAME_DEF_STMT (base
);
4814 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4817 code
= gimple_assign_rhs_code (stmt
);
4818 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4821 e1
= gimple_assign_rhs1 (stmt
);
4822 e2
= gimple_assign_rhs2 (stmt
);
4826 code
= TREE_CODE (base
);
4827 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4829 e1
= TREE_OPERAND (base
, 0);
4830 e2
= TREE_OPERAND (base
, 1);
4833 /* Use affine expansion as deeper inspection to prove the equality. */
4834 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4835 &aff_e2
, &data
->name_expansion_cache
);
4836 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4837 &aff_offset
, &data
->name_expansion_cache
);
4838 aff_combination_scale (&aff_offset
, -1);
4842 aff_combination_add (&aff_e2
, &aff_offset
);
4843 if (aff_combination_zero_p (&aff_e2
))
4846 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4847 &aff_e1
, &data
->name_expansion_cache
);
4848 aff_combination_add (&aff_e1
, &aff_offset
);
4849 return aff_combination_zero_p (&aff_e1
);
4851 case POINTER_PLUS_EXPR
:
4852 aff_combination_add (&aff_e2
, &aff_offset
);
4853 return aff_combination_zero_p (&aff_e2
);
4860 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4861 comparison with CAND. NITER describes the number of iterations of
4862 the loops. If successful, the comparison in COMP_P is altered accordingly.
4864 We aim to handle the following situation:
4880 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4881 We aim to optimize this to
4889 while (p < p_0 - a + b);
4891 This preserves the correctness, since the pointer arithmetics does not
4892 overflow. More precisely:
4894 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4895 overflow in computing it or the values of p.
4896 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4897 overflow. To prove this, we use the fact that p_0 = base + a. */
4900 iv_elimination_compare_lt (struct ivopts_data
*data
,
4901 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4902 struct tree_niter_desc
*niter
)
4904 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4905 struct aff_tree nit
, tmpa
, tmpb
;
4906 enum tree_code comp
;
4909 /* We need to know that the candidate induction variable does not overflow.
4910 While more complex analysis may be used to prove this, for now just
4911 check that the variable appears in the original program and that it
4912 is computed in a type that guarantees no overflows. */
4913 cand_type
= TREE_TYPE (cand
->iv
->base
);
4914 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4917 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4918 the calculation of the BOUND could overflow, making the comparison
4920 if (!data
->loop_single_exit_p
)
4923 /* We need to be able to decide whether candidate is increasing or decreasing
4924 in order to choose the right comparison operator. */
4925 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
4927 step
= int_cst_value (cand
->iv
->step
);
4929 /* Check that the number of iterations matches the expected pattern:
4930 a + 1 > b ? 0 : b - a - 1. */
4931 mbz
= niter
->may_be_zero
;
4932 if (TREE_CODE (mbz
) == GT_EXPR
)
4934 /* Handle a + 1 > b. */
4935 tree op0
= TREE_OPERAND (mbz
, 0);
4936 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
4938 a
= TREE_OPERAND (op0
, 0);
4939 b
= TREE_OPERAND (mbz
, 1);
4944 else if (TREE_CODE (mbz
) == LT_EXPR
)
4946 tree op1
= TREE_OPERAND (mbz
, 1);
4948 /* Handle b < a + 1. */
4949 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
4951 a
= TREE_OPERAND (op1
, 0);
4952 b
= TREE_OPERAND (mbz
, 0);
4960 /* Expected number of iterations is B - A - 1. Check that it matches
4961 the actual number, i.e., that B - A - NITER = 1. */
4962 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
4963 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
4964 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
4965 aff_combination_scale (&nit
, -1);
4966 aff_combination_scale (&tmpa
, -1);
4967 aff_combination_add (&tmpb
, &tmpa
);
4968 aff_combination_add (&tmpb
, &nit
);
4969 if (tmpb
.n
!= 0 || tmpb
.offset
!= 1)
4972 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4974 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
4976 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
4977 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
4980 /* Determine the new comparison operator. */
4981 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
4982 if (*comp_p
== NE_EXPR
)
4984 else if (*comp_p
== EQ_EXPR
)
4985 *comp_p
= invert_tree_comparison (comp
, false);
4992 /* Check whether it is possible to express the condition in USE by comparison
4993 of candidate CAND. If so, store the value compared with to BOUND, and the
4994 comparison operator to COMP. */
4997 may_eliminate_iv (struct ivopts_data
*data
,
4998 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
4999 enum tree_code
*comp
)
5004 struct loop
*loop
= data
->current_loop
;
5006 struct tree_niter_desc
*desc
= NULL
;
5008 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5011 /* For now works only for exits that dominate the loop latch.
5012 TODO: extend to other conditions inside loop body. */
5013 ex_bb
= gimple_bb (use
->stmt
);
5014 if (use
->stmt
!= last_stmt (ex_bb
)
5015 || gimple_code (use
->stmt
) != GIMPLE_COND
5016 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5019 exit
= EDGE_SUCC (ex_bb
, 0);
5020 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5021 exit
= EDGE_SUCC (ex_bb
, 1);
5022 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5025 desc
= niter_for_exit (data
, exit
);
5029 /* Determine whether we can use the variable to test the exit condition.
5030 This is the case iff the period of the induction variable is greater
5031 than the number of iterations for which the exit condition is true. */
5032 period
= iv_period (cand
->iv
);
5034 /* If the number of iterations is constant, compare against it directly. */
5035 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5037 /* See cand_value_at. */
5038 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5040 if (!tree_int_cst_lt (desc
->niter
, period
))
5045 if (tree_int_cst_lt (period
, desc
->niter
))
5050 /* If not, and if this is the only possible exit of the loop, see whether
5051 we can get a conservative estimate on the number of iterations of the
5052 entire loop and compare against that instead. */
5055 widest_int period_value
, max_niter
;
5057 max_niter
= desc
->max
;
5058 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5060 period_value
= wi::to_widest (period
);
5061 if (wi::gtu_p (max_niter
, period_value
))
5063 /* See if we can take advantage of inferred loop bound information. */
5064 if (data
->loop_single_exit_p
)
5066 if (!max_loop_iterations (loop
, &max_niter
))
5068 /* The loop bound is already adjusted by adding 1. */
5069 if (wi::gtu_p (max_niter
, period_value
))
5077 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5079 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5080 aff_combination_to_tree (&bnd
));
5081 *comp
= iv_elimination_compare (data
, use
);
5083 /* It is unlikely that computing the number of iterations using division
5084 would be more profitable than keeping the original induction variable. */
5085 if (expression_expensive_p (*bound
))
5088 /* Sometimes, it is possible to handle the situation that the number of
5089 iterations may be zero unless additional assumtions by using <
5090 instead of != in the exit condition.
5092 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5093 base the exit condition on it. However, that is often too
5095 if (!integer_zerop (desc
->may_be_zero
))
5096 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5101 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5102 be copied, if it is used in the loop body and DATA->body_includes_call. */
5105 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5107 tree sbound
= bound
;
5108 STRIP_NOPS (sbound
);
5110 if (TREE_CODE (sbound
) == SSA_NAME
5111 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5112 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5113 && data
->body_includes_call
)
5114 return COSTS_N_INSNS (1);
5119 /* Determines cost of basing replacement of USE on CAND in a condition. */
5122 determine_use_iv_cost_condition (struct ivopts_data
*data
,
5123 struct iv_use
*use
, struct iv_cand
*cand
)
5125 tree bound
= NULL_TREE
;
5127 bitmap depends_on_elim
= NULL
, depends_on_express
= NULL
, depends_on
;
5128 comp_cost elim_cost
, express_cost
, cost
, bound_cost
;
5130 int elim_inv_expr_id
= -1, express_inv_expr_id
= -1, inv_expr_id
;
5131 tree
*control_var
, *bound_cst
;
5132 enum tree_code comp
= ERROR_MARK
;
5134 /* Only consider real candidates. */
5137 set_use_iv_cost (data
, use
, cand
, infinite_cost
, NULL
, NULL_TREE
,
5142 /* Try iv elimination. */
5143 if (may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5145 elim_cost
= force_var_cost (data
, bound
, &depends_on_elim
);
5146 if (elim_cost
.cost
== 0)
5147 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5148 else if (TREE_CODE (bound
) == INTEGER_CST
)
5150 /* If we replace a loop condition 'i < n' with 'p < base + n',
5151 depends_on_elim will have 'base' and 'n' set, which implies
5152 that both 'base' and 'n' will be live during the loop. More likely,
5153 'base + n' will be loop invariant, resulting in only one live value
5154 during the loop. So in that case we clear depends_on_elim and set
5155 elim_inv_expr_id instead. */
5156 if (depends_on_elim
&& bitmap_count_bits (depends_on_elim
) > 1)
5158 elim_inv_expr_id
= get_expr_id (data
, bound
);
5159 bitmap_clear (depends_on_elim
);
5161 /* The bound is a loop invariant, so it will be only computed
5163 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5166 elim_cost
= infinite_cost
;
5168 /* Try expressing the original giv. If it is compared with an invariant,
5169 note that we cannot get rid of it. */
5170 ok
= extract_cond_operands (data
, use
->stmt
, &control_var
, &bound_cst
,
5174 /* When the condition is a comparison of the candidate IV against
5175 zero, prefer this IV.
5177 TODO: The constant that we're subtracting from the cost should
5178 be target-dependent. This information should be added to the
5179 target costs for each backend. */
5180 if (!infinite_cost_p (elim_cost
) /* Do not try to decrease infinite! */
5181 && integer_zerop (*bound_cst
)
5182 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5183 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5184 elim_cost
.cost
-= 1;
5186 express_cost
= get_computation_cost (data
, use
, cand
, false,
5187 &depends_on_express
, NULL
,
5188 &express_inv_expr_id
);
5189 fd_ivopts_data
= data
;
5190 walk_tree (&cmp_iv
->base
, find_depends
, &depends_on_express
, NULL
);
5192 /* Count the cost of the original bound as well. */
5193 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5194 if (bound_cost
.cost
== 0)
5195 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5196 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5197 bound_cost
.cost
= 0;
5198 express_cost
.cost
+= bound_cost
.cost
;
5200 /* Choose the better approach, preferring the eliminated IV. */
5201 if (compare_costs (elim_cost
, express_cost
) <= 0)
5204 depends_on
= depends_on_elim
;
5205 depends_on_elim
= NULL
;
5206 inv_expr_id
= elim_inv_expr_id
;
5210 cost
= express_cost
;
5211 depends_on
= depends_on_express
;
5212 depends_on_express
= NULL
;
5215 inv_expr_id
= express_inv_expr_id
;
5218 set_use_iv_cost (data
, use
, cand
, cost
, depends_on
, bound
, comp
, inv_expr_id
);
5220 if (depends_on_elim
)
5221 BITMAP_FREE (depends_on_elim
);
5222 if (depends_on_express
)
5223 BITMAP_FREE (depends_on_express
);
5225 return !infinite_cost_p (cost
);
5228 /* Determines cost of basing replacement of USE on CAND. Returns false
5229 if USE cannot be based on CAND. */
5232 determine_use_iv_cost (struct ivopts_data
*data
,
5233 struct iv_use
*use
, struct iv_cand
*cand
)
5237 case USE_NONLINEAR_EXPR
:
5238 return determine_use_iv_cost_generic (data
, use
, cand
);
5241 return determine_use_iv_cost_address (data
, use
, cand
);
5244 return determine_use_iv_cost_condition (data
, use
, cand
);
5251 /* Return true if get_computation_cost indicates that autoincrement is
5252 a possibility for the pair of USE and CAND, false otherwise. */
5255 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5256 struct iv_cand
*cand
)
5262 if (use
->type
!= USE_ADDRESS
)
5265 cost
= get_computation_cost (data
, use
, cand
, true, &depends_on
,
5266 &can_autoinc
, NULL
);
5268 BITMAP_FREE (depends_on
);
5270 return !infinite_cost_p (cost
) && can_autoinc
;
5273 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5274 use that allows autoincrement, and set their AINC_USE if possible. */
5277 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5281 for (i
= 0; i
< n_iv_cands (data
); i
++)
5283 struct iv_cand
*cand
= iv_cand (data
, i
);
5284 struct iv_use
*closest_before
= NULL
;
5285 struct iv_use
*closest_after
= NULL
;
5286 if (cand
->pos
!= IP_ORIGINAL
)
5289 for (j
= 0; j
< n_iv_uses (data
); j
++)
5291 struct iv_use
*use
= iv_use (data
, j
);
5292 unsigned uid
= gimple_uid (use
->stmt
);
5294 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5297 if (uid
< gimple_uid (cand
->incremented_at
)
5298 && (closest_before
== NULL
5299 || uid
> gimple_uid (closest_before
->stmt
)))
5300 closest_before
= use
;
5302 if (uid
> gimple_uid (cand
->incremented_at
)
5303 && (closest_after
== NULL
5304 || uid
< gimple_uid (closest_after
->stmt
)))
5305 closest_after
= use
;
5308 if (closest_before
!= NULL
5309 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5310 cand
->ainc_use
= closest_before
;
5311 else if (closest_after
!= NULL
5312 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5313 cand
->ainc_use
= closest_after
;
5317 /* Finds the candidates for the induction variables. */
5320 find_iv_candidates (struct ivopts_data
*data
)
5322 /* Add commonly used ivs. */
5323 add_standard_iv_candidates (data
);
5325 /* Add old induction variables. */
5326 add_iv_candidate_for_bivs (data
);
5328 /* Add induction variables derived from uses. */
5329 add_iv_candidate_for_uses (data
);
5331 set_autoinc_for_original_candidates (data
);
5333 /* Record the important candidates. */
5334 record_important_candidates (data
);
5337 /* Determines costs of basing the use of the iv on an iv candidate. */
5340 determine_use_iv_costs (struct ivopts_data
*data
)
5344 struct iv_cand
*cand
;
5345 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5347 alloc_use_cost_map (data
);
5349 for (i
= 0; i
< n_iv_uses (data
); i
++)
5351 use
= iv_use (data
, i
);
5353 if (data
->consider_all_candidates
)
5355 for (j
= 0; j
< n_iv_cands (data
); j
++)
5357 cand
= iv_cand (data
, j
);
5358 determine_use_iv_cost (data
, use
, cand
);
5365 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, j
, bi
)
5367 cand
= iv_cand (data
, j
);
5368 if (!determine_use_iv_cost (data
, use
, cand
))
5369 bitmap_set_bit (to_clear
, j
);
5372 /* Remove the candidates for that the cost is infinite from
5373 the list of related candidates. */
5374 bitmap_and_compl_into (use
->related_cands
, to_clear
);
5375 bitmap_clear (to_clear
);
5379 BITMAP_FREE (to_clear
);
5381 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5383 fprintf (dump_file
, "Use-candidate costs:\n");
5385 for (i
= 0; i
< n_iv_uses (data
); i
++)
5387 use
= iv_use (data
, i
);
5389 fprintf (dump_file
, "Use %d:\n", i
);
5390 fprintf (dump_file
, " cand\tcost\tcompl.\tdepends on\n");
5391 for (j
= 0; j
< use
->n_map_members
; j
++)
5393 if (!use
->cost_map
[j
].cand
5394 || infinite_cost_p (use
->cost_map
[j
].cost
))
5397 fprintf (dump_file
, " %d\t%d\t%d\t",
5398 use
->cost_map
[j
].cand
->id
,
5399 use
->cost_map
[j
].cost
.cost
,
5400 use
->cost_map
[j
].cost
.complexity
);
5401 if (use
->cost_map
[j
].depends_on
)
5402 bitmap_print (dump_file
,
5403 use
->cost_map
[j
].depends_on
, "","");
5404 if (use
->cost_map
[j
].inv_expr_id
!= -1)
5405 fprintf (dump_file
, " inv_expr:%d", use
->cost_map
[j
].inv_expr_id
);
5406 fprintf (dump_file
, "\n");
5409 fprintf (dump_file
, "\n");
5411 fprintf (dump_file
, "\n");
5415 /* Determines cost of the candidate CAND. */
5418 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5420 comp_cost cost_base
;
5421 unsigned cost
, cost_step
;
5430 /* There are two costs associated with the candidate -- its increment
5431 and its initialization. The second is almost negligible for any loop
5432 that rolls enough, so we take it just very little into account. */
5434 base
= cand
->iv
->base
;
5435 cost_base
= force_var_cost (data
, base
, NULL
);
5436 /* It will be exceptional that the iv register happens to be initialized with
5437 the proper value at no cost. In general, there will at least be a regcopy
5439 if (cost_base
.cost
== 0)
5440 cost_base
.cost
= COSTS_N_INSNS (1);
5441 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5443 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5445 /* Prefer the original ivs unless we may gain something by replacing it.
5446 The reason is to make debugging simpler; so this is not relevant for
5447 artificial ivs created by other optimization passes. */
5448 if (cand
->pos
!= IP_ORIGINAL
5449 || !SSA_NAME_VAR (cand
->var_before
)
5450 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5453 /* Prefer not to insert statements into latch unless there are some
5454 already (so that we do not create unnecessary jumps). */
5455 if (cand
->pos
== IP_END
5456 && empty_block_p (ip_end_pos (data
->current_loop
)))
5460 cand
->cost_step
= cost_step
;
5463 /* Determines costs of computation of the candidates. */
5466 determine_iv_costs (struct ivopts_data
*data
)
5470 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5472 fprintf (dump_file
, "Candidate costs:\n");
5473 fprintf (dump_file
, " cand\tcost\n");
5476 for (i
= 0; i
< n_iv_cands (data
); i
++)
5478 struct iv_cand
*cand
= iv_cand (data
, i
);
5480 determine_iv_cost (data
, cand
);
5482 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5483 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5486 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5487 fprintf (dump_file
, "\n");
5490 /* Calculates cost for having SIZE induction variables. */
5493 ivopts_global_cost_for_size (struct ivopts_data
*data
, unsigned size
)
5495 /* We add size to the cost, so that we prefer eliminating ivs
5497 return size
+ estimate_reg_pressure_cost (size
, data
->regs_used
, data
->speed
,
5498 data
->body_includes_call
);
5501 /* For each size of the induction variable set determine the penalty. */
5504 determine_set_costs (struct ivopts_data
*data
)
5510 struct loop
*loop
= data
->current_loop
;
5513 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5515 fprintf (dump_file
, "Global costs:\n");
5516 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5517 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5518 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5519 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5523 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5526 op
= PHI_RESULT (phi
);
5528 if (virtual_operand_p (op
))
5531 if (get_iv (data
, op
))
5537 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5539 struct version_info
*info
= ver_info (data
, j
);
5541 if (info
->inv_id
&& info
->has_nonlin_use
)
5545 data
->regs_used
= n
;
5546 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5547 fprintf (dump_file
, " regs_used %d\n", n
);
5549 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5551 fprintf (dump_file
, " cost for size:\n");
5552 fprintf (dump_file
, " ivs\tcost\n");
5553 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5554 fprintf (dump_file
, " %d\t%d\n", j
,
5555 ivopts_global_cost_for_size (data
, j
));
5556 fprintf (dump_file
, "\n");
5560 /* Returns true if A is a cheaper cost pair than B. */
5563 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5573 cmp
= compare_costs (a
->cost
, b
->cost
);
5580 /* In case the costs are the same, prefer the cheaper candidate. */
5581 if (a
->cand
->cost
< b
->cand
->cost
)
5588 /* Returns candidate by that USE is expressed in IVS. */
5590 static struct cost_pair
*
5591 iv_ca_cand_for_use (struct iv_ca
*ivs
, struct iv_use
*use
)
5593 return ivs
->cand_for_use
[use
->id
];
5596 /* Computes the cost field of IVS structure. */
5599 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5601 comp_cost cost
= ivs
->cand_use_cost
;
5603 cost
.cost
+= ivs
->cand_cost
;
5605 cost
.cost
+= ivopts_global_cost_for_size (data
,
5606 ivs
->n_regs
+ ivs
->num_used_inv_expr
);
5611 /* Remove invariants in set INVS to set IVS. */
5614 iv_ca_set_remove_invariants (struct iv_ca
*ivs
, bitmap invs
)
5622 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5624 ivs
->n_invariant_uses
[iid
]--;
5625 if (ivs
->n_invariant_uses
[iid
] == 0)
5630 /* Set USE not to be expressed by any candidate in IVS. */
5633 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5636 unsigned uid
= use
->id
, cid
;
5637 struct cost_pair
*cp
;
5639 cp
= ivs
->cand_for_use
[uid
];
5645 ivs
->cand_for_use
[uid
] = NULL
;
5646 ivs
->n_cand_uses
[cid
]--;
5648 if (ivs
->n_cand_uses
[cid
] == 0)
5650 bitmap_clear_bit (ivs
->cands
, cid
);
5651 /* Do not count the pseudocandidates. */
5655 ivs
->cand_cost
-= cp
->cand
->cost
;
5657 iv_ca_set_remove_invariants (ivs
, cp
->cand
->depends_on
);
5660 ivs
->cand_use_cost
= sub_costs (ivs
->cand_use_cost
, cp
->cost
);
5662 iv_ca_set_remove_invariants (ivs
, cp
->depends_on
);
5664 if (cp
->inv_expr_id
!= -1)
5666 ivs
->used_inv_expr
[cp
->inv_expr_id
]--;
5667 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 0)
5668 ivs
->num_used_inv_expr
--;
5670 iv_ca_recount_cost (data
, ivs
);
5673 /* Add invariants in set INVS to set IVS. */
5676 iv_ca_set_add_invariants (struct iv_ca
*ivs
, bitmap invs
)
5684 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5686 ivs
->n_invariant_uses
[iid
]++;
5687 if (ivs
->n_invariant_uses
[iid
] == 1)
5692 /* Set cost pair for USE in set IVS to CP. */
5695 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5696 struct iv_use
*use
, struct cost_pair
*cp
)
5698 unsigned uid
= use
->id
, cid
;
5700 if (ivs
->cand_for_use
[uid
] == cp
)
5703 if (ivs
->cand_for_use
[uid
])
5704 iv_ca_set_no_cp (data
, ivs
, use
);
5711 ivs
->cand_for_use
[uid
] = cp
;
5712 ivs
->n_cand_uses
[cid
]++;
5713 if (ivs
->n_cand_uses
[cid
] == 1)
5715 bitmap_set_bit (ivs
->cands
, cid
);
5716 /* Do not count the pseudocandidates. */
5720 ivs
->cand_cost
+= cp
->cand
->cost
;
5722 iv_ca_set_add_invariants (ivs
, cp
->cand
->depends_on
);
5725 ivs
->cand_use_cost
= add_costs (ivs
->cand_use_cost
, cp
->cost
);
5726 iv_ca_set_add_invariants (ivs
, cp
->depends_on
);
5728 if (cp
->inv_expr_id
!= -1)
5730 ivs
->used_inv_expr
[cp
->inv_expr_id
]++;
5731 if (ivs
->used_inv_expr
[cp
->inv_expr_id
] == 1)
5732 ivs
->num_used_inv_expr
++;
5734 iv_ca_recount_cost (data
, ivs
);
5738 /* Extend set IVS by expressing USE by some of the candidates in it
5739 if possible. Consider all important candidates if candidates in
5740 set IVS don't give any result. */
5743 iv_ca_add_use (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5746 struct cost_pair
*best_cp
= NULL
, *cp
;
5749 struct iv_cand
*cand
;
5751 gcc_assert (ivs
->upto
>= use
->id
);
5755 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5757 cand
= iv_cand (data
, i
);
5758 cp
= get_use_iv_cost (data
, use
, cand
);
5759 if (cheaper_cost_pair (cp
, best_cp
))
5763 if (best_cp
== NULL
)
5765 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5767 cand
= iv_cand (data
, i
);
5768 cp
= get_use_iv_cost (data
, use
, cand
);
5769 if (cheaper_cost_pair (cp
, best_cp
))
5774 iv_ca_set_cp (data
, ivs
, use
, best_cp
);
5777 /* Get cost for assignment IVS. */
5780 iv_ca_cost (struct iv_ca
*ivs
)
5782 /* This was a conditional expression but it triggered a bug in
5785 return infinite_cost
;
5790 /* Returns true if all dependences of CP are among invariants in IVS. */
5793 iv_ca_has_deps (struct iv_ca
*ivs
, struct cost_pair
*cp
)
5798 if (!cp
->depends_on
)
5801 EXECUTE_IF_SET_IN_BITMAP (cp
->depends_on
, 0, i
, bi
)
5803 if (ivs
->n_invariant_uses
[i
] == 0)
5810 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5811 it before NEXT_CHANGE. */
5813 static struct iv_ca_delta
*
5814 iv_ca_delta_add (struct iv_use
*use
, struct cost_pair
*old_cp
,
5815 struct cost_pair
*new_cp
, struct iv_ca_delta
*next_change
)
5817 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5820 change
->old_cp
= old_cp
;
5821 change
->new_cp
= new_cp
;
5822 change
->next_change
= next_change
;
5827 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5830 static struct iv_ca_delta
*
5831 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
5833 struct iv_ca_delta
*last
;
5841 for (last
= l1
; last
->next_change
; last
= last
->next_change
)
5843 last
->next_change
= l2
;
5848 /* Reverse the list of changes DELTA, forming the inverse to it. */
5850 static struct iv_ca_delta
*
5851 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
5853 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
5855 for (act
= delta
; act
; act
= next
)
5857 next
= act
->next_change
;
5858 act
->next_change
= prev
;
5861 std::swap (act
->old_cp
, act
->new_cp
);
5867 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5868 reverted instead. */
5871 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5872 struct iv_ca_delta
*delta
, bool forward
)
5874 struct cost_pair
*from
, *to
;
5875 struct iv_ca_delta
*act
;
5878 delta
= iv_ca_delta_reverse (delta
);
5880 for (act
= delta
; act
; act
= act
->next_change
)
5884 gcc_assert (iv_ca_cand_for_use (ivs
, act
->use
) == from
);
5885 iv_ca_set_cp (data
, ivs
, act
->use
, to
);
5889 iv_ca_delta_reverse (delta
);
5892 /* Returns true if CAND is used in IVS. */
5895 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
5897 return ivs
->n_cand_uses
[cand
->id
] > 0;
5900 /* Returns number of induction variable candidates in the set IVS. */
5903 iv_ca_n_cands (struct iv_ca
*ivs
)
5905 return ivs
->n_cands
;
5908 /* Free the list of changes DELTA. */
5911 iv_ca_delta_free (struct iv_ca_delta
**delta
)
5913 struct iv_ca_delta
*act
, *next
;
5915 for (act
= *delta
; act
; act
= next
)
5917 next
= act
->next_change
;
5924 /* Allocates new iv candidates assignment. */
5926 static struct iv_ca
*
5927 iv_ca_new (struct ivopts_data
*data
)
5929 struct iv_ca
*nw
= XNEW (struct iv_ca
);
5933 nw
->cand_for_use
= XCNEWVEC (struct cost_pair
*, n_iv_uses (data
));
5934 nw
->n_cand_uses
= XCNEWVEC (unsigned, n_iv_cands (data
));
5935 nw
->cands
= BITMAP_ALLOC (NULL
);
5938 nw
->cand_use_cost
= no_cost
;
5940 nw
->n_invariant_uses
= XCNEWVEC (unsigned, data
->max_inv_id
+ 1);
5942 nw
->used_inv_expr
= XCNEWVEC (unsigned, data
->inv_expr_id
+ 1);
5943 nw
->num_used_inv_expr
= 0;
5948 /* Free memory occupied by the set IVS. */
5951 iv_ca_free (struct iv_ca
**ivs
)
5953 free ((*ivs
)->cand_for_use
);
5954 free ((*ivs
)->n_cand_uses
);
5955 BITMAP_FREE ((*ivs
)->cands
);
5956 free ((*ivs
)->n_invariant_uses
);
5957 free ((*ivs
)->used_inv_expr
);
5962 /* Dumps IVS to FILE. */
5965 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
5967 const char *pref
= " invariants ";
5969 comp_cost cost
= iv_ca_cost (ivs
);
5971 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
, cost
.complexity
);
5972 fprintf (file
, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5973 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
, ivs
->cand_use_cost
.complexity
);
5974 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
5976 for (i
= 0; i
< ivs
->upto
; i
++)
5978 struct iv_use
*use
= iv_use (data
, i
);
5979 struct cost_pair
*cp
= iv_ca_cand_for_use (ivs
, use
);
5981 fprintf (file
, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5982 use
->id
, cp
->cand
->id
, cp
->cost
.cost
, cp
->cost
.complexity
);
5984 fprintf (file
, " use:%d --> ??\n", use
->id
);
5987 for (i
= 1; i
<= data
->max_inv_id
; i
++)
5988 if (ivs
->n_invariant_uses
[i
])
5990 fprintf (file
, "%s%d", pref
, i
);
5993 fprintf (file
, "\n\n");
5996 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5997 new set, and store differences in DELTA. Number of induction variables
5998 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5999 the function will try to find a solution with mimimal iv candidates. */
6002 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6003 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6004 unsigned *n_ivs
, bool min_ncand
)
6009 struct cost_pair
*old_cp
, *new_cp
;
6012 for (i
= 0; i
< ivs
->upto
; i
++)
6014 use
= iv_use (data
, i
);
6015 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6018 && old_cp
->cand
== cand
)
6021 new_cp
= get_use_iv_cost (data
, use
, cand
);
6025 if (!min_ncand
&& !iv_ca_has_deps (ivs
, new_cp
))
6028 if (!min_ncand
&& !cheaper_cost_pair (new_cp
, old_cp
))
6031 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6034 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6035 cost
= iv_ca_cost (ivs
);
6037 *n_ivs
= iv_ca_n_cands (ivs
);
6038 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6043 /* Try narrowing set IVS by removing CAND. Return the cost of
6044 the new set and store the differences in DELTA. START is
6045 the candidate with which we start narrowing. */
6048 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6049 struct iv_cand
*cand
, struct iv_cand
*start
,
6050 struct iv_ca_delta
**delta
)
6054 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6056 struct iv_cand
*cnd
;
6057 comp_cost cost
, best_cost
, acost
;
6060 for (i
= 0; i
< n_iv_uses (data
); i
++)
6062 use
= iv_use (data
, i
);
6064 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6065 if (old_cp
->cand
!= cand
)
6068 best_cost
= iv_ca_cost (ivs
);
6069 /* Start narrowing with START. */
6070 new_cp
= get_use_iv_cost (data
, use
, start
);
6072 if (data
->consider_all_candidates
)
6074 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6076 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6079 cnd
= iv_cand (data
, ci
);
6081 cp
= get_use_iv_cost (data
, use
, cnd
);
6085 iv_ca_set_cp (data
, ivs
, use
, cp
);
6086 acost
= iv_ca_cost (ivs
);
6088 if (compare_costs (acost
, best_cost
) < 0)
6097 EXECUTE_IF_AND_IN_BITMAP (use
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6099 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6102 cnd
= iv_cand (data
, ci
);
6104 cp
= get_use_iv_cost (data
, use
, cnd
);
6108 iv_ca_set_cp (data
, ivs
, use
, cp
);
6109 acost
= iv_ca_cost (ivs
);
6111 if (compare_costs (acost
, best_cost
) < 0)
6118 /* Restore to old cp for use. */
6119 iv_ca_set_cp (data
, ivs
, use
, old_cp
);
6123 iv_ca_delta_free (delta
);
6124 return infinite_cost
;
6127 *delta
= iv_ca_delta_add (use
, old_cp
, new_cp
, *delta
);
6130 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6131 cost
= iv_ca_cost (ivs
);
6132 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6137 /* Try optimizing the set of candidates IVS by removing candidates different
6138 from to EXCEPT_CAND from it. Return cost of the new set, and store
6139 differences in DELTA. */
6142 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6143 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6146 struct iv_ca_delta
*act_delta
, *best_delta
;
6148 comp_cost best_cost
, acost
;
6149 struct iv_cand
*cand
;
6152 best_cost
= iv_ca_cost (ivs
);
6154 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6156 cand
= iv_cand (data
, i
);
6158 if (cand
== except_cand
)
6161 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6163 if (compare_costs (acost
, best_cost
) < 0)
6166 iv_ca_delta_free (&best_delta
);
6167 best_delta
= act_delta
;
6170 iv_ca_delta_free (&act_delta
);
6179 /* Recurse to possibly remove other unnecessary ivs. */
6180 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6181 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6182 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6183 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6187 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6188 cheaper local cost for USE than BEST_CP. Return pointer to
6189 the corresponding cost_pair, otherwise just return BEST_CP. */
6191 static struct cost_pair
*
6192 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_use
*use
,
6193 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6194 struct cost_pair
*best_cp
)
6196 struct iv_cand
*cand
;
6197 struct cost_pair
*cp
;
6199 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6200 if (cand_idx
== old_cand
->id
)
6203 cand
= iv_cand (data
, cand_idx
);
6204 cp
= get_use_iv_cost (data
, use
, cand
);
6205 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6211 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6212 which are used by more than one iv uses. For each of those candidates,
6213 this function tries to represent iv uses under that candidate using
6214 other ones with lower local cost, then tries to prune the new set.
6215 If the new set has lower cost, It returns the new cost after recording
6216 candidate replacement in list DELTA. */
6219 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6220 struct iv_ca_delta
**delta
)
6222 bitmap_iterator bi
, bj
;
6223 unsigned int i
, j
, k
;
6225 struct iv_cand
*cand
;
6226 comp_cost orig_cost
, acost
;
6227 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6228 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6231 orig_cost
= iv_ca_cost (ivs
);
6233 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6235 if (ivs
->n_cand_uses
[i
] == 1
6236 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6239 cand
= iv_cand (data
, i
);
6242 /* Represent uses under current candidate using other ones with
6243 lower local cost. */
6244 for (j
= 0; j
< ivs
->upto
; j
++)
6246 use
= iv_use (data
, j
);
6247 old_cp
= iv_ca_cand_for_use (ivs
, use
);
6249 if (old_cp
->cand
!= cand
)
6253 if (data
->consider_all_candidates
)
6254 for (k
= 0; k
< n_iv_cands (data
); k
++)
6255 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6256 old_cp
->cand
, best_cp
);
6258 EXECUTE_IF_SET_IN_BITMAP (use
->related_cands
, 0, k
, bj
)
6259 best_cp
= cheaper_cost_with_cand (data
, use
, k
,
6260 old_cp
->cand
, best_cp
);
6262 if (best_cp
== old_cp
)
6265 act_delta
= iv_ca_delta_add (use
, old_cp
, best_cp
, act_delta
);
6267 /* No need for further prune. */
6271 /* Prune the new candidate set. */
6272 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6273 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6274 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6275 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6277 if (compare_costs (acost
, orig_cost
) < 0)
6283 iv_ca_delta_free (&act_delta
);
6289 /* Tries to extend the sets IVS in the best possible way in order
6290 to express the USE. If ORIGINALP is true, prefer candidates from
6291 the original set of IVs, otherwise favor important candidates not
6292 based on any memory object. */
6295 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6296 struct iv_use
*use
, bool originalp
)
6298 comp_cost best_cost
, act_cost
;
6301 struct iv_cand
*cand
;
6302 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6303 struct cost_pair
*cp
;
6305 iv_ca_add_use (data
, ivs
, use
);
6306 best_cost
= iv_ca_cost (ivs
);
6307 cp
= iv_ca_cand_for_use (ivs
, use
);
6310 best_delta
= iv_ca_delta_add (use
, NULL
, cp
, NULL
);
6311 iv_ca_set_no_cp (data
, ivs
, use
);
6314 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6315 first try important candidates not based on any memory object. Only if
6316 this fails, try the specific ones. Rationale -- in loops with many
6317 variables the best choice often is to use just one generic biv. If we
6318 added here many ivs specific to the uses, the optimization algorithm later
6319 would be likely to get stuck in a local minimum, thus causing us to create
6320 too many ivs. The approach from few ivs to more seems more likely to be
6321 successful -- starting from few ivs, replacing an expensive use by a
6322 specific iv should always be a win. */
6323 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
6325 cand
= iv_cand (data
, i
);
6327 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6330 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6333 if (iv_ca_cand_used_p (ivs
, cand
))
6336 cp
= get_use_iv_cost (data
, use
, cand
);
6340 iv_ca_set_cp (data
, ivs
, use
, cp
);
6341 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6343 iv_ca_set_no_cp (data
, ivs
, use
);
6344 act_delta
= iv_ca_delta_add (use
, NULL
, cp
, act_delta
);
6346 if (compare_costs (act_cost
, best_cost
) < 0)
6348 best_cost
= act_cost
;
6350 iv_ca_delta_free (&best_delta
);
6351 best_delta
= act_delta
;
6354 iv_ca_delta_free (&act_delta
);
6357 if (infinite_cost_p (best_cost
))
6359 for (i
= 0; i
< use
->n_map_members
; i
++)
6361 cp
= use
->cost_map
+ i
;
6366 /* Already tried this. */
6367 if (cand
->important
)
6369 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6371 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6375 if (iv_ca_cand_used_p (ivs
, cand
))
6379 iv_ca_set_cp (data
, ivs
, use
, cp
);
6380 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6381 iv_ca_set_no_cp (data
, ivs
, use
);
6382 act_delta
= iv_ca_delta_add (use
, iv_ca_cand_for_use (ivs
, use
),
6385 if (compare_costs (act_cost
, best_cost
) < 0)
6387 best_cost
= act_cost
;
6390 iv_ca_delta_free (&best_delta
);
6391 best_delta
= act_delta
;
6394 iv_ca_delta_free (&act_delta
);
6398 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6399 iv_ca_delta_free (&best_delta
);
6401 return !infinite_cost_p (best_cost
);
6404 /* Finds an initial assignment of candidates to uses. */
6406 static struct iv_ca
*
6407 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6409 struct iv_ca
*ivs
= iv_ca_new (data
);
6412 for (i
= 0; i
< n_iv_uses (data
); i
++)
6413 if (!try_add_cand_for (data
, ivs
, iv_use (data
, i
), originalp
))
6422 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6423 points to a bool variable, this function tries to break local
6424 optimal fixed-point by replacing candidates in IVS if it's true. */
6427 try_improve_iv_set (struct ivopts_data
*data
,
6428 struct iv_ca
*ivs
, bool *try_replace_p
)
6431 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6432 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6433 struct iv_cand
*cand
;
6435 /* Try extending the set of induction variables by one. */
6436 for (i
= 0; i
< n_iv_cands (data
); i
++)
6438 cand
= iv_cand (data
, i
);
6440 if (iv_ca_cand_used_p (ivs
, cand
))
6443 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6447 /* If we successfully added the candidate and the set is small enough,
6448 try optimizing it by removing other candidates. */
6449 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6451 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6452 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6453 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6454 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6457 if (compare_costs (acost
, best_cost
) < 0)
6460 iv_ca_delta_free (&best_delta
);
6461 best_delta
= act_delta
;
6464 iv_ca_delta_free (&act_delta
);
6469 /* Try removing the candidates from the set instead. */
6470 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6472 if (!best_delta
&& *try_replace_p
)
6474 *try_replace_p
= false;
6475 /* So far candidate selecting algorithm tends to choose fewer IVs
6476 so that it can handle cases in which loops have many variables
6477 but the best choice is often to use only one general biv. One
6478 weakness is it can't handle opposite cases, in which different
6479 candidates should be chosen with respect to each use. To solve
6480 the problem, we replace candidates in a manner described by the
6481 comments of iv_ca_replace, thus give general algorithm a chance
6482 to break local optimal fixed-point in these cases. */
6483 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6490 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6491 gcc_assert (compare_costs (best_cost
, iv_ca_cost (ivs
)) == 0);
6492 iv_ca_delta_free (&best_delta
);
6496 /* Attempts to find the optimal set of induction variables. We do simple
6497 greedy heuristic -- we try to replace at most one candidate in the selected
6498 solution and remove the unused ivs while this improves the cost. */
6500 static struct iv_ca
*
6501 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6504 bool try_replace_p
= true;
6506 /* Get the initial solution. */
6507 set
= get_initial_solution (data
, originalp
);
6510 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6511 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6515 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6517 fprintf (dump_file
, "Initial set of candidates:\n");
6518 iv_ca_dump (data
, dump_file
, set
);
6521 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6523 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6525 fprintf (dump_file
, "Improved to:\n");
6526 iv_ca_dump (data
, dump_file
, set
);
6533 static struct iv_ca
*
6534 find_optimal_iv_set (struct ivopts_data
*data
)
6537 struct iv_ca
*set
, *origset
;
6539 comp_cost cost
, origcost
;
6541 /* Determine the cost based on a strategy that starts with original IVs,
6542 and try again using a strategy that prefers candidates not based
6544 origset
= find_optimal_iv_set_1 (data
, true);
6545 set
= find_optimal_iv_set_1 (data
, false);
6547 if (!origset
&& !set
)
6550 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6551 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6553 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6555 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6556 origcost
.cost
, origcost
.complexity
);
6557 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6558 cost
.cost
, cost
.complexity
);
6561 /* Choose the one with the best cost. */
6562 if (compare_costs (origcost
, cost
) <= 0)
6569 iv_ca_free (&origset
);
6571 for (i
= 0; i
< n_iv_uses (data
); i
++)
6573 use
= iv_use (data
, i
);
6574 use
->selected
= iv_ca_cand_for_use (set
, use
)->cand
;
6580 /* Creates a new induction variable corresponding to CAND. */
6583 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6585 gimple_stmt_iterator incr_pos
;
6595 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6599 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6607 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6611 /* Mark that the iv is preserved. */
6612 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6613 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6615 /* Rewrite the increment so that it uses var_before directly. */
6616 find_interesting_uses_op (data
, cand
->var_after
)->selected
= cand
;
6620 gimple_add_tmp_var (cand
->var_before
);
6622 base
= unshare_expr (cand
->iv
->base
);
6624 create_iv (base
, unshare_expr (cand
->iv
->step
),
6625 cand
->var_before
, data
->current_loop
,
6626 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6629 /* Creates new induction variables described in SET. */
6632 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6635 struct iv_cand
*cand
;
6638 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6640 cand
= iv_cand (data
, i
);
6641 create_new_iv (data
, cand
);
6644 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6646 fprintf (dump_file
, "Selected IV set for loop %d",
6647 data
->current_loop
->num
);
6648 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6649 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6650 LOCATION_LINE (data
->loop_loc
));
6651 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6652 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6654 cand
= iv_cand (data
, i
);
6655 dump_cand (dump_file
, cand
);
6657 fprintf (dump_file
, "\n");
6661 /* Rewrites USE (definition of iv used in a nonlinear expression)
6662 using candidate CAND. */
6665 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6666 struct iv_use
*use
, struct iv_cand
*cand
)
6671 gimple_stmt_iterator bsi
;
6673 /* An important special case -- if we are asked to express value of
6674 the original iv by itself, just exit; there is no need to
6675 introduce a new computation (that might also need casting the
6676 variable to unsigned and back). */
6677 if (cand
->pos
== IP_ORIGINAL
6678 && cand
->incremented_at
== use
->stmt
)
6680 enum tree_code stmt_code
;
6682 gcc_assert (is_gimple_assign (use
->stmt
));
6683 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6685 /* Check whether we may leave the computation unchanged.
6686 This is the case only if it does not rely on other
6687 computations in the loop -- otherwise, the computation
6688 we rely upon may be removed in remove_unused_ivs,
6689 thus leading to ICE. */
6690 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6691 if (stmt_code
== PLUS_EXPR
6692 || stmt_code
== MINUS_EXPR
6693 || stmt_code
== POINTER_PLUS_EXPR
)
6695 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6696 op
= gimple_assign_rhs2 (use
->stmt
);
6697 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6698 op
= gimple_assign_rhs1 (use
->stmt
);
6705 if (op
&& expr_invariant_in_loop_p (data
->current_loop
, op
))
6709 comp
= get_computation (data
->current_loop
, use
, cand
);
6710 gcc_assert (comp
!= NULL_TREE
);
6712 switch (gimple_code (use
->stmt
))
6715 tgt
= PHI_RESULT (use
->stmt
);
6717 /* If we should keep the biv, do not replace it. */
6718 if (name_info (data
, tgt
)->preserve_biv
)
6721 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6725 tgt
= gimple_assign_lhs (use
->stmt
);
6726 bsi
= gsi_for_stmt (use
->stmt
);
6733 if (!valid_gimple_rhs_p (comp
)
6734 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6735 /* We can't allow re-allocating the stmt as it might be pointed
6737 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6738 >= gimple_num_ops (gsi_stmt (bsi
)))))
6740 comp
= force_gimple_operand_gsi (&bsi
, comp
, true, NULL_TREE
,
6741 true, GSI_SAME_STMT
);
6742 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6744 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6745 /* As this isn't a plain copy we have to reset alignment
6747 if (SSA_NAME_PTR_INFO (comp
))
6748 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6752 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
6754 ass
= gimple_build_assign (tgt
, comp
);
6755 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
6757 bsi
= gsi_for_stmt (use
->stmt
);
6758 remove_phi_node (&bsi
, false);
6762 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
6763 use
->stmt
= gsi_stmt (bsi
);
6767 /* Performs a peephole optimization to reorder the iv update statement with
6768 a mem ref to enable instruction combining in later phases. The mem ref uses
6769 the iv value before the update, so the reordering transformation requires
6770 adjustment of the offset. CAND is the selected IV_CAND.
6774 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6782 directly propagating t over to (1) will introduce overlapping live range
6783 thus increase register pressure. This peephole transform it into:
6787 t = MEM_REF (base, iv2, 8, 8);
6794 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
6797 gimple iv_update
, stmt
;
6799 gimple_stmt_iterator gsi
, gsi_iv
;
6801 if (cand
->pos
!= IP_NORMAL
)
6804 var_after
= cand
->var_after
;
6805 iv_update
= SSA_NAME_DEF_STMT (var_after
);
6807 bb
= gimple_bb (iv_update
);
6808 gsi
= gsi_last_nondebug_bb (bb
);
6809 stmt
= gsi_stmt (gsi
);
6811 /* Only handle conditional statement for now. */
6812 if (gimple_code (stmt
) != GIMPLE_COND
)
6815 gsi_prev_nondebug (&gsi
);
6816 stmt
= gsi_stmt (gsi
);
6817 if (stmt
!= iv_update
)
6820 gsi_prev_nondebug (&gsi
);
6821 if (gsi_end_p (gsi
))
6824 stmt
= gsi_stmt (gsi
);
6825 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
6828 if (stmt
!= use
->stmt
)
6831 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
6834 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6836 fprintf (dump_file
, "Reordering \n");
6837 print_gimple_stmt (dump_file
, iv_update
, 0, 0);
6838 print_gimple_stmt (dump_file
, use
->stmt
, 0, 0);
6839 fprintf (dump_file
, "\n");
6842 gsi
= gsi_for_stmt (use
->stmt
);
6843 gsi_iv
= gsi_for_stmt (iv_update
);
6844 gsi_move_before (&gsi_iv
, &gsi
);
6846 cand
->pos
= IP_BEFORE_USE
;
6847 cand
->incremented_at
= use
->stmt
;
6850 /* Rewrites USE (address that is an iv) using candidate CAND. */
6853 rewrite_use_address_1 (struct ivopts_data
*data
,
6854 struct iv_use
*use
, struct iv_cand
*cand
)
6857 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6858 tree base_hint
= NULL_TREE
;
6862 adjust_iv_update_pos (cand
, use
);
6863 ok
= get_computation_aff (data
->current_loop
, use
, cand
, use
->stmt
, &aff
);
6865 unshare_aff_combination (&aff
);
6867 /* To avoid undefined overflow problems, all IV candidates use unsigned
6868 integer types. The drawback is that this makes it impossible for
6869 create_mem_ref to distinguish an IV that is based on a memory object
6870 from one that represents simply an offset.
6872 To work around this problem, we pass a hint to create_mem_ref that
6873 indicates which variable (if any) in aff is an IV based on a memory
6874 object. Note that we only consider the candidate. If this is not
6875 based on an object, the base of the reference is in some subexpression
6876 of the use -- but these will use pointer types, so they are recognized
6877 by the create_mem_ref heuristics anyway. */
6878 if (cand
->iv
->base_object
)
6879 base_hint
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6881 iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6882 ref
= create_mem_ref (&bsi
, TREE_TYPE (*use
->op_p
), &aff
,
6883 reference_alias_ptr_type (*use
->op_p
),
6884 iv
, base_hint
, data
->speed
);
6885 copy_ref_info (ref
, *use
->op_p
);
6889 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6890 first use of a group, rewrites sub uses in the group too. */
6893 rewrite_use_address (struct ivopts_data
*data
,
6894 struct iv_use
*use
, struct iv_cand
*cand
)
6896 struct iv_use
*next
;
6898 gcc_assert (use
->sub_id
== 0);
6899 rewrite_use_address_1 (data
, use
, cand
);
6900 update_stmt (use
->stmt
);
6902 for (next
= use
->next
; next
!= NULL
; next
= next
->next
)
6904 rewrite_use_address_1 (data
, next
, cand
);
6905 update_stmt (next
->stmt
);
6911 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6915 rewrite_use_compare (struct ivopts_data
*data
,
6916 struct iv_use
*use
, struct iv_cand
*cand
)
6918 tree comp
, *var_p
, op
, bound
;
6919 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
6920 enum tree_code compare
;
6921 struct cost_pair
*cp
= get_use_iv_cost (data
, use
, cand
);
6927 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
6928 tree var_type
= TREE_TYPE (var
);
6931 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6933 fprintf (dump_file
, "Replacing exit test: ");
6934 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
6937 bound
= unshare_expr (fold_convert (var_type
, bound
));
6938 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
6940 gsi_insert_seq_on_edge_immediate (
6941 loop_preheader_edge (data
->current_loop
),
6944 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
6945 gimple_cond_set_lhs (cond_stmt
, var
);
6946 gimple_cond_set_code (cond_stmt
, compare
);
6947 gimple_cond_set_rhs (cond_stmt
, op
);
6951 /* The induction variable elimination failed; just express the original
6953 comp
= get_computation (data
->current_loop
, use
, cand
);
6954 gcc_assert (comp
!= NULL_TREE
);
6956 ok
= extract_cond_operands (data
, use
->stmt
, &var_p
, NULL
, NULL
, NULL
);
6959 *var_p
= force_gimple_operand_gsi (&bsi
, comp
, true, SSA_NAME_VAR (*var_p
),
6960 true, GSI_SAME_STMT
);
6963 /* Rewrites USE using candidate CAND. */
6966 rewrite_use (struct ivopts_data
*data
, struct iv_use
*use
, struct iv_cand
*cand
)
6970 case USE_NONLINEAR_EXPR
:
6971 rewrite_use_nonlinear_expr (data
, use
, cand
);
6975 rewrite_use_address (data
, use
, cand
);
6979 rewrite_use_compare (data
, use
, cand
);
6986 update_stmt (use
->stmt
);
6989 /* Rewrite the uses using the selected induction variables. */
6992 rewrite_uses (struct ivopts_data
*data
)
6995 struct iv_cand
*cand
;
6998 for (i
= 0; i
< n_iv_uses (data
); i
++)
7000 use
= iv_use (data
, i
);
7001 cand
= use
->selected
;
7004 rewrite_use (data
, use
, cand
);
7008 /* Removes the ivs that are not used after rewriting. */
7011 remove_unused_ivs (struct ivopts_data
*data
)
7015 bitmap toremove
= BITMAP_ALLOC (NULL
);
7017 /* Figure out an order in which to release SSA DEFs so that we don't
7018 release something that we'd have to propagate into a debug stmt
7020 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7022 struct version_info
*info
;
7024 info
= ver_info (data
, j
);
7026 && !integer_zerop (info
->iv
->step
)
7028 && !info
->iv
->have_use_for
7029 && !info
->preserve_biv
)
7031 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7033 tree def
= info
->iv
->ssa_name
;
7035 if (MAY_HAVE_DEBUG_STMTS
&& SSA_NAME_DEF_STMT (def
))
7037 imm_use_iterator imm_iter
;
7038 use_operand_p use_p
;
7042 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7044 if (!gimple_debug_bind_p (stmt
))
7047 /* We just want to determine whether to do nothing
7048 (count == 0), to substitute the computed
7049 expression into a single use of the SSA DEF by
7050 itself (count == 1), or to use a debug temp
7051 because the SSA DEF is used multiple times or as
7052 part of a larger expression (count > 1). */
7054 if (gimple_debug_bind_get_value (stmt
) != def
)
7058 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7064 struct iv_use dummy_use
;
7065 struct iv_cand
*best_cand
= NULL
, *cand
;
7066 unsigned i
, best_pref
= 0, cand_pref
;
7068 memset (&dummy_use
, 0, sizeof (dummy_use
));
7069 dummy_use
.iv
= info
->iv
;
7070 for (i
= 0; i
< n_iv_uses (data
) && i
< 64; i
++)
7072 cand
= iv_use (data
, i
)->selected
;
7073 if (cand
== best_cand
)
7075 cand_pref
= operand_equal_p (cand
->iv
->step
,
7079 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7080 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7083 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7085 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7088 best_pref
= cand_pref
;
7095 tree comp
= get_computation_at (data
->current_loop
,
7096 &dummy_use
, best_cand
,
7097 SSA_NAME_DEF_STMT (def
));
7103 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7104 DECL_ARTIFICIAL (vexpr
) = 1;
7105 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7106 if (SSA_NAME_VAR (def
))
7107 DECL_MODE (vexpr
) = DECL_MODE (SSA_NAME_VAR (def
));
7109 DECL_MODE (vexpr
) = TYPE_MODE (TREE_TYPE (vexpr
));
7111 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7112 gimple_stmt_iterator gsi
;
7114 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7115 gsi
= gsi_after_labels (gimple_bb
7116 (SSA_NAME_DEF_STMT (def
)));
7118 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7120 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7124 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7126 if (!gimple_debug_bind_p (stmt
))
7129 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7130 SET_USE (use_p
, comp
);
7138 release_defs_bitset (toremove
);
7140 BITMAP_FREE (toremove
);
7143 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7144 for hash_map::traverse. */
7147 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7153 /* Frees data allocated by the optimization of a single loop. */
7156 free_loop_data (struct ivopts_data
*data
)
7164 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7165 delete data
->niters
;
7166 data
->niters
= NULL
;
7169 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7171 struct version_info
*info
;
7173 info
= ver_info (data
, i
);
7175 info
->has_nonlin_use
= false;
7176 info
->preserve_biv
= false;
7179 bitmap_clear (data
->relevant
);
7180 bitmap_clear (data
->important_candidates
);
7182 for (i
= 0; i
< n_iv_uses (data
); i
++)
7184 struct iv_use
*use
= iv_use (data
, i
);
7185 struct iv_use
*pre
= use
, *sub
= use
->next
;
7189 gcc_assert (sub
->related_cands
== NULL
);
7190 gcc_assert (sub
->n_map_members
== 0 && sub
->cost_map
== NULL
);
7197 BITMAP_FREE (use
->related_cands
);
7198 for (j
= 0; j
< use
->n_map_members
; j
++)
7199 if (use
->cost_map
[j
].depends_on
)
7200 BITMAP_FREE (use
->cost_map
[j
].depends_on
);
7201 free (use
->cost_map
);
7204 data
->iv_uses
.truncate (0);
7206 for (i
= 0; i
< n_iv_cands (data
); i
++)
7208 struct iv_cand
*cand
= iv_cand (data
, i
);
7210 if (cand
->depends_on
)
7211 BITMAP_FREE (cand
->depends_on
);
7214 data
->iv_candidates
.truncate (0);
7216 if (data
->version_info_size
< num_ssa_names
)
7218 data
->version_info_size
= 2 * num_ssa_names
;
7219 free (data
->version_info
);
7220 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7223 data
->max_inv_id
= 0;
7225 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7226 SET_DECL_RTL (obj
, NULL_RTX
);
7228 decl_rtl_to_reset
.truncate (0);
7230 data
->inv_expr_tab
->empty ();
7231 data
->inv_expr_id
= 0;
7234 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7238 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7240 free_loop_data (data
);
7241 free (data
->version_info
);
7242 BITMAP_FREE (data
->relevant
);
7243 BITMAP_FREE (data
->important_candidates
);
7245 decl_rtl_to_reset
.release ();
7246 data
->iv_uses
.release ();
7247 data
->iv_candidates
.release ();
7248 delete data
->inv_expr_tab
;
7249 data
->inv_expr_tab
= NULL
;
7250 free_affine_expand_cache (&data
->name_expansion_cache
);
7251 obstack_free (&data
->iv_obstack
, NULL
);
7254 /* Returns true if the loop body BODY includes any function calls. */
7257 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7259 gimple_stmt_iterator gsi
;
7262 for (i
= 0; i
< num_nodes
; i
++)
7263 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7265 gimple stmt
= gsi_stmt (gsi
);
7266 if (is_gimple_call (stmt
)
7267 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7273 /* Optimizes the LOOP. Returns true if anything changed. */
7276 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
)
7278 bool changed
= false;
7279 struct iv_ca
*iv_ca
;
7280 edge exit
= single_dom_exit (loop
);
7283 gcc_assert (!data
->niters
);
7284 data
->current_loop
= loop
;
7285 data
->loop_loc
= find_loop_location (loop
);
7286 data
->speed
= optimize_loop_for_speed_p (loop
);
7288 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7290 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7291 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7292 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7293 LOCATION_LINE (data
->loop_loc
));
7294 fprintf (dump_file
, "\n");
7298 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7299 exit
->src
->index
, exit
->dest
->index
);
7300 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7301 fprintf (dump_file
, "\n");
7304 fprintf (dump_file
, "\n");
7307 body
= get_loop_body (loop
);
7308 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7309 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7312 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7314 /* For each ssa name determines whether it behaves as an induction variable
7316 if (!find_induction_variables (data
))
7319 /* Finds interesting uses (item 1). */
7320 find_interesting_uses (data
);
7321 group_address_uses (data
);
7322 if (n_iv_uses (data
) > MAX_CONSIDERED_USES
)
7325 /* Finds candidates for the induction variables (item 2). */
7326 find_iv_candidates (data
);
7328 /* Calculates the costs (item 3, part 1). */
7329 determine_iv_costs (data
);
7330 determine_use_iv_costs (data
);
7331 determine_set_costs (data
);
7333 /* Find the optimal set of induction variables (item 3, part 2). */
7334 iv_ca
= find_optimal_iv_set (data
);
7339 /* Create the new induction variables (item 4, part 1). */
7340 create_new_ivs (data
, iv_ca
);
7341 iv_ca_free (&iv_ca
);
7343 /* Rewrite the uses (item 4, part 2). */
7344 rewrite_uses (data
);
7346 /* Remove the ivs that are unused after rewriting. */
7347 remove_unused_ivs (data
);
7349 /* We have changed the structure of induction variables; it might happen
7350 that definitions in the scev database refer to some of them that were
7355 free_loop_data (data
);
7360 /* Main entry point. Optimizes induction variables in loops. */
7363 tree_ssa_iv_optimize (void)
7366 struct ivopts_data data
;
7368 tree_ssa_iv_optimize_init (&data
);
7370 /* Optimize the loops starting with the innermost ones. */
7371 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7373 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7374 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7376 tree_ssa_iv_optimize_loop (&data
, loop
);
7379 tree_ssa_iv_optimize_finalize (&data
);