/* Induction variable optimizations.
   Copyright (C) 2003-2019 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in group.
      Generally, address type uses are grouped together if their iv bases
      are different in constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r. to a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variables
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000
119 /* Returns the expected number of loop iterations for LOOP.
120 The average trip count is computed from profile data if it
123 static inline HOST_WIDE_INT
124 avg_loop_niter (struct loop
*loop
)
126 HOST_WIDE_INT niter
= estimated_stmt_executions_int (loop
);
129 niter
= likely_max_stmt_executions_int (loop
);
131 if (niter
== -1 || niter
> PARAM_VALUE (PARAM_AVG_LOOP_NITER
))
132 return PARAM_VALUE (PARAM_AVG_LOOP_NITER
);
140 /* Representation of the induction variable. */
143 tree base
; /* Initial value of the iv. */
144 tree base_object
; /* A memory object to that the induction variable points. */
145 tree step
; /* Step of the iv (constant only). */
146 tree ssa_name
; /* The ssa name with the value. */
147 struct iv_use
*nonlin_use
; /* The identifier in the use if it is the case. */
148 bool biv_p
; /* Is it a biv? */
149 bool no_overflow
; /* True if the iv doesn't overflow. */
150 bool have_address_use
;/* For biv, indicate if it's used in any address
154 /* Per-ssa version information (induction variable descriptions, etc.). */
157 tree name
; /* The ssa name. */
158 struct iv
*iv
; /* Induction variable description. */
159 bool has_nonlin_use
; /* For a loop-level invariant, whether it is used in
160 an expression that is not an induction variable. */
161 bool preserve_biv
; /* For the original biv, whether to preserve it. */
162 unsigned inv_id
; /* Id of an invariant. */
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_REF_ADDRESS,	/* Use is an address for an explicit memory
			   reference.  */
  USE_PTR_ADDRESS,	/* Use is a pointer argument to a function in
			   cases where the expansion of the function
			   will turn the argument into a normal address.  */
  USE_COMPARE		/* Use is a compare.  */
};
177 /* Cost of a computation. */
180 comp_cost (): cost (0), complexity (0), scratch (0)
183 comp_cost (int cost
, unsigned complexity
, int scratch
= 0)
184 : cost (cost
), complexity (complexity
), scratch (scratch
)
187 /* Returns true if COST is infinite. */
188 bool infinite_cost_p ();
190 /* Adds costs COST1 and COST2. */
191 friend comp_cost
operator+ (comp_cost cost1
, comp_cost cost2
);
193 /* Adds COST to the comp_cost. */
194 comp_cost
operator+= (comp_cost cost
);
196 /* Adds constant C to this comp_cost. */
197 comp_cost
operator+= (HOST_WIDE_INT c
);
199 /* Subtracts constant C to this comp_cost. */
200 comp_cost
operator-= (HOST_WIDE_INT c
);
202 /* Divide the comp_cost by constant C. */
203 comp_cost
operator/= (HOST_WIDE_INT c
);
205 /* Multiply the comp_cost by constant C. */
206 comp_cost
operator*= (HOST_WIDE_INT c
);
208 /* Subtracts costs COST1 and COST2. */
209 friend comp_cost
operator- (comp_cost cost1
, comp_cost cost2
);
211 /* Subtracts COST from this comp_cost. */
212 comp_cost
operator-= (comp_cost cost
);
214 /* Returns true if COST1 is smaller than COST2. */
215 friend bool operator< (comp_cost cost1
, comp_cost cost2
);
217 /* Returns true if COST1 and COST2 are equal. */
218 friend bool operator== (comp_cost cost1
, comp_cost cost2
);
220 /* Returns true if COST1 is smaller or equal than COST2. */
221 friend bool operator<= (comp_cost cost1
, comp_cost cost2
);
223 int cost
; /* The runtime cost. */
224 unsigned complexity
; /* The estimate of the complexity of the code for
225 the computation (in no concrete units --
226 complexity field should be larger for more
227 complex expressions and addressing modes). */
228 int scratch
; /* Scratch used during cost computation. */
231 static const comp_cost no_cost
;
232 static const comp_cost
infinite_cost (INFTY
, INFTY
, INFTY
);
235 comp_cost::infinite_cost_p ()
237 return cost
== INFTY
;
241 operator+ (comp_cost cost1
, comp_cost cost2
)
243 if (cost1
.infinite_cost_p () || cost2
.infinite_cost_p ())
244 return infinite_cost
;
246 cost1
.cost
+= cost2
.cost
;
247 cost1
.complexity
+= cost2
.complexity
;
253 operator- (comp_cost cost1
, comp_cost cost2
)
255 if (cost1
.infinite_cost_p ())
256 return infinite_cost
;
258 gcc_assert (!cost2
.infinite_cost_p ());
260 cost1
.cost
-= cost2
.cost
;
261 cost1
.complexity
-= cost2
.complexity
;
267 comp_cost::operator+= (comp_cost cost
)
269 *this = *this + cost
;
274 comp_cost::operator+= (HOST_WIDE_INT c
)
276 if (infinite_cost_p ())
285 comp_cost::operator-= (HOST_WIDE_INT c
)
287 if (infinite_cost_p ())
296 comp_cost::operator/= (HOST_WIDE_INT c
)
298 if (infinite_cost_p ())
307 comp_cost::operator*= (HOST_WIDE_INT c
)
309 if (infinite_cost_p ())
318 comp_cost::operator-= (comp_cost cost
)
320 *this = *this - cost
;
325 operator< (comp_cost cost1
, comp_cost cost2
)
327 if (cost1
.cost
== cost2
.cost
)
328 return cost1
.complexity
< cost2
.complexity
;
330 return cost1
.cost
< cost2
.cost
;
334 operator== (comp_cost cost1
, comp_cost cost2
)
336 return cost1
.cost
== cost2
.cost
337 && cost1
.complexity
== cost2
.complexity
;
341 operator<= (comp_cost cost1
, comp_cost cost2
)
343 return cost1
< cost2
|| cost1
== cost2
;
/* Forward declaration; the full definition follows below.  */
struct iv_inv_expr_ent;
348 /* The candidate - cost pair. */
351 struct iv_cand
*cand
; /* The candidate. */
352 comp_cost cost
; /* The cost. */
353 enum tree_code comp
; /* For iv elimination, the comparison. */
354 bitmap inv_vars
; /* The list of invariant ssa_vars that have to be
355 preserved when representing iv_use with iv_cand. */
356 bitmap inv_exprs
; /* The list of newly created invariant expressions
357 when representing iv_use with iv_cand. */
358 tree value
; /* For final value elimination, the expression for
359 the final value of the iv. For iv elimination,
360 the new bound to compare with. */
366 unsigned id
; /* The id of the use. */
367 unsigned group_id
; /* The group id the use belongs to. */
368 enum use_type type
; /* Type of the use. */
369 tree mem_type
; /* The memory type to use when testing whether an
370 address is legitimate, and what the address's
372 struct iv
*iv
; /* The induction variable it is based on. */
373 gimple
*stmt
; /* Statement in that it occurs. */
374 tree
*op_p
; /* The place where it occurs. */
376 tree addr_base
; /* Base address with const offset stripped. */
377 poly_uint64_pod addr_offset
;
378 /* Const offset stripped from base address. */
384 /* The id of the group. */
386 /* Uses of the group are of the same type. */
388 /* The set of "related" IV candidates, plus the important ones. */
389 bitmap related_cands
;
390 /* Number of IV candidates in the cost_map. */
391 unsigned n_map_members
;
392 /* The costs wrto the iv candidates. */
393 struct cost_pair
*cost_map
;
394 /* The selected candidate for the group. */
395 struct iv_cand
*selected
;
396 /* Uses in the group. */
397 vec
<struct iv_use
*> vuses
;
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
410 /* The induction variable candidate. */
413 unsigned id
; /* The number of the candidate. */
414 bool important
; /* Whether this is an "important" candidate, i.e. such
415 that it should be considered by all uses. */
416 ENUM_BITFIELD(iv_position
) pos
: 8; /* Where it is computed. */
417 gimple
*incremented_at
;/* For original biv, the statement where it is
419 tree var_before
; /* The variable used for it before increment. */
420 tree var_after
; /* The variable used for it after increment. */
421 struct iv
*iv
; /* The value of the candidate. NULL for
422 "pseudocandidate" used to indicate the possibility
423 to replace the final value of an iv by direct
424 computation of the value. */
425 unsigned cost
; /* Cost of the candidate. */
426 unsigned cost_step
; /* Cost of the candidate's increment operation. */
427 struct iv_use
*ainc_use
; /* For IP_{BEFORE,AFTER}_USE candidates, the place
428 where it is incremented. */
429 bitmap inv_vars
; /* The list of invariant ssa_vars used in step of the
431 bitmap inv_exprs
; /* If step is more complicated than a single ssa_var,
432 hanlde it as a new invariant expression which will
433 be hoisted out of loop. */
434 struct iv
*orig_iv
; /* The original iv if this cand is added from biv with
438 /* Hashtable entry for common candidate derived from iv uses. */
439 struct iv_common_cand
443 /* IV uses from which this common candidate is derived. */
444 auto_vec
<struct iv_use
*> uses
;
448 /* Hashtable helpers. */
450 struct iv_common_cand_hasher
: delete_ptr_hash
<iv_common_cand
>
452 static inline hashval_t
hash (const iv_common_cand
*);
453 static inline bool equal (const iv_common_cand
*, const iv_common_cand
*);
456 /* Hash function for possible common candidates. */
459 iv_common_cand_hasher::hash (const iv_common_cand
*ccand
)
464 /* Hash table equality function for common candidates. */
467 iv_common_cand_hasher::equal (const iv_common_cand
*ccand1
,
468 const iv_common_cand
*ccand2
)
470 return (ccand1
->hash
== ccand2
->hash
471 && operand_equal_p (ccand1
->base
, ccand2
->base
, 0)
472 && operand_equal_p (ccand1
->step
, ccand2
->step
, 0)
473 && (TYPE_PRECISION (TREE_TYPE (ccand1
->base
))
474 == TYPE_PRECISION (TREE_TYPE (ccand2
->base
))));
477 /* Loop invariant expression hashtable entry. */
479 struct iv_inv_expr_ent
481 /* Tree expression of the entry. */
483 /* Unique indentifier. */
489 /* Sort iv_inv_expr_ent pair A and B by id field. */
492 sort_iv_inv_expr_ent (const void *a
, const void *b
)
494 const iv_inv_expr_ent
* const *e1
= (const iv_inv_expr_ent
* const *) (a
);
495 const iv_inv_expr_ent
* const *e2
= (const iv_inv_expr_ent
* const *) (b
);
497 unsigned id1
= (*e1
)->id
;
498 unsigned id2
= (*e2
)->id
;
508 /* Hashtable helpers. */
510 struct iv_inv_expr_hasher
: free_ptr_hash
<iv_inv_expr_ent
>
512 static inline hashval_t
hash (const iv_inv_expr_ent
*);
513 static inline bool equal (const iv_inv_expr_ent
*, const iv_inv_expr_ent
*);
516 /* Return true if uses of type TYPE represent some form of address. */
519 address_p (use_type type
)
521 return type
== USE_REF_ADDRESS
|| type
== USE_PTR_ADDRESS
;
524 /* Hash function for loop invariant expressions. */
527 iv_inv_expr_hasher::hash (const iv_inv_expr_ent
*expr
)
532 /* Hash table equality function for expressions. */
535 iv_inv_expr_hasher::equal (const iv_inv_expr_ent
*expr1
,
536 const iv_inv_expr_ent
*expr2
)
538 return expr1
->hash
== expr2
->hash
539 && operand_equal_p (expr1
->expr
, expr2
->expr
, 0);
544 /* The currently optimized loop. */
545 struct loop
*current_loop
;
548 /* Numbers of iterations for all exits of the current loop. */
549 hash_map
<edge
, tree_niter_desc
*> *niters
;
551 /* Number of registers used in it. */
554 /* The size of version_info array allocated. */
555 unsigned version_info_size
;
557 /* The array of information for the ssa names. */
558 struct version_info
*version_info
;
560 /* The hashtable of loop invariant expressions created
562 hash_table
<iv_inv_expr_hasher
> *inv_expr_tab
;
564 /* The bitmap of indices in version_info whose value was changed. */
567 /* The uses of induction variables. */
568 vec
<iv_group
*> vgroups
;
570 /* The candidates. */
571 vec
<iv_cand
*> vcands
;
573 /* A bitmap of important candidates. */
574 bitmap important_candidates
;
576 /* Cache used by tree_to_aff_combination_expand. */
577 hash_map
<tree
, name_expansion
*> *name_expansion_cache
;
579 /* The hashtable of common candidates derived from iv uses. */
580 hash_table
<iv_common_cand_hasher
> *iv_common_cand_tab
;
582 /* The common candidates. */
583 vec
<iv_common_cand
*> iv_common_cands
;
585 /* The maximum invariant variable id. */
586 unsigned max_inv_var_id
;
588 /* The maximum invariant expression id. */
589 unsigned max_inv_expr_id
;
591 /* Number of no_overflow BIVs which are not used in memory address. */
592 unsigned bivs_not_used_in_addr
;
594 /* Obstack for iv structure. */
595 struct obstack iv_obstack
;
597 /* Whether to consider just related and important candidates when replacing a
599 bool consider_all_candidates
;
601 /* Are we optimizing for speed? */
604 /* Whether the loop body includes any function calls. */
605 bool body_includes_call
;
607 /* Whether the loop body can only be exited via single exit. */
608 bool loop_single_exit_p
;
611 /* An assignment of iv candidates to uses. */
615 /* The number of uses covered by the assignment. */
618 /* Number of uses that cannot be expressed by the candidates in the set. */
621 /* Candidate assigned to a use, together with the related costs. */
622 struct cost_pair
**cand_for_group
;
624 /* Number of times each candidate is used. */
625 unsigned *n_cand_uses
;
627 /* The candidates used. */
630 /* The number of candidates in the set. */
633 /* The number of invariants needed, including both invariant variants and
634 invariant expressions. */
637 /* Total cost of expressing uses. */
638 comp_cost cand_use_cost
;
640 /* Total cost of candidates. */
643 /* Number of times each invariant variable is used. */
644 unsigned *n_inv_var_uses
;
646 /* Number of times each invariant expression is used. */
647 unsigned *n_inv_expr_uses
;
649 /* Total cost of the assignment. */
/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
687 /* The list of trees for that the decl_rtl field must be reset is stored
690 static vec
<tree
> decl_rtl_to_reset
;
692 static comp_cost
force_expr_to_var_cost (tree
, bool);
694 /* The single loop exit if it dominates the latch, NULL otherwise. */
697 single_dom_exit (struct loop
*loop
)
699 edge exit
= single_exit (loop
);
704 if (!just_once_each_iteration_p (loop
, exit
->src
))
710 /* Dumps information about the induction variable IV to FILE. Don't dump
711 variable's name if DUMP_NAME is FALSE. The information is dumped with
712 preceding spaces indicated by INDENT_LEVEL. */
715 dump_iv (FILE *file
, struct iv
*iv
, bool dump_name
, unsigned indent_level
)
718 const char spaces
[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};
720 if (indent_level
> 4)
722 p
= spaces
+ 8 - (indent_level
<< 1);
724 fprintf (file
, "%sIV struct:\n", p
);
725 if (iv
->ssa_name
&& dump_name
)
727 fprintf (file
, "%s SSA_NAME:\t", p
);
728 print_generic_expr (file
, iv
->ssa_name
, TDF_SLIM
);
729 fprintf (file
, "\n");
732 fprintf (file
, "%s Type:\t", p
);
733 print_generic_expr (file
, TREE_TYPE (iv
->base
), TDF_SLIM
);
734 fprintf (file
, "\n");
736 fprintf (file
, "%s Base:\t", p
);
737 print_generic_expr (file
, iv
->base
, TDF_SLIM
);
738 fprintf (file
, "\n");
740 fprintf (file
, "%s Step:\t", p
);
741 print_generic_expr (file
, iv
->step
, TDF_SLIM
);
742 fprintf (file
, "\n");
746 fprintf (file
, "%s Object:\t", p
);
747 print_generic_expr (file
, iv
->base_object
, TDF_SLIM
);
748 fprintf (file
, "\n");
751 fprintf (file
, "%s Biv:\t%c\n", p
, iv
->biv_p
? 'Y' : 'N');
753 fprintf (file
, "%s Overflowness wrto loop niter:\t%s\n",
754 p
, iv
->no_overflow
? "No-overflow" : "Overflow");
757 /* Dumps information about the USE to FILE. */
760 dump_use (FILE *file
, struct iv_use
*use
)
762 fprintf (file
, " Use %d.%d:\n", use
->group_id
, use
->id
);
763 fprintf (file
, " At stmt:\t");
764 print_gimple_stmt (file
, use
->stmt
, 0);
765 fprintf (file
, " At pos:\t");
767 print_generic_expr (file
, *use
->op_p
, TDF_SLIM
);
768 fprintf (file
, "\n");
769 dump_iv (file
, use
->iv
, false, 2);
772 /* Dumps information about the uses to FILE. */
775 dump_groups (FILE *file
, struct ivopts_data
*data
)
778 struct iv_group
*group
;
780 for (i
= 0; i
< data
->vgroups
.length (); i
++)
782 group
= data
->vgroups
[i
];
783 fprintf (file
, "Group %d:\n", group
->id
);
784 if (group
->type
== USE_NONLINEAR_EXPR
)
785 fprintf (file
, " Type:\tGENERIC\n");
786 else if (group
->type
== USE_REF_ADDRESS
)
787 fprintf (file
, " Type:\tREFERENCE ADDRESS\n");
788 else if (group
->type
== USE_PTR_ADDRESS
)
789 fprintf (file
, " Type:\tPOINTER ARGUMENT ADDRESS\n");
792 gcc_assert (group
->type
== USE_COMPARE
);
793 fprintf (file
, " Type:\tCOMPARE\n");
795 for (j
= 0; j
< group
->vuses
.length (); j
++)
796 dump_use (file
, group
->vuses
[j
]);
800 /* Dumps information about induction variable candidate CAND to FILE. */
803 dump_cand (FILE *file
, struct iv_cand
*cand
)
805 struct iv
*iv
= cand
->iv
;
807 fprintf (file
, "Candidate %d:\n", cand
->id
);
810 fprintf (file
, " Depend on inv.vars: ");
811 dump_bitmap (file
, cand
->inv_vars
);
815 fprintf (file
, " Depend on inv.exprs: ");
816 dump_bitmap (file
, cand
->inv_exprs
);
819 if (cand
->var_before
)
821 fprintf (file
, " Var befor: ");
822 print_generic_expr (file
, cand
->var_before
, TDF_SLIM
);
823 fprintf (file
, "\n");
827 fprintf (file
, " Var after: ");
828 print_generic_expr (file
, cand
->var_after
, TDF_SLIM
);
829 fprintf (file
, "\n");
835 fprintf (file
, " Incr POS: before exit test\n");
839 fprintf (file
, " Incr POS: before use %d\n", cand
->ainc_use
->id
);
843 fprintf (file
, " Incr POS: after use %d\n", cand
->ainc_use
->id
);
847 fprintf (file
, " Incr POS: at end\n");
851 fprintf (file
, " Incr POS: orig biv\n");
855 dump_iv (file
, iv
, false, 1);
858 /* Returns the info for ssa version VER. */
860 static inline struct version_info
*
861 ver_info (struct ivopts_data
*data
, unsigned ver
)
863 return data
->version_info
+ ver
;
866 /* Returns the info for ssa name NAME. */
868 static inline struct version_info
*
869 name_info (struct ivopts_data
*data
, tree name
)
871 return ver_info (data
, SSA_NAME_VERSION (name
));
874 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
878 stmt_after_ip_normal_pos (struct loop
*loop
, gimple
*stmt
)
880 basic_block bb
= ip_normal_pos (loop
), sbb
= gimple_bb (stmt
);
884 if (sbb
== loop
->latch
)
890 return stmt
== last_stmt (bb
);
893 /* Returns true if STMT if after the place where the original induction
894 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
895 if the positions are identical. */
898 stmt_after_inc_pos (struct iv_cand
*cand
, gimple
*stmt
, bool true_if_equal
)
900 basic_block cand_bb
= gimple_bb (cand
->incremented_at
);
901 basic_block stmt_bb
= gimple_bb (stmt
);
903 if (!dominated_by_p (CDI_DOMINATORS
, stmt_bb
, cand_bb
))
906 if (stmt_bb
!= cand_bb
)
910 && gimple_uid (stmt
) == gimple_uid (cand
->incremented_at
))
912 return gimple_uid (stmt
) > gimple_uid (cand
->incremented_at
);
915 /* Returns true if STMT if after the place where the induction variable
916 CAND is incremented in LOOP. */
919 stmt_after_increment (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
927 return stmt_after_ip_normal_pos (loop
, stmt
);
931 return stmt_after_inc_pos (cand
, stmt
, false);
934 return stmt_after_inc_pos (cand
, stmt
, true);
941 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
944 abnormal_ssa_name_p (tree exp
)
949 if (TREE_CODE (exp
) != SSA_NAME
)
952 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp
) != 0;
955 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
956 abnormal phi node. Callback for for_each_index. */
959 idx_contains_abnormal_ssa_name_p (tree base
, tree
*index
,
960 void *data ATTRIBUTE_UNUSED
)
962 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
964 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 2)))
966 if (abnormal_ssa_name_p (TREE_OPERAND (base
, 3)))
970 return !abnormal_ssa_name_p (*index
);
973 /* Returns true if EXPR contains a ssa name that occurs in an
974 abnormal phi node. */
977 contains_abnormal_ssa_name_p (tree expr
)
980 enum tree_code_class codeclass
;
985 code
= TREE_CODE (expr
);
986 codeclass
= TREE_CODE_CLASS (code
);
988 if (code
== CALL_EXPR
)
991 call_expr_arg_iterator iter
;
992 FOR_EACH_CALL_EXPR_ARG (arg
, iter
, expr
)
993 if (contains_abnormal_ssa_name_p (arg
))
998 if (code
== SSA_NAME
)
999 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr
) != 0;
1001 if (code
== INTEGER_CST
1002 || is_gimple_min_invariant (expr
))
1005 if (code
== ADDR_EXPR
)
1006 return !for_each_index (&TREE_OPERAND (expr
, 0),
1007 idx_contains_abnormal_ssa_name_p
,
1010 if (code
== COND_EXPR
)
1011 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0))
1012 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1))
1013 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 2));
1018 case tcc_comparison
:
1019 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 1)))
1024 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr
, 0)))
1036 /* Returns the structure describing number of iterations determined from
1037 EXIT of DATA->current_loop, or NULL if something goes wrong. */
1039 static struct tree_niter_desc
*
1040 niter_for_exit (struct ivopts_data
*data
, edge exit
)
1042 struct tree_niter_desc
*desc
;
1043 tree_niter_desc
**slot
;
1047 data
->niters
= new hash_map
<edge
, tree_niter_desc
*>;
1051 slot
= data
->niters
->get (exit
);
1055 /* Try to determine number of iterations. We cannot safely work with ssa
1056 names that appear in phi nodes on abnormal edges, so that we do not
1057 create overlapping life ranges for them (PR 27283). */
1058 desc
= XNEW (struct tree_niter_desc
);
1059 if (!number_of_iterations_exit (data
->current_loop
,
1061 || contains_abnormal_ssa_name_p (desc
->niter
))
1066 data
->niters
->put (exit
, desc
);
1074 /* Returns the structure describing number of iterations determined from
1075 single dominating exit of DATA->current_loop, or NULL if something
1078 static struct tree_niter_desc
*
1079 niter_for_single_dom_exit (struct ivopts_data
*data
)
1081 edge exit
= single_dom_exit (data
->current_loop
);
1086 return niter_for_exit (data
, exit
);
1089 /* Initializes data structures used by the iv optimization pass, stored
1093 tree_ssa_iv_optimize_init (struct ivopts_data
*data
)
1095 data
->version_info_size
= 2 * num_ssa_names
;
1096 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
1097 data
->relevant
= BITMAP_ALLOC (NULL
);
1098 data
->important_candidates
= BITMAP_ALLOC (NULL
);
1099 data
->max_inv_var_id
= 0;
1100 data
->max_inv_expr_id
= 0;
1101 data
->niters
= NULL
;
1102 data
->vgroups
.create (20);
1103 data
->vcands
.create (20);
1104 data
->inv_expr_tab
= new hash_table
<iv_inv_expr_hasher
> (10);
1105 data
->name_expansion_cache
= NULL
;
1106 data
->iv_common_cand_tab
= new hash_table
<iv_common_cand_hasher
> (10);
1107 data
->iv_common_cands
.create (20);
1108 decl_rtl_to_reset
.create (20);
1109 gcc_obstack_init (&data
->iv_obstack
);
1112 /* Returns a memory object to that EXPR points. In case we are able to
1113 determine that it does not point to any such object, NULL is returned. */
1116 determine_base_object (tree expr
)
1118 enum tree_code code
= TREE_CODE (expr
);
1121 /* If this is a pointer casted to any type, we need to determine
1122 the base object for the pointer; so handle conversions before
1123 throwing away non-pointer expressions. */
1124 if (CONVERT_EXPR_P (expr
))
1125 return determine_base_object (TREE_OPERAND (expr
, 0));
1127 if (!POINTER_TYPE_P (TREE_TYPE (expr
)))
1136 obj
= TREE_OPERAND (expr
, 0);
1137 base
= get_base_address (obj
);
1142 if (TREE_CODE (base
) == MEM_REF
)
1143 return determine_base_object (TREE_OPERAND (base
, 0));
1145 return fold_convert (ptr_type_node
,
1146 build_fold_addr_expr (base
));
1148 case POINTER_PLUS_EXPR
:
1149 return determine_base_object (TREE_OPERAND (expr
, 0));
1153 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
1157 if (POLY_INT_CST_P (expr
))
1159 return fold_convert (ptr_type_node
, expr
);
1163 /* Return true if address expression with non-DECL_P operand appears
1167 contain_complex_addr_expr (tree expr
)
1172 switch (TREE_CODE (expr
))
1174 case POINTER_PLUS_EXPR
:
1177 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 0));
1178 res
|= contain_complex_addr_expr (TREE_OPERAND (expr
, 1));
1182 return (!DECL_P (TREE_OPERAND (expr
, 0)));
1191 /* Allocates an induction variable with given initial value BASE and step STEP
1192 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
1195 alloc_iv (struct ivopts_data
*data
, tree base
, tree step
,
1196 bool no_overflow
= false)
1199 struct iv
*iv
= (struct iv
*) obstack_alloc (&data
->iv_obstack
,
1200 sizeof (struct iv
));
1201 gcc_assert (step
!= NULL_TREE
);
1203 /* Lower address expression in base except ones with DECL_P as operand.
1205 1) More accurate cost can be computed for address expressions;
1206 2) Duplicate candidates won't be created for bases in different
1207 forms, like &a[0] and &a. */
1209 if ((TREE_CODE (expr
) == ADDR_EXPR
&& !DECL_P (TREE_OPERAND (expr
, 0)))
1210 || contain_complex_addr_expr (expr
))
1213 tree_to_aff_combination (expr
, TREE_TYPE (expr
), &comb
);
1214 base
= fold_convert (TREE_TYPE (base
), aff_combination_to_tree (&comb
));
1218 iv
->base_object
= determine_base_object (base
);
1221 iv
->nonlin_use
= NULL
;
1222 iv
->ssa_name
= NULL_TREE
;
1224 && !iv_can_overflow_p (data
->current_loop
, TREE_TYPE (base
),
1227 iv
->no_overflow
= no_overflow
;
1228 iv
->have_address_use
= false;
1233 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1234 doesn't overflow. */
1237 set_iv (struct ivopts_data
*data
, tree iv
, tree base
, tree step
,
1240 struct version_info
*info
= name_info (data
, iv
);
1242 gcc_assert (!info
->iv
);
1244 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (iv
));
1245 info
->iv
= alloc_iv (data
, base
, step
, no_overflow
);
1246 info
->iv
->ssa_name
= iv
;
1249 /* Finds induction variable declaration for VAR. */
1252 get_iv (struct ivopts_data
*data
, tree var
)
1255 tree type
= TREE_TYPE (var
);
1257 if (!POINTER_TYPE_P (type
)
1258 && !INTEGRAL_TYPE_P (type
))
1261 if (!name_info (data
, var
)->iv
)
1263 bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1266 || !flow_bb_inside_loop_p (data
->current_loop
, bb
))
1267 set_iv (data
, var
, var
, build_int_cst (type
, 0), true);
1270 return name_info (data
, var
)->iv
;
1273 /* Return the first non-invariant ssa var found in EXPR. */
1276 extract_single_var_from_expr (tree expr
)
1280 enum tree_code code
;
1282 if (!expr
|| is_gimple_min_invariant (expr
))
1285 code
= TREE_CODE (expr
);
1286 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1288 n
= TREE_OPERAND_LENGTH (expr
);
1289 for (i
= 0; i
< n
; i
++)
1291 tmp
= extract_single_var_from_expr (TREE_OPERAND (expr
, i
));
1297 return (TREE_CODE (expr
) == SSA_NAME
) ? expr
: NULL
;
1300 /* Finds basic ivs. */
1303 find_bivs (struct ivopts_data
*data
)
1307 tree step
, type
, base
, stop
;
1309 struct loop
*loop
= data
->current_loop
;
1312 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1316 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi
)))
1319 if (virtual_operand_p (PHI_RESULT (phi
)))
1322 if (!simple_iv (loop
, loop
, PHI_RESULT (phi
), &iv
, true))
1325 if (integer_zerop (iv
.step
))
1329 base
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_preheader_edge (loop
));
1330 /* Stop expanding iv base at the first ssa var referred by iv step.
1331 Ideally we should stop at any ssa var, because that's expensive
1332 and unusual to happen, we just do it on the first one.
1334 See PR64705 for the rationale. */
1335 stop
= extract_single_var_from_expr (step
);
1336 base
= expand_simple_operations (base
, stop
);
1337 if (contains_abnormal_ssa_name_p (base
)
1338 || contains_abnormal_ssa_name_p (step
))
1341 type
= TREE_TYPE (PHI_RESULT (phi
));
1342 base
= fold_convert (type
, base
);
1345 if (POINTER_TYPE_P (type
))
1346 step
= convert_to_ptrofftype (step
);
1348 step
= fold_convert (type
, step
);
1351 set_iv (data
, PHI_RESULT (phi
), base
, step
, iv
.no_overflow
);
1358 /* Marks basic ivs. */
1361 mark_bivs (struct ivopts_data
*data
)
1366 struct iv
*iv
, *incr_iv
;
1367 struct loop
*loop
= data
->current_loop
;
1368 basic_block incr_bb
;
1371 data
->bivs_not_used_in_addr
= 0;
1372 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
1376 iv
= get_iv (data
, PHI_RESULT (phi
));
1380 var
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (loop
));
1381 def
= SSA_NAME_DEF_STMT (var
);
1382 /* Don't mark iv peeled from other one as biv. */
1384 && gimple_code (def
) == GIMPLE_PHI
1385 && gimple_bb (def
) == loop
->header
)
1388 incr_iv
= get_iv (data
, var
);
1392 /* If the increment is in the subloop, ignore it. */
1393 incr_bb
= gimple_bb (SSA_NAME_DEF_STMT (var
));
1394 if (incr_bb
->loop_father
!= data
->current_loop
1395 || (incr_bb
->flags
& BB_IRREDUCIBLE_LOOP
))
1399 incr_iv
->biv_p
= true;
1400 if (iv
->no_overflow
)
1401 data
->bivs_not_used_in_addr
++;
1402 if (incr_iv
->no_overflow
)
1403 data
->bivs_not_used_in_addr
++;
1407 /* Checks whether STMT defines a linear induction variable and stores its
1408 parameters to IV. */
1411 find_givs_in_stmt_scev (struct ivopts_data
*data
, gimple
*stmt
, affine_iv
*iv
)
1414 struct loop
*loop
= data
->current_loop
;
1416 iv
->base
= NULL_TREE
;
1417 iv
->step
= NULL_TREE
;
1419 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1422 lhs
= gimple_assign_lhs (stmt
);
1423 if (TREE_CODE (lhs
) != SSA_NAME
)
1426 if (!simple_iv (loop
, loop_containing_stmt (stmt
), lhs
, iv
, true))
1429 /* Stop expanding iv base at the first ssa var referred by iv step.
1430 Ideally we should stop at any ssa var, because that's expensive
1431 and unusual to happen, we just do it on the first one.
1433 See PR64705 for the rationale. */
1434 stop
= extract_single_var_from_expr (iv
->step
);
1435 iv
->base
= expand_simple_operations (iv
->base
, stop
);
1436 if (contains_abnormal_ssa_name_p (iv
->base
)
1437 || contains_abnormal_ssa_name_p (iv
->step
))
1440 /* If STMT could throw, then do not consider STMT as defining a GIV.
1441 While this will suppress optimizations, we cannot safely delete this
1442 GIV and associated statements, even if it appears it is not used. */
1443 if (stmt_could_throw_p (cfun
, stmt
))
1449 /* Finds general ivs in statement STMT. */
1452 find_givs_in_stmt (struct ivopts_data
*data
, gimple
*stmt
)
1456 if (!find_givs_in_stmt_scev (data
, stmt
, &iv
))
1459 set_iv (data
, gimple_assign_lhs (stmt
), iv
.base
, iv
.step
, iv
.no_overflow
);
1462 /* Finds general ivs in basic block BB. */
1465 find_givs_in_bb (struct ivopts_data
*data
, basic_block bb
)
1467 gimple_stmt_iterator bsi
;
1469 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
1470 find_givs_in_stmt (data
, gsi_stmt (bsi
));
1473 /* Finds general ivs. */
1476 find_givs (struct ivopts_data
*data
)
1478 struct loop
*loop
= data
->current_loop
;
1479 basic_block
*body
= get_loop_body_in_dom_order (loop
);
1482 for (i
= 0; i
< loop
->num_nodes
; i
++)
1483 find_givs_in_bb (data
, body
[i
]);
1487 /* For each ssa name defined in LOOP determines whether it is an induction
1488 variable and if so, its initial value and step. */
1491 find_induction_variables (struct ivopts_data
*data
)
1496 if (!find_bivs (data
))
1502 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
1504 struct tree_niter_desc
*niter
= niter_for_single_dom_exit (data
);
1508 fprintf (dump_file
, " number of iterations ");
1509 print_generic_expr (dump_file
, niter
->niter
, TDF_SLIM
);
1510 if (!integer_zerop (niter
->may_be_zero
))
1512 fprintf (dump_file
, "; zero if ");
1513 print_generic_expr (dump_file
, niter
->may_be_zero
, TDF_SLIM
);
1515 fprintf (dump_file
, "\n");
1518 fprintf (dump_file
, "\n<Induction Vars>:\n");
1519 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
1521 struct version_info
*info
= ver_info (data
, i
);
1522 if (info
->iv
&& info
->iv
->step
&& !integer_zerop (info
->iv
->step
))
1523 dump_iv (dump_file
, ver_info (data
, i
)->iv
, true, 0);
1530 /* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
1531 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1532 is the const offset stripped from IV base and MEM_TYPE is the type
1533 of the memory being addressed. For uses of other types, ADDR_BASE
1534 and ADDR_OFFSET are zero by default and MEM_TYPE is NULL_TREE. */
1536 static struct iv_use
*
1537 record_use (struct iv_group
*group
, tree
*use_p
, struct iv
*iv
,
1538 gimple
*stmt
, enum use_type type
, tree mem_type
,
1539 tree addr_base
, poly_uint64 addr_offset
)
1541 struct iv_use
*use
= XCNEW (struct iv_use
);
1543 use
->id
= group
->vuses
.length ();
1544 use
->group_id
= group
->id
;
1546 use
->mem_type
= mem_type
;
1550 use
->addr_base
= addr_base
;
1551 use
->addr_offset
= addr_offset
;
1553 group
->vuses
.safe_push (use
);
1557 /* Checks whether OP is a loop-level invariant and if so, records it.
1558 NONLINEAR_USE is true if the invariant is used in a way we do not
1559 handle specially. */
1562 record_invariant (struct ivopts_data
*data
, tree op
, bool nonlinear_use
)
1565 struct version_info
*info
;
1567 if (TREE_CODE (op
) != SSA_NAME
1568 || virtual_operand_p (op
))
1571 bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
1573 && flow_bb_inside_loop_p (data
->current_loop
, bb
))
1576 info
= name_info (data
, op
);
1578 info
->has_nonlin_use
|= nonlinear_use
;
1580 info
->inv_id
= ++data
->max_inv_var_id
;
1581 bitmap_set_bit (data
->relevant
, SSA_NAME_VERSION (op
));
1584 /* Record a group of TYPE. */
1586 static struct iv_group
*
1587 record_group (struct ivopts_data
*data
, enum use_type type
)
1589 struct iv_group
*group
= XCNEW (struct iv_group
);
1591 group
->id
= data
->vgroups
.length ();
1593 group
->related_cands
= BITMAP_ALLOC (NULL
);
1594 group
->vuses
.create (1);
1596 data
->vgroups
.safe_push (group
);
1600 /* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
1601 New group will be created if there is no existing group for the use.
1602 MEM_TYPE is the type of memory being addressed, or NULL if this
1603 isn't an address reference. */
1605 static struct iv_use
*
1606 record_group_use (struct ivopts_data
*data
, tree
*use_p
,
1607 struct iv
*iv
, gimple
*stmt
, enum use_type type
,
1610 tree addr_base
= NULL
;
1611 struct iv_group
*group
= NULL
;
1612 poly_uint64 addr_offset
= 0;
1614 /* Record non address type use in a new group. */
1615 if (address_p (type
))
1619 addr_base
= strip_offset (iv
->base
, &addr_offset
);
1620 for (i
= 0; i
< data
->vgroups
.length (); i
++)
1624 group
= data
->vgroups
[i
];
1625 use
= group
->vuses
[0];
1626 if (!address_p (use
->type
))
1629 /* Check if it has the same stripped base and step. */
1630 if (operand_equal_p (iv
->base_object
, use
->iv
->base_object
, 0)
1631 && operand_equal_p (iv
->step
, use
->iv
->step
, 0)
1632 && operand_equal_p (addr_base
, use
->addr_base
, 0))
1635 if (i
== data
->vgroups
.length ())
1640 group
= record_group (data
, type
);
1642 return record_use (group
, use_p
, iv
, stmt
, type
, mem_type
,
1643 addr_base
, addr_offset
);
1646 /* Checks whether the use OP is interesting and if so, records it. */
1648 static struct iv_use
*
1649 find_interesting_uses_op (struct ivopts_data
*data
, tree op
)
1655 if (TREE_CODE (op
) != SSA_NAME
)
1658 iv
= get_iv (data
, op
);
1664 gcc_assert (iv
->nonlin_use
->type
== USE_NONLINEAR_EXPR
);
1665 return iv
->nonlin_use
;
1668 if (integer_zerop (iv
->step
))
1670 record_invariant (data
, op
, true);
1674 stmt
= SSA_NAME_DEF_STMT (op
);
1675 gcc_assert (gimple_code (stmt
) == GIMPLE_PHI
|| is_gimple_assign (stmt
));
1677 use
= record_group_use (data
, NULL
, iv
, stmt
, USE_NONLINEAR_EXPR
, NULL_TREE
);
1678 iv
->nonlin_use
= use
;
1682 /* Indicate how compare type iv_use can be handled. */
1683 enum comp_iv_rewrite
1686 /* We may rewrite compare type iv_use by expressing value of the iv_use. */
1688 /* We may rewrite compare type iv_uses on both sides of comparison by
1689 expressing value of each iv_use. */
1691 /* We may rewrite compare type iv_use by expressing value of the iv_use
1692 or by eliminating it with other iv_cand. */
1696 /* Given a condition in statement STMT, checks whether it is a compare
1697 of an induction variable and an invariant. If this is the case,
1698 CONTROL_VAR is set to location of the iv, BOUND to the location of
1699 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1700 induction variable descriptions, and true is returned. If this is not
1701 the case, CONTROL_VAR and BOUND are set to the arguments of the
1702 condition and false is returned. */
1704 static enum comp_iv_rewrite
1705 extract_cond_operands (struct ivopts_data
*data
, gimple
*stmt
,
1706 tree
**control_var
, tree
**bound
,
1707 struct iv
**iv_var
, struct iv
**iv_bound
)
1709 /* The objects returned when COND has constant operands. */
1710 static struct iv const_iv
;
1712 tree
*op0
= &zero
, *op1
= &zero
;
1713 struct iv
*iv0
= &const_iv
, *iv1
= &const_iv
;
1714 enum comp_iv_rewrite rewrite_type
= COMP_IV_NA
;
1716 if (gimple_code (stmt
) == GIMPLE_COND
)
1718 gcond
*cond_stmt
= as_a
<gcond
*> (stmt
);
1719 op0
= gimple_cond_lhs_ptr (cond_stmt
);
1720 op1
= gimple_cond_rhs_ptr (cond_stmt
);
1724 op0
= gimple_assign_rhs1_ptr (stmt
);
1725 op1
= gimple_assign_rhs2_ptr (stmt
);
1728 zero
= integer_zero_node
;
1729 const_iv
.step
= integer_zero_node
;
1731 if (TREE_CODE (*op0
) == SSA_NAME
)
1732 iv0
= get_iv (data
, *op0
);
1733 if (TREE_CODE (*op1
) == SSA_NAME
)
1734 iv1
= get_iv (data
, *op1
);
1736 /* If both sides of comparison are IVs. We can express ivs on both end. */
1737 if (iv0
&& iv1
&& !integer_zerop (iv0
->step
) && !integer_zerop (iv1
->step
))
1739 rewrite_type
= COMP_IV_EXPR_2
;
1743 /* If none side of comparison is IV. */
1744 if ((!iv0
|| integer_zerop (iv0
->step
))
1745 && (!iv1
|| integer_zerop (iv1
->step
)))
1748 /* Control variable may be on the other side. */
1749 if (!iv0
|| integer_zerop (iv0
->step
))
1751 std::swap (op0
, op1
);
1752 std::swap (iv0
, iv1
);
1754 /* If one side is IV and the other side isn't loop invariant. */
1756 rewrite_type
= COMP_IV_EXPR
;
1757 /* If one side is IV and the other side is loop invariant. */
1758 else if (!integer_zerop (iv0
->step
) && integer_zerop (iv1
->step
))
1759 rewrite_type
= COMP_IV_ELIM
;
1771 return rewrite_type
;
1774 /* Checks whether the condition in STMT is interesting and if so,
1778 find_interesting_uses_cond (struct ivopts_data
*data
, gimple
*stmt
)
1780 tree
*var_p
, *bound_p
;
1781 struct iv
*var_iv
, *bound_iv
;
1782 enum comp_iv_rewrite ret
;
1784 ret
= extract_cond_operands (data
, stmt
,
1785 &var_p
, &bound_p
, &var_iv
, &bound_iv
);
1786 if (ret
== COMP_IV_NA
)
1788 find_interesting_uses_op (data
, *var_p
);
1789 find_interesting_uses_op (data
, *bound_p
);
1793 record_group_use (data
, var_p
, var_iv
, stmt
, USE_COMPARE
, NULL_TREE
);
1794 /* Record compare type iv_use for iv on the other side of comparison. */
1795 if (ret
== COMP_IV_EXPR_2
)
1796 record_group_use (data
, bound_p
, bound_iv
, stmt
, USE_COMPARE
, NULL_TREE
);
1799 /* Returns the outermost loop EXPR is obviously invariant in
1800 relative to the loop LOOP, i.e. if all its operands are defined
1801 outside of the returned loop. Returns NULL if EXPR is not
1802 even obviously invariant in LOOP. */
1805 outermost_invariant_loop_for_expr (struct loop
*loop
, tree expr
)
1810 if (is_gimple_min_invariant (expr
))
1811 return current_loops
->tree_root
;
1813 if (TREE_CODE (expr
) == SSA_NAME
)
1815 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1818 if (flow_bb_inside_loop_p (loop
, def_bb
))
1820 return superloop_at_depth (loop
,
1821 loop_depth (def_bb
->loop_father
) + 1);
1824 return current_loops
->tree_root
;
1830 unsigned maxdepth
= 0;
1831 len
= TREE_OPERAND_LENGTH (expr
);
1832 for (i
= 0; i
< len
; i
++)
1834 struct loop
*ivloop
;
1835 if (!TREE_OPERAND (expr
, i
))
1838 ivloop
= outermost_invariant_loop_for_expr (loop
, TREE_OPERAND (expr
, i
));
1841 maxdepth
= MAX (maxdepth
, loop_depth (ivloop
));
1844 return superloop_at_depth (loop
, maxdepth
);
1847 /* Returns true if expression EXPR is obviously invariant in LOOP,
1848 i.e. if all its operands are defined outside of the LOOP. LOOP
1849 should not be the function body. */
1852 expr_invariant_in_loop_p (struct loop
*loop
, tree expr
)
1857 gcc_assert (loop_depth (loop
) > 0);
1859 if (is_gimple_min_invariant (expr
))
1862 if (TREE_CODE (expr
) == SSA_NAME
)
1864 def_bb
= gimple_bb (SSA_NAME_DEF_STMT (expr
));
1866 && flow_bb_inside_loop_p (loop
, def_bb
))
1875 len
= TREE_OPERAND_LENGTH (expr
);
1876 for (i
= 0; i
< len
; i
++)
1877 if (TREE_OPERAND (expr
, i
)
1878 && !expr_invariant_in_loop_p (loop
, TREE_OPERAND (expr
, i
)))
1884 /* Given expression EXPR which computes inductive values with respect
1885 to loop recorded in DATA, this function returns biv from which EXPR
1886 is derived by tracing definition chains of ssa variables in EXPR. */
1889 find_deriving_biv_for_expr (struct ivopts_data
*data
, tree expr
)
1894 enum tree_code code
;
1897 if (expr
== NULL_TREE
)
1900 if (is_gimple_min_invariant (expr
))
1903 code
= TREE_CODE (expr
);
1904 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code
)))
1906 n
= TREE_OPERAND_LENGTH (expr
);
1907 for (i
= 0; i
< n
; i
++)
1909 iv
= find_deriving_biv_for_expr (data
, TREE_OPERAND (expr
, i
));
1915 /* Stop if it's not ssa name. */
1916 if (code
!= SSA_NAME
)
1919 iv
= get_iv (data
, expr
);
1920 if (!iv
|| integer_zerop (iv
->step
))
1925 stmt
= SSA_NAME_DEF_STMT (expr
);
1926 if (gphi
*phi
= dyn_cast
<gphi
*> (stmt
))
1929 use_operand_p use_p
;
1930 basic_block phi_bb
= gimple_bb (phi
);
1932 /* Skip loop header PHI that doesn't define biv. */
1933 if (phi_bb
->loop_father
== data
->current_loop
)
1936 if (virtual_operand_p (gimple_phi_result (phi
)))
1939 FOR_EACH_PHI_ARG (use_p
, phi
, iter
, SSA_OP_USE
)
1941 tree use
= USE_FROM_PTR (use_p
);
1942 iv
= find_deriving_biv_for_expr (data
, use
);
1948 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
1951 e1
= gimple_assign_rhs1 (stmt
);
1952 code
= gimple_assign_rhs_code (stmt
);
1953 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
)
1954 return find_deriving_biv_for_expr (data
, e1
);
1961 case POINTER_PLUS_EXPR
:
1962 /* Increments, decrements and multiplications by a constant
1964 e2
= gimple_assign_rhs2 (stmt
);
1965 iv
= find_deriving_biv_for_expr (data
, e2
);
1971 /* Casts are simple. */
1972 return find_deriving_biv_for_expr (data
, e1
);
1981 /* Record BIV, its predecessor and successor that they are used in
1982 address type uses. */
1985 record_biv_for_address_use (struct ivopts_data
*data
, struct iv
*biv
)
1988 tree type
, base_1
, base_2
;
1991 if (!biv
|| !biv
->biv_p
|| integer_zerop (biv
->step
)
1992 || biv
->have_address_use
|| !biv
->no_overflow
)
1995 type
= TREE_TYPE (biv
->base
);
1996 if (!INTEGRAL_TYPE_P (type
))
1999 biv
->have_address_use
= true;
2000 data
->bivs_not_used_in_addr
--;
2001 base_1
= fold_build2 (PLUS_EXPR
, type
, biv
->base
, biv
->step
);
2002 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
2004 struct iv
*iv
= ver_info (data
, i
)->iv
;
2006 if (!iv
|| !iv
->biv_p
|| integer_zerop (iv
->step
)
2007 || iv
->have_address_use
|| !iv
->no_overflow
)
2010 if (type
!= TREE_TYPE (iv
->base
)
2011 || !INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
)))
2014 if (!operand_equal_p (biv
->step
, iv
->step
, 0))
2017 base_2
= fold_build2 (PLUS_EXPR
, type
, iv
->base
, iv
->step
);
2018 if (operand_equal_p (base_1
, iv
->base
, 0)
2019 || operand_equal_p (base_2
, biv
->base
, 0))
2021 iv
->have_address_use
= true;
2022 data
->bivs_not_used_in_addr
--;
2027 /* Cumulates the steps of indices into DATA and replaces their values with the
2028 initial ones. Returns false when the value of the index cannot be determined.
2029 Callback for for_each_index. */
2031 struct ifs_ivopts_data
2033 struct ivopts_data
*ivopts_data
;
2039 idx_find_step (tree base
, tree
*idx
, void *data
)
2041 struct ifs_ivopts_data
*dta
= (struct ifs_ivopts_data
*) data
;
2043 bool use_overflow_semantics
= false;
2044 tree step
, iv_base
, iv_step
, lbound
, off
;
2045 struct loop
*loop
= dta
->ivopts_data
->current_loop
;
2047 /* If base is a component ref, require that the offset of the reference
2049 if (TREE_CODE (base
) == COMPONENT_REF
)
2051 off
= component_ref_field_offset (base
);
2052 return expr_invariant_in_loop_p (loop
, off
);
2055 /* If base is array, first check whether we will be able to move the
2056 reference out of the loop (in order to take its address in strength
2057 reduction). In order for this to work we need both lower bound
2058 and step to be loop invariants. */
2059 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2061 /* Moreover, for a range, the size needs to be invariant as well. */
2062 if (TREE_CODE (base
) == ARRAY_RANGE_REF
2063 && !expr_invariant_in_loop_p (loop
, TYPE_SIZE (TREE_TYPE (base
))))
2066 step
= array_ref_element_size (base
);
2067 lbound
= array_ref_low_bound (base
);
2069 if (!expr_invariant_in_loop_p (loop
, step
)
2070 || !expr_invariant_in_loop_p (loop
, lbound
))
2074 if (TREE_CODE (*idx
) != SSA_NAME
)
2077 iv
= get_iv (dta
->ivopts_data
, *idx
);
2081 /* XXX We produce for a base of *D42 with iv->base being &x[0]
2082 *&x[0], which is not folded and does not trigger the
2083 ARRAY_REF path below. */
2086 if (integer_zerop (iv
->step
))
2089 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2091 step
= array_ref_element_size (base
);
2093 /* We only handle addresses whose step is an integer constant. */
2094 if (TREE_CODE (step
) != INTEGER_CST
)
2098 /* The step for pointer arithmetics already is 1 byte. */
2099 step
= size_one_node
;
2103 if (iv
->no_overflow
&& nowrap_type_p (TREE_TYPE (iv_step
)))
2104 use_overflow_semantics
= true;
2106 if (!convert_affine_scev (dta
->ivopts_data
->current_loop
,
2107 sizetype
, &iv_base
, &iv_step
, dta
->stmt
,
2108 use_overflow_semantics
))
2110 /* The index might wrap. */
2114 step
= fold_build2 (MULT_EXPR
, sizetype
, step
, iv_step
);
2115 dta
->step
= fold_build2 (PLUS_EXPR
, sizetype
, dta
->step
, step
);
2117 if (dta
->ivopts_data
->bivs_not_used_in_addr
)
2120 iv
= find_deriving_biv_for_expr (dta
->ivopts_data
, iv
->ssa_name
);
2122 record_biv_for_address_use (dta
->ivopts_data
, iv
);
2127 /* Records use in index IDX. Callback for for_each_index. Ivopts data
2128 object is passed to it in DATA. */
2131 idx_record_use (tree base
, tree
*idx
,
2134 struct ivopts_data
*data
= (struct ivopts_data
*) vdata
;
2135 find_interesting_uses_op (data
, *idx
);
2136 if (TREE_CODE (base
) == ARRAY_REF
|| TREE_CODE (base
) == ARRAY_RANGE_REF
)
2138 find_interesting_uses_op (data
, array_ref_element_size (base
));
2139 find_interesting_uses_op (data
, array_ref_low_bound (base
));
2144 /* If we can prove that TOP = cst * BOT for some constant cst,
2145 store cst to MUL and return true. Otherwise return false.
2146 The returned value is always sign-extended, regardless of the
2147 signedness of TOP and BOT. */
2150 constant_multiple_of (tree top
, tree bot
, widest_int
*mul
)
2153 enum tree_code code
;
2154 unsigned precision
= TYPE_PRECISION (TREE_TYPE (top
));
2155 widest_int res
, p0
, p1
;
2160 if (operand_equal_p (top
, bot
, 0))
2166 code
= TREE_CODE (top
);
2170 mby
= TREE_OPERAND (top
, 1);
2171 if (TREE_CODE (mby
) != INTEGER_CST
)
2174 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &res
))
2177 *mul
= wi::sext (res
* wi::to_widest (mby
), precision
);
2182 if (!constant_multiple_of (TREE_OPERAND (top
, 0), bot
, &p0
)
2183 || !constant_multiple_of (TREE_OPERAND (top
, 1), bot
, &p1
))
2186 if (code
== MINUS_EXPR
)
2188 *mul
= wi::sext (p0
+ p1
, precision
);
2192 if (TREE_CODE (bot
) != INTEGER_CST
)
2195 p0
= widest_int::from (wi::to_wide (top
), SIGNED
);
2196 p1
= widest_int::from (wi::to_wide (bot
), SIGNED
);
2199 *mul
= wi::sext (wi::divmod_trunc (p0
, p1
, SIGNED
, &res
), precision
);
2203 if (POLY_INT_CST_P (top
)
2204 && POLY_INT_CST_P (bot
)
2205 && constant_multiple_p (wi::to_poly_widest (top
),
2206 wi::to_poly_widest (bot
), mul
))
2213 /* Return true if memory reference REF with step STEP may be unaligned. */
2216 may_be_unaligned_p (tree ref
, tree step
)
2218 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
2219 thus they are not misaligned. */
2220 if (TREE_CODE (ref
) == TARGET_MEM_REF
)
2223 unsigned int align
= TYPE_ALIGN (TREE_TYPE (ref
));
2224 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
))) > align
)
2225 align
= GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref
)));
2227 unsigned HOST_WIDE_INT bitpos
;
2228 unsigned int ref_align
;
2229 get_object_alignment_1 (ref
, &ref_align
, &bitpos
);
2230 if (ref_align
< align
2231 || (bitpos
% align
) != 0
2232 || (bitpos
% BITS_PER_UNIT
) != 0)
2235 unsigned int trailing_zeros
= tree_ctz (step
);
2236 if (trailing_zeros
< HOST_BITS_PER_INT
2237 && (1U << trailing_zeros
) * BITS_PER_UNIT
< align
)
2243 /* Return true if EXPR may be non-addressable. */
2246 may_be_nonaddressable_p (tree expr
)
2248 switch (TREE_CODE (expr
))
2251 /* Check if it's a register variable. */
2252 return DECL_HARD_REGISTER (expr
);
2254 case TARGET_MEM_REF
:
2255 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2256 target, thus they are always addressable. */
2260 /* Likewise for MEM_REFs, modulo the storage order. */
2261 return REF_REVERSE_STORAGE_ORDER (expr
);
2264 if (REF_REVERSE_STORAGE_ORDER (expr
))
2266 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2269 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2271 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr
, 1))
2272 || may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2275 case ARRAY_RANGE_REF
:
2276 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr
, 0))))
2278 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2280 case VIEW_CONVERT_EXPR
:
2281 /* This kind of view-conversions may wrap non-addressable objects
2282 and make them look addressable. After some processing the
2283 non-addressability may be uncovered again, causing ADDR_EXPRs
2284 of inappropriate objects to be built. */
2285 if (is_gimple_reg (TREE_OPERAND (expr
, 0))
2286 || !is_gimple_addressable (TREE_OPERAND (expr
, 0)))
2288 return may_be_nonaddressable_p (TREE_OPERAND (expr
, 0));
2300 /* Finds addresses in *OP_P inside STMT. */
2303 find_interesting_uses_address (struct ivopts_data
*data
, gimple
*stmt
,
2306 tree base
= *op_p
, step
= size_zero_node
;
2308 struct ifs_ivopts_data ifs_ivopts_data
;
2310 /* Do not play with volatile memory references. A bit too conservative,
2311 perhaps, but safe. */
2312 if (gimple_has_volatile_ops (stmt
))
2315 /* Ignore bitfields for now. Not really something terribly complicated
2317 if (TREE_CODE (base
) == BIT_FIELD_REF
)
2320 base
= unshare_expr (base
);
2322 if (TREE_CODE (base
) == TARGET_MEM_REF
)
2324 tree type
= build_pointer_type (TREE_TYPE (base
));
2328 && TREE_CODE (TMR_BASE (base
)) == SSA_NAME
)
2330 civ
= get_iv (data
, TMR_BASE (base
));
2334 TMR_BASE (base
) = civ
->base
;
2337 if (TMR_INDEX2 (base
)
2338 && TREE_CODE (TMR_INDEX2 (base
)) == SSA_NAME
)
2340 civ
= get_iv (data
, TMR_INDEX2 (base
));
2344 TMR_INDEX2 (base
) = civ
->base
;
2347 if (TMR_INDEX (base
)
2348 && TREE_CODE (TMR_INDEX (base
)) == SSA_NAME
)
2350 civ
= get_iv (data
, TMR_INDEX (base
));
2354 TMR_INDEX (base
) = civ
->base
;
2359 if (TMR_STEP (base
))
2360 astep
= fold_build2 (MULT_EXPR
, type
, TMR_STEP (base
), astep
);
2362 step
= fold_build2 (PLUS_EXPR
, type
, step
, astep
);
2366 if (integer_zerop (step
))
2368 base
= tree_mem_ref_addr (type
, base
);
2372 ifs_ivopts_data
.ivopts_data
= data
;
2373 ifs_ivopts_data
.stmt
= stmt
;
2374 ifs_ivopts_data
.step
= size_zero_node
;
2375 if (!for_each_index (&base
, idx_find_step
, &ifs_ivopts_data
)
2376 || integer_zerop (ifs_ivopts_data
.step
))
2378 step
= ifs_ivopts_data
.step
;
2380 /* Check that the base expression is addressable. This needs
2381 to be done after substituting bases of IVs into it. */
2382 if (may_be_nonaddressable_p (base
))
2385 /* Moreover, on strict alignment platforms, check that it is
2386 sufficiently aligned. */
2387 if (STRICT_ALIGNMENT
&& may_be_unaligned_p (base
, step
))
2390 base
= build_fold_addr_expr (base
);
2392 /* Substituting bases of IVs into the base expression might
2393 have caused folding opportunities. */
2394 if (TREE_CODE (base
) == ADDR_EXPR
)
2396 tree
*ref
= &TREE_OPERAND (base
, 0);
2397 while (handled_component_p (*ref
))
2398 ref
= &TREE_OPERAND (*ref
, 0);
2399 if (TREE_CODE (*ref
) == MEM_REF
)
2401 tree tem
= fold_binary (MEM_REF
, TREE_TYPE (*ref
),
2402 TREE_OPERAND (*ref
, 0),
2403 TREE_OPERAND (*ref
, 1));
2410 civ
= alloc_iv (data
, base
, step
);
2411 /* Fail if base object of this memory reference is unknown. */
2412 if (civ
->base_object
== NULL_TREE
)
2415 record_group_use (data
, op_p
, civ
, stmt
, USE_REF_ADDRESS
, TREE_TYPE (*op_p
));
2419 for_each_index (op_p
, idx_record_use
, data
);
2422 /* Finds and records invariants used in STMT. */
2425 find_invariants_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2428 use_operand_p use_p
;
2431 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2433 op
= USE_FROM_PTR (use_p
);
2434 record_invariant (data
, op
, false);
2438 /* CALL calls an internal function. If operand *OP_P will become an
2439 address when the call is expanded, return the type of the memory
2440 being addressed, otherwise return null. */
2443 get_mem_type_for_internal_fn (gcall
*call
, tree
*op_p
)
2445 switch (gimple_call_internal_fn (call
))
2448 if (op_p
== gimple_call_arg_ptr (call
, 0))
2449 return TREE_TYPE (gimple_call_lhs (call
));
2452 case IFN_MASK_STORE
:
2453 if (op_p
== gimple_call_arg_ptr (call
, 0))
2454 return TREE_TYPE (gimple_call_arg (call
, 3));
2462 /* IV is a (non-address) iv that describes operand *OP_P of STMT.
2463 Return true if the operand will become an address when STMT
2464 is expanded and record the associated address use if so. */
2467 find_address_like_use (struct ivopts_data
*data
, gimple
*stmt
, tree
*op_p
,
2470 /* Fail if base object of this memory reference is unknown. */
2471 if (iv
->base_object
== NULL_TREE
)
2474 tree mem_type
= NULL_TREE
;
2475 if (gcall
*call
= dyn_cast
<gcall
*> (stmt
))
2476 if (gimple_call_internal_p (call
))
2477 mem_type
= get_mem_type_for_internal_fn (call
, op_p
);
2480 iv
= alloc_iv (data
, iv
->base
, iv
->step
);
2481 record_group_use (data
, op_p
, iv
, stmt
, USE_PTR_ADDRESS
, mem_type
);
2487 /* Finds interesting uses of induction variables in the statement STMT. */
2490 find_interesting_uses_stmt (struct ivopts_data
*data
, gimple
*stmt
)
2493 tree op
, *lhs
, *rhs
;
2495 use_operand_p use_p
;
2496 enum tree_code code
;
2498 find_invariants_stmt (data
, stmt
);
2500 if (gimple_code (stmt
) == GIMPLE_COND
)
2502 find_interesting_uses_cond (data
, stmt
);
2506 if (is_gimple_assign (stmt
))
2508 lhs
= gimple_assign_lhs_ptr (stmt
);
2509 rhs
= gimple_assign_rhs1_ptr (stmt
);
2511 if (TREE_CODE (*lhs
) == SSA_NAME
)
2513 /* If the statement defines an induction variable, the uses are not
2514 interesting by themselves. */
2516 iv
= get_iv (data
, *lhs
);
2518 if (iv
&& !integer_zerop (iv
->step
))
2522 code
= gimple_assign_rhs_code (stmt
);
2523 if (get_gimple_rhs_class (code
) == GIMPLE_SINGLE_RHS
2524 && (REFERENCE_CLASS_P (*rhs
)
2525 || is_gimple_val (*rhs
)))
2527 if (REFERENCE_CLASS_P (*rhs
))
2528 find_interesting_uses_address (data
, stmt
, rhs
);
2530 find_interesting_uses_op (data
, *rhs
);
2532 if (REFERENCE_CLASS_P (*lhs
))
2533 find_interesting_uses_address (data
, stmt
, lhs
);
2536 else if (TREE_CODE_CLASS (code
) == tcc_comparison
)
2538 find_interesting_uses_cond (data
, stmt
);
2542 /* TODO -- we should also handle address uses of type
2544 memory = call (whatever);
2551 if (gimple_code (stmt
) == GIMPLE_PHI
2552 && gimple_bb (stmt
) == data
->current_loop
->header
)
2554 iv
= get_iv (data
, PHI_RESULT (stmt
));
2556 if (iv
&& !integer_zerop (iv
->step
))
2560 FOR_EACH_PHI_OR_STMT_USE (use_p
, stmt
, iter
, SSA_OP_USE
)
2562 op
= USE_FROM_PTR (use_p
);
2564 if (TREE_CODE (op
) != SSA_NAME
)
2567 iv
= get_iv (data
, op
);
2571 if (!find_address_like_use (data
, stmt
, use_p
->use
, iv
))
2572 find_interesting_uses_op (data
, op
);
2576 /* Finds interesting uses of induction variables outside of loops
2577 on loop exit edge EXIT. */
2580 find_interesting_uses_outside (struct ivopts_data
*data
, edge exit
)
2586 for (psi
= gsi_start_phis (exit
->dest
); !gsi_end_p (psi
); gsi_next (&psi
))
2589 def
= PHI_ARG_DEF_FROM_EDGE (phi
, exit
);
2590 if (!virtual_operand_p (def
))
2591 find_interesting_uses_op (data
, def
);
2595 /* Return TRUE if OFFSET is within the range of [base + offset] addressing
2596 mode for memory reference represented by USE. */
2598 static GTY (()) vec
<rtx
, va_gc
> *addr_list
;
2601 addr_offset_valid_p (struct iv_use
*use
, poly_int64 offset
)
2604 unsigned list_index
;
2605 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
2606 machine_mode addr_mode
, mem_mode
= TYPE_MODE (use
->mem_type
);
2608 list_index
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
2609 if (list_index
>= vec_safe_length (addr_list
))
2610 vec_safe_grow_cleared (addr_list
, list_index
+ MAX_MACHINE_MODE
);
2612 addr
= (*addr_list
)[list_index
];
2615 addr_mode
= targetm
.addr_space
.address_mode (as
);
2616 reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
2617 addr
= gen_rtx_fmt_ee (PLUS
, addr_mode
, reg
, NULL_RTX
);
2618 (*addr_list
)[list_index
] = addr
;
2621 addr_mode
= GET_MODE (addr
);
2623 XEXP (addr
, 1) = gen_int_mode (offset
, addr_mode
);
2624 return (memory_address_addr_space_p (mem_mode
, addr
, as
));
2627 /* Comparison function to sort group in ascending order of addr_offset. */
2630 group_compare_offset (const void *a
, const void *b
)
2632 const struct iv_use
*const *u1
= (const struct iv_use
*const *) a
;
2633 const struct iv_use
*const *u2
= (const struct iv_use
*const *) b
;
2635 return compare_sizes_for_sort ((*u1
)->addr_offset
, (*u2
)->addr_offset
);
2638 /* Check if small groups should be split. Return true if no group
2639 contains more than two uses with distinct addr_offsets. Return
2640 false otherwise. We want to split such groups because:
2642 1) Small groups don't have much benefit and may interfer with
2643 general candidate selection.
2644 2) Size for problem with only small groups is usually small and
2645 general algorithm can handle it well.
2647 TODO -- Above claim may not hold when we want to merge memory
2648 accesses with conseuctive addresses. */
2651 split_small_address_groups_p (struct ivopts_data
*data
)
2653 unsigned int i
, j
, distinct
= 1;
2655 struct iv_group
*group
;
2657 for (i
= 0; i
< data
->vgroups
.length (); i
++)
2659 group
= data
->vgroups
[i
];
2660 if (group
->vuses
.length () == 1)
2663 gcc_assert (address_p (group
->type
));
2664 if (group
->vuses
.length () == 2)
2666 if (compare_sizes_for_sort (group
->vuses
[0]->addr_offset
,
2667 group
->vuses
[1]->addr_offset
) > 0)
2668 std::swap (group
->vuses
[0], group
->vuses
[1]);
2671 group
->vuses
.qsort (group_compare_offset
);
2677 for (pre
= group
->vuses
[0], j
= 1; j
< group
->vuses
.length (); j
++)
2679 if (maybe_ne (group
->vuses
[j
]->addr_offset
, pre
->addr_offset
))
2681 pre
= group
->vuses
[j
];
2690 return (distinct
<= 2);
2693 /* For each group of address type uses, this function further groups
2694 these uses according to the maximum offset supported by target's
2695 [base + offset] addressing mode. */
2698 split_address_groups (struct ivopts_data
*data
)
2701 /* Always split group. */
2702 bool split_p
= split_small_address_groups_p (data
);
2704 for (i
= 0; i
< data
->vgroups
.length (); i
++)
2706 struct iv_group
*new_group
= NULL
;
2707 struct iv_group
*group
= data
->vgroups
[i
];
2708 struct iv_use
*use
= group
->vuses
[0];
2711 use
->group_id
= group
->id
;
2712 if (group
->vuses
.length () == 1)
2715 gcc_assert (address_p (use
->type
));
2717 for (j
= 1; j
< group
->vuses
.length ();)
2719 struct iv_use
*next
= group
->vuses
[j
];
2720 poly_int64 offset
= next
->addr_offset
- use
->addr_offset
;
2722 /* Split group if asked to, or the offset against the first
2723 use can't fit in offset part of addressing mode. IV uses
2724 having the same offset are still kept in one group. */
2725 if (maybe_ne (offset
, 0)
2726 && (split_p
|| !addr_offset_valid_p (use
, offset
)))
2729 new_group
= record_group (data
, group
->type
);
2730 group
->vuses
.ordered_remove (j
);
2731 new_group
->vuses
.safe_push (next
);
2736 next
->group_id
= group
->id
;
2742 /* Finds uses of the induction variables that are interesting. */
2745 find_interesting_uses (struct ivopts_data
*data
)
2748 gimple_stmt_iterator bsi
;
2749 basic_block
*body
= get_loop_body (data
->current_loop
);
2753 for (i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
2758 FOR_EACH_EDGE (e
, ei
, bb
->succs
)
2759 if (e
->dest
!= EXIT_BLOCK_PTR_FOR_FN (cfun
)
2760 && !flow_bb_inside_loop_p (data
->current_loop
, e
->dest
))
2761 find_interesting_uses_outside (data
, e
);
2763 for (bsi
= gsi_start_phis (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2764 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2765 for (bsi
= gsi_start_bb (bb
); !gsi_end_p (bsi
); gsi_next (&bsi
))
2766 if (!is_gimple_debug (gsi_stmt (bsi
)))
2767 find_interesting_uses_stmt (data
, gsi_stmt (bsi
));
2771 split_address_groups (data
);
2773 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
2775 fprintf (dump_file
, "\n<IV Groups>:\n");
2776 dump_groups (dump_file
, data
);
2777 fprintf (dump_file
, "\n");
2781 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2782 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2783 we are at the top-level of the processed address. */
2786 strip_offset_1 (tree expr
, bool inside_addr
, bool top_compref
,
2789 tree op0
= NULL_TREE
, op1
= NULL_TREE
, tmp
, step
;
2790 enum tree_code code
;
2791 tree type
, orig_type
= TREE_TYPE (expr
);
2792 poly_int64 off0
, off1
;
2794 tree orig_expr
= expr
;
2798 type
= TREE_TYPE (expr
);
2799 code
= TREE_CODE (expr
);
2804 case POINTER_PLUS_EXPR
:
2807 op0
= TREE_OPERAND (expr
, 0);
2808 op1
= TREE_OPERAND (expr
, 1);
2810 op0
= strip_offset_1 (op0
, false, false, &off0
);
2811 op1
= strip_offset_1 (op1
, false, false, &off1
);
2813 *offset
= (code
== MINUS_EXPR
? off0
- off1
: off0
+ off1
);
2814 if (op0
== TREE_OPERAND (expr
, 0)
2815 && op1
== TREE_OPERAND (expr
, 1))
2818 if (integer_zerop (op1
))
2820 else if (integer_zerop (op0
))
2822 if (code
== MINUS_EXPR
)
2823 expr
= fold_build1 (NEGATE_EXPR
, type
, op1
);
2828 expr
= fold_build2 (code
, type
, op0
, op1
);
2830 return fold_convert (orig_type
, expr
);
2833 op1
= TREE_OPERAND (expr
, 1);
2834 if (!cst_and_fits_in_hwi (op1
))
2837 op0
= TREE_OPERAND (expr
, 0);
2838 op0
= strip_offset_1 (op0
, false, false, &off0
);
2839 if (op0
== TREE_OPERAND (expr
, 0))
2842 *offset
= off0
* int_cst_value (op1
);
2843 if (integer_zerop (op0
))
2846 expr
= fold_build2 (MULT_EXPR
, type
, op0
, op1
);
2848 return fold_convert (orig_type
, expr
);
2851 case ARRAY_RANGE_REF
:
2855 step
= array_ref_element_size (expr
);
2856 if (!cst_and_fits_in_hwi (step
))
2859 st
= int_cst_value (step
);
2860 op1
= TREE_OPERAND (expr
, 1);
2861 op1
= strip_offset_1 (op1
, false, false, &off1
);
2862 *offset
= off1
* st
;
2865 && integer_zerop (op1
))
2867 /* Strip the component reference completely. */
2868 op0
= TREE_OPERAND (expr
, 0);
2869 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2882 tmp
= component_ref_field_offset (expr
);
2883 field
= TREE_OPERAND (expr
, 1);
2885 && cst_and_fits_in_hwi (tmp
)
2886 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field
)))
2888 HOST_WIDE_INT boffset
, abs_off
;
2890 /* Strip the component reference completely. */
2891 op0
= TREE_OPERAND (expr
, 0);
2892 op0
= strip_offset_1 (op0
, inside_addr
, top_compref
, &off0
);
2893 boffset
= int_cst_value (DECL_FIELD_BIT_OFFSET (field
));
2894 abs_off
= abs_hwi (boffset
) / BITS_PER_UNIT
;
2898 *offset
= off0
+ int_cst_value (tmp
) + abs_off
;
2905 op0
= TREE_OPERAND (expr
, 0);
2906 op0
= strip_offset_1 (op0
, true, true, &off0
);
2909 if (op0
== TREE_OPERAND (expr
, 0))
2912 expr
= build_fold_addr_expr (op0
);
2913 return fold_convert (orig_type
, expr
);
2916 /* ??? Offset operand? */
2917 inside_addr
= false;
2921 if (ptrdiff_tree_p (expr
, offset
) && maybe_ne (*offset
, 0))
2922 return build_int_cst (orig_type
, 0);
2926 /* Default handling of expressions for that we want to recurse into
2927 the first operand. */
2928 op0
= TREE_OPERAND (expr
, 0);
2929 op0
= strip_offset_1 (op0
, inside_addr
, false, &off0
);
2932 if (op0
== TREE_OPERAND (expr
, 0)
2933 && (!op1
|| op1
== TREE_OPERAND (expr
, 1)))
2936 expr
= copy_node (expr
);
2937 TREE_OPERAND (expr
, 0) = op0
;
2939 TREE_OPERAND (expr
, 1) = op1
;
2941 /* Inside address, we might strip the top level component references,
2942 thus changing type of the expression. Handling of ADDR_EXPR
2944 expr
= fold_convert (orig_type
, expr
);
2949 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2952 strip_offset (tree expr
, poly_uint64_pod
*offset
)
2955 tree core
= strip_offset_1 (expr
, false, false, &off
);
2960 /* Returns variant of TYPE that can be used as base for different uses.
2961 We return unsigned type with the same precision, which avoids problems
2965 generic_type_for (tree type
)
2967 if (POINTER_TYPE_P (type
))
2968 return unsigned_type_for (type
);
2970 if (TYPE_UNSIGNED (type
))
2973 return unsigned_type_for (type
);
2976 /* Private data for walk_tree. */
2978 struct walk_tree_data
2981 struct ivopts_data
*idata
;
2984 /* Callback function for walk_tree, it records invariants and symbol
2985 reference in *EXPR_P. DATA is the structure storing result info. */
2988 find_inv_vars_cb (tree
*expr_p
, int *ws ATTRIBUTE_UNUSED
, void *data
)
2991 struct version_info
*info
;
2992 struct walk_tree_data
*wdata
= (struct walk_tree_data
*) data
;
2994 if (TREE_CODE (op
) != SSA_NAME
)
2997 info
= name_info (wdata
->idata
, op
);
2998 /* Because we expand simple operations when finding IVs, loop invariant
2999 variable that isn't referred by the original loop could be used now.
3000 Record such invariant variables here. */
3003 struct ivopts_data
*idata
= wdata
->idata
;
3004 basic_block bb
= gimple_bb (SSA_NAME_DEF_STMT (op
));
3006 if (!bb
|| !flow_bb_inside_loop_p (idata
->current_loop
, bb
))
3008 set_iv (idata
, op
, op
, build_int_cst (TREE_TYPE (op
), 0), true);
3009 record_invariant (idata
, op
, false);
3012 if (!info
->inv_id
|| info
->has_nonlin_use
)
3015 if (!*wdata
->inv_vars
)
3016 *wdata
->inv_vars
= BITMAP_ALLOC (NULL
);
3017 bitmap_set_bit (*wdata
->inv_vars
, info
->inv_id
);
3022 /* Records invariants in *EXPR_P. INV_VARS is the bitmap to that we should
3026 find_inv_vars (struct ivopts_data
*data
, tree
*expr_p
, bitmap
*inv_vars
)
3028 struct walk_tree_data wdata
;
3034 wdata
.inv_vars
= inv_vars
;
3035 walk_tree (expr_p
, find_inv_vars_cb
, &wdata
, NULL
);
3038 /* Get entry from invariant expr hash table for INV_EXPR. New entry
3039 will be recorded if it doesn't exist yet. Given below two exprs:
3040 inv_expr + cst1, inv_expr + cst2
3041 It's hard to make decision whether constant part should be stripped
3042 or not. We choose to not strip based on below facts:
3043 1) We need to count ADD cost for constant part if it's stripped,
3044 which isn't always trivial where this functions is called.
3045 2) Stripping constant away may be conflict with following loop
3046 invariant hoisting pass.
3047 3) Not stripping constant away results in more invariant exprs,
3048 which usually leads to decision preferring lower reg pressure. */
3050 static iv_inv_expr_ent
*
3051 get_loop_invariant_expr (struct ivopts_data
*data
, tree inv_expr
)
3053 STRIP_NOPS (inv_expr
);
3055 if (poly_int_tree_p (inv_expr
)
3056 || TREE_CODE (inv_expr
) == SSA_NAME
)
3059 /* Don't strip constant part away as we used to. */
3061 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
3062 struct iv_inv_expr_ent ent
;
3063 ent
.expr
= inv_expr
;
3064 ent
.hash
= iterative_hash_expr (inv_expr
, 0);
3065 struct iv_inv_expr_ent
**slot
= data
->inv_expr_tab
->find_slot (&ent
, INSERT
);
3069 *slot
= XNEW (struct iv_inv_expr_ent
);
3070 (*slot
)->expr
= inv_expr
;
3071 (*slot
)->hash
= ent
.hash
;
3072 (*slot
)->id
= ++data
->max_inv_expr_id
;
3078 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3079 position to POS. If USE is not NULL, the candidate is set as related to
3080 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
3081 replacement of the final value of the iv by a direct computation. */
3083 static struct iv_cand
*
3084 add_candidate_1 (struct ivopts_data
*data
,
3085 tree base
, tree step
, bool important
, enum iv_position pos
,
3086 struct iv_use
*use
, gimple
*incremented_at
,
3087 struct iv
*orig_iv
= NULL
)
3090 struct iv_cand
*cand
= NULL
;
3091 tree type
, orig_type
;
3093 gcc_assert (base
&& step
);
3095 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3096 live, but the ivopts code may replace a real pointer with one
3097 pointing before or after the memory block that is then adjusted
3098 into the memory block during the loop. FIXME: It would likely be
3099 better to actually force the pointer live and still use ivopts;
3100 for example, it would be enough to write the pointer into memory
3101 and keep it there until after the loop. */
3102 if (flag_keep_gc_roots_live
&& POINTER_TYPE_P (TREE_TYPE (base
)))
3105 /* For non-original variables, make sure their values are computed in a type
3106 that does not invoke undefined behavior on overflows (since in general,
3107 we cannot prove that these induction variables are non-wrapping). */
3108 if (pos
!= IP_ORIGINAL
)
3110 orig_type
= TREE_TYPE (base
);
3111 type
= generic_type_for (orig_type
);
3112 if (type
!= orig_type
)
3114 base
= fold_convert (type
, base
);
3115 step
= fold_convert (type
, step
);
3119 for (i
= 0; i
< data
->vcands
.length (); i
++)
3121 cand
= data
->vcands
[i
];
3123 if (cand
->pos
!= pos
)
3126 if (cand
->incremented_at
!= incremented_at
3127 || ((pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
3128 && cand
->ainc_use
!= use
))
3131 if (operand_equal_p (base
, cand
->iv
->base
, 0)
3132 && operand_equal_p (step
, cand
->iv
->step
, 0)
3133 && (TYPE_PRECISION (TREE_TYPE (base
))
3134 == TYPE_PRECISION (TREE_TYPE (cand
->iv
->base
))))
3138 if (i
== data
->vcands
.length ())
3140 cand
= XCNEW (struct iv_cand
);
3142 cand
->iv
= alloc_iv (data
, base
, step
);
3144 if (pos
!= IP_ORIGINAL
)
3146 cand
->var_before
= create_tmp_var_raw (TREE_TYPE (base
), "ivtmp");
3147 cand
->var_after
= cand
->var_before
;
3149 cand
->important
= important
;
3150 cand
->incremented_at
= incremented_at
;
3151 data
->vcands
.safe_push (cand
);
3153 if (!poly_int_tree_p (step
))
3155 find_inv_vars (data
, &step
, &cand
->inv_vars
);
3157 iv_inv_expr_ent
*inv_expr
= get_loop_invariant_expr (data
, step
);
3158 /* Share bitmap between inv_vars and inv_exprs for cand. */
3159 if (inv_expr
!= NULL
)
3161 cand
->inv_exprs
= cand
->inv_vars
;
3162 cand
->inv_vars
= NULL
;
3163 if (cand
->inv_exprs
)
3164 bitmap_clear (cand
->inv_exprs
);
3166 cand
->inv_exprs
= BITMAP_ALLOC (NULL
);
3168 bitmap_set_bit (cand
->inv_exprs
, inv_expr
->id
);
3172 if (pos
== IP_AFTER_USE
|| pos
== IP_BEFORE_USE
)
3173 cand
->ainc_use
= use
;
3175 cand
->ainc_use
= NULL
;
3177 cand
->orig_iv
= orig_iv
;
3178 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
3179 dump_cand (dump_file
, cand
);
3182 cand
->important
|= important
;
3184 /* Relate candidate to the group for which it is added. */
3186 bitmap_set_bit (data
->vgroups
[use
->group_id
]->related_cands
, i
);
3191 /* Returns true if incrementing the induction variable at the end of the LOOP
3194 The purpose is to avoid splitting latch edge with a biv increment, thus
3195 creating a jump, possibly confusing other optimization passes and leaving
3196 less freedom to scheduler. So we allow IP_END only if IP_NORMAL is not
3197 available (so we do not have a better alternative), or if the latch edge
3198 is already nonempty. */
3201 allow_ip_end_pos_p (struct loop
*loop
)
3203 if (!ip_normal_pos (loop
))
3206 if (!empty_block_p (ip_end_pos (loop
)))
3212 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3213 Important field is set to IMPORTANT. */
3216 add_autoinc_candidates (struct ivopts_data
*data
, tree base
, tree step
,
3217 bool important
, struct iv_use
*use
)
3219 basic_block use_bb
= gimple_bb (use
->stmt
);
3220 machine_mode mem_mode
;
3221 unsigned HOST_WIDE_INT cstepi
;
3223 /* If we insert the increment in any position other than the standard
3224 ones, we must ensure that it is incremented once per iteration.
3225 It must not be in an inner nested loop, or one side of an if
3227 if (use_bb
->loop_father
!= data
->current_loop
3228 || !dominated_by_p (CDI_DOMINATORS
, data
->current_loop
->latch
, use_bb
)
3229 || stmt_can_throw_internal (cfun
, use
->stmt
)
3230 || !cst_and_fits_in_hwi (step
))
3233 cstepi
= int_cst_value (step
);
3235 mem_mode
= TYPE_MODE (use
->mem_type
);
3236 if (((USE_LOAD_PRE_INCREMENT (mem_mode
)
3237 || USE_STORE_PRE_INCREMENT (mem_mode
))
3238 && known_eq (GET_MODE_SIZE (mem_mode
), cstepi
))
3239 || ((USE_LOAD_PRE_DECREMENT (mem_mode
)
3240 || USE_STORE_PRE_DECREMENT (mem_mode
))
3241 && known_eq (GET_MODE_SIZE (mem_mode
), -cstepi
)))
3243 enum tree_code code
= MINUS_EXPR
;
3245 tree new_step
= step
;
3247 if (POINTER_TYPE_P (TREE_TYPE (base
)))
3249 new_step
= fold_build1 (NEGATE_EXPR
, TREE_TYPE (step
), step
);
3250 code
= POINTER_PLUS_EXPR
;
3253 new_step
= fold_convert (TREE_TYPE (base
), new_step
);
3254 new_base
= fold_build2 (code
, TREE_TYPE (base
), base
, new_step
);
3255 add_candidate_1 (data
, new_base
, step
, important
, IP_BEFORE_USE
, use
,
3258 if (((USE_LOAD_POST_INCREMENT (mem_mode
)
3259 || USE_STORE_POST_INCREMENT (mem_mode
))
3260 && known_eq (GET_MODE_SIZE (mem_mode
), cstepi
))
3261 || ((USE_LOAD_POST_DECREMENT (mem_mode
)
3262 || USE_STORE_POST_DECREMENT (mem_mode
))
3263 && known_eq (GET_MODE_SIZE (mem_mode
), -cstepi
)))
3265 add_candidate_1 (data
, base
, step
, important
, IP_AFTER_USE
, use
,
3270 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3271 position to POS. If USE is not NULL, the candidate is set as related to
3272 it. The candidate computation is scheduled before exit condition and at
3276 add_candidate (struct ivopts_data
*data
,
3277 tree base
, tree step
, bool important
, struct iv_use
*use
,
3278 struct iv
*orig_iv
= NULL
)
3280 if (ip_normal_pos (data
->current_loop
))
3281 add_candidate_1 (data
, base
, step
, important
,
3282 IP_NORMAL
, use
, NULL
, orig_iv
);
3283 if (ip_end_pos (data
->current_loop
)
3284 && allow_ip_end_pos_p (data
->current_loop
))
3285 add_candidate_1 (data
, base
, step
, important
, IP_END
, use
, NULL
, orig_iv
);
3288 /* Adds standard iv candidates. */
3291 add_standard_iv_candidates (struct ivopts_data
*data
)
3293 add_candidate (data
, integer_zero_node
, integer_one_node
, true, NULL
);
3295 /* The same for a double-integer type if it is still fast enough. */
3297 (long_integer_type_node
) > TYPE_PRECISION (integer_type_node
)
3298 && TYPE_PRECISION (long_integer_type_node
) <= BITS_PER_WORD
)
3299 add_candidate (data
, build_int_cst (long_integer_type_node
, 0),
3300 build_int_cst (long_integer_type_node
, 1), true, NULL
);
3302 /* The same for a double-integer type if it is still fast enough. */
3304 (long_long_integer_type_node
) > TYPE_PRECISION (long_integer_type_node
)
3305 && TYPE_PRECISION (long_long_integer_type_node
) <= BITS_PER_WORD
)
3306 add_candidate (data
, build_int_cst (long_long_integer_type_node
, 0),
3307 build_int_cst (long_long_integer_type_node
, 1), true, NULL
);
3311 /* Adds candidates bases on the old induction variable IV. */
3314 add_iv_candidate_for_biv (struct ivopts_data
*data
, struct iv
*iv
)
3318 struct iv_cand
*cand
;
3320 /* Check if this biv is used in address type use. */
3321 if (iv
->no_overflow
&& iv
->have_address_use
3322 && INTEGRAL_TYPE_P (TREE_TYPE (iv
->base
))
3323 && TYPE_PRECISION (TREE_TYPE (iv
->base
)) < TYPE_PRECISION (sizetype
))
3325 tree base
= fold_convert (sizetype
, iv
->base
);
3326 tree step
= fold_convert (sizetype
, iv
->step
);
3328 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3329 add_candidate (data
, base
, step
, true, NULL
, iv
);
3330 /* Add iv cand of the original type only if it has nonlinear use. */
3332 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3335 add_candidate (data
, iv
->base
, iv
->step
, true, NULL
);
3337 /* The same, but with initial value zero. */
3338 if (POINTER_TYPE_P (TREE_TYPE (iv
->base
)))
3339 add_candidate (data
, size_int (0), iv
->step
, true, NULL
);
3341 add_candidate (data
, build_int_cst (TREE_TYPE (iv
->base
), 0),
3342 iv
->step
, true, NULL
);
3344 phi
= SSA_NAME_DEF_STMT (iv
->ssa_name
);
3345 if (gimple_code (phi
) == GIMPLE_PHI
)
3347 /* Additionally record the possibility of leaving the original iv
3349 def
= PHI_ARG_DEF_FROM_EDGE (phi
, loop_latch_edge (data
->current_loop
));
3350 /* Don't add candidate if it's from another PHI node because
3351 it's an affine iv appearing in the form of PEELED_CHREC. */
3352 phi
= SSA_NAME_DEF_STMT (def
);
3353 if (gimple_code (phi
) != GIMPLE_PHI
)
3355 cand
= add_candidate_1 (data
,
3356 iv
->base
, iv
->step
, true, IP_ORIGINAL
, NULL
,
3357 SSA_NAME_DEF_STMT (def
));
3360 cand
->var_before
= iv
->ssa_name
;
3361 cand
->var_after
= def
;
3365 gcc_assert (gimple_bb (phi
) == data
->current_loop
->header
);
3369 /* Adds candidates based on the old induction variables. */
3372 add_iv_candidate_for_bivs (struct ivopts_data
*data
)
3378 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
3380 iv
= ver_info (data
, i
)->iv
;
3381 if (iv
&& iv
->biv_p
&& !integer_zerop (iv
->step
))
3382 add_iv_candidate_for_biv (data
, iv
);
3386 /* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3389 record_common_cand (struct ivopts_data
*data
, tree base
,
3390 tree step
, struct iv_use
*use
)
3392 struct iv_common_cand ent
;
3393 struct iv_common_cand
**slot
;
3397 ent
.hash
= iterative_hash_expr (base
, 0);
3398 ent
.hash
= iterative_hash_expr (step
, ent
.hash
);
3400 slot
= data
->iv_common_cand_tab
->find_slot (&ent
, INSERT
);
3403 *slot
= new iv_common_cand ();
3404 (*slot
)->base
= base
;
3405 (*slot
)->step
= step
;
3406 (*slot
)->uses
.create (8);
3407 (*slot
)->hash
= ent
.hash
;
3408 data
->iv_common_cands
.safe_push ((*slot
));
3411 gcc_assert (use
!= NULL
);
3412 (*slot
)->uses
.safe_push (use
);
3416 /* Comparison function used to sort common candidates. */
3419 common_cand_cmp (const void *p1
, const void *p2
)
3422 const struct iv_common_cand
*const *const ccand1
3423 = (const struct iv_common_cand
*const *)p1
;
3424 const struct iv_common_cand
*const *const ccand2
3425 = (const struct iv_common_cand
*const *)p2
;
3427 n1
= (*ccand1
)->uses
.length ();
3428 n2
= (*ccand2
)->uses
.length ();
3432 /* Adds IV candidates based on common candidates recorded. */
3435 add_iv_candidate_derived_from_uses (struct ivopts_data
*data
)
3438 struct iv_cand
*cand_1
, *cand_2
;
3440 data
->iv_common_cands
.qsort (common_cand_cmp
);
3441 for (i
= 0; i
< data
->iv_common_cands
.length (); i
++)
3443 struct iv_common_cand
*ptr
= data
->iv_common_cands
[i
];
3445 /* Only add IV candidate if it's derived from multiple uses. */
3446 if (ptr
->uses
.length () <= 1)
3451 if (ip_normal_pos (data
->current_loop
))
3452 cand_1
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3453 false, IP_NORMAL
, NULL
, NULL
);
3455 if (ip_end_pos (data
->current_loop
)
3456 && allow_ip_end_pos_p (data
->current_loop
))
3457 cand_2
= add_candidate_1 (data
, ptr
->base
, ptr
->step
,
3458 false, IP_END
, NULL
, NULL
);
3460 /* Bind deriving uses and the new candidates. */
3461 for (j
= 0; j
< ptr
->uses
.length (); j
++)
3463 struct iv_group
*group
= data
->vgroups
[ptr
->uses
[j
]->group_id
];
3465 bitmap_set_bit (group
->related_cands
, cand_1
->id
);
3467 bitmap_set_bit (group
->related_cands
, cand_2
->id
);
3471 /* Release data since it is useless from this point. */
3472 data
->iv_common_cand_tab
->empty ();
3473 data
->iv_common_cands
.truncate (0);
3476 /* Adds candidates based on the value of USE's iv. */
3479 add_iv_candidate_for_use (struct ivopts_data
*data
, struct iv_use
*use
)
3484 struct iv
*iv
= use
->iv
;
3486 add_candidate (data
, iv
->base
, iv
->step
, false, use
);
3488 /* Record common candidate for use in case it can be shared by others. */
3489 record_common_cand (data
, iv
->base
, iv
->step
, use
);
3491 /* Record common candidate with initial value zero. */
3492 basetype
= TREE_TYPE (iv
->base
);
3493 if (POINTER_TYPE_P (basetype
))
3494 basetype
= sizetype
;
3495 record_common_cand (data
, build_int_cst (basetype
, 0), iv
->step
, use
);
3497 /* Record common candidate with constant offset stripped in base.
3498 Like the use itself, we also add candidate directly for it. */
3499 base
= strip_offset (iv
->base
, &offset
);
3500 if (maybe_ne (offset
, 0U) || base
!= iv
->base
)
3502 record_common_cand (data
, base
, iv
->step
, use
);
3503 add_candidate (data
, base
, iv
->step
, false, use
);
3506 /* Record common candidate with base_object removed in base. */
3509 if (iv
->base_object
!= NULL
&& TREE_CODE (base
) == POINTER_PLUS_EXPR
)
3511 tree step
= iv
->step
;
3514 base
= TREE_OPERAND (base
, 1);
3515 step
= fold_convert (sizetype
, step
);
3516 record_common_cand (data
, base
, step
, use
);
3517 /* Also record common candidate with offset stripped. */
3518 base
= strip_offset (base
, &offset
);
3519 if (maybe_ne (offset
, 0U))
3520 record_common_cand (data
, base
, step
, use
);
3523 /* At last, add auto-incremental candidates. Make such variables
3524 important since other iv uses with same base object may be based
3526 if (use
!= NULL
&& address_p (use
->type
))
3527 add_autoinc_candidates (data
, iv
->base
, iv
->step
, true, use
);
3530 /* Adds candidates based on the uses. */
3533 add_iv_candidate_for_groups (struct ivopts_data
*data
)
3537 /* Only add candidate for the first use in group. */
3538 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3540 struct iv_group
*group
= data
->vgroups
[i
];
3542 gcc_assert (group
->vuses
[0] != NULL
);
3543 add_iv_candidate_for_use (data
, group
->vuses
[0]);
3545 add_iv_candidate_derived_from_uses (data
);
3548 /* Record important candidates and add them to related_cands bitmaps. */
3551 record_important_candidates (struct ivopts_data
*data
)
3554 struct iv_group
*group
;
3556 for (i
= 0; i
< data
->vcands
.length (); i
++)
3558 struct iv_cand
*cand
= data
->vcands
[i
];
3560 if (cand
->important
)
3561 bitmap_set_bit (data
->important_candidates
, i
);
3564 data
->consider_all_candidates
= (data
->vcands
.length ()
3565 <= CONSIDER_ALL_CANDIDATES_BOUND
);
3567 /* Add important candidates to groups' related_cands bitmaps. */
3568 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3570 group
= data
->vgroups
[i
];
3571 bitmap_ior_into (group
->related_cands
, data
->important_candidates
);
3575 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3576 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3577 we allocate a simple list to every use. */
3580 alloc_use_cost_map (struct ivopts_data
*data
)
3582 unsigned i
, size
, s
;
3584 for (i
= 0; i
< data
->vgroups
.length (); i
++)
3586 struct iv_group
*group
= data
->vgroups
[i
];
3588 if (data
->consider_all_candidates
)
3589 size
= data
->vcands
.length ();
3592 s
= bitmap_count_bits (group
->related_cands
);
3594 /* Round up to the power of two, so that moduling by it is fast. */
3595 size
= s
? (1 << ceil_log2 (s
)) : 1;
3598 group
->n_map_members
= size
;
3599 group
->cost_map
= XCNEWVEC (struct cost_pair
, size
);
3603 /* Sets cost of (GROUP, CAND) pair to COST and record that it depends
3604 on invariants INV_VARS and that the value used in expressing it is
3605 VALUE, and in case of iv elimination the comparison operator is COMP. */
3608 set_group_iv_cost (struct ivopts_data
*data
,
3609 struct iv_group
*group
, struct iv_cand
*cand
,
3610 comp_cost cost
, bitmap inv_vars
, tree value
,
3611 enum tree_code comp
, bitmap inv_exprs
)
3615 if (cost
.infinite_cost_p ())
3617 BITMAP_FREE (inv_vars
);
3618 BITMAP_FREE (inv_exprs
);
3622 if (data
->consider_all_candidates
)
3624 group
->cost_map
[cand
->id
].cand
= cand
;
3625 group
->cost_map
[cand
->id
].cost
= cost
;
3626 group
->cost_map
[cand
->id
].inv_vars
= inv_vars
;
3627 group
->cost_map
[cand
->id
].inv_exprs
= inv_exprs
;
3628 group
->cost_map
[cand
->id
].value
= value
;
3629 group
->cost_map
[cand
->id
].comp
= comp
;
3633 /* n_map_members is a power of two, so this computes modulo. */
3634 s
= cand
->id
& (group
->n_map_members
- 1);
3635 for (i
= s
; i
< group
->n_map_members
; i
++)
3636 if (!group
->cost_map
[i
].cand
)
3638 for (i
= 0; i
< s
; i
++)
3639 if (!group
->cost_map
[i
].cand
)
3645 group
->cost_map
[i
].cand
= cand
;
3646 group
->cost_map
[i
].cost
= cost
;
3647 group
->cost_map
[i
].inv_vars
= inv_vars
;
3648 group
->cost_map
[i
].inv_exprs
= inv_exprs
;
3649 group
->cost_map
[i
].value
= value
;
3650 group
->cost_map
[i
].comp
= comp
;
3653 /* Gets cost of (GROUP, CAND) pair. */
3655 static struct cost_pair
*
3656 get_group_iv_cost (struct ivopts_data
*data
, struct iv_group
*group
,
3657 struct iv_cand
*cand
)
3660 struct cost_pair
*ret
;
3665 if (data
->consider_all_candidates
)
3667 ret
= group
->cost_map
+ cand
->id
;
3674 /* n_map_members is a power of two, so this computes modulo. */
3675 s
= cand
->id
& (group
->n_map_members
- 1);
3676 for (i
= s
; i
< group
->n_map_members
; i
++)
3677 if (group
->cost_map
[i
].cand
== cand
)
3678 return group
->cost_map
+ i
;
3679 else if (group
->cost_map
[i
].cand
== NULL
)
3681 for (i
= 0; i
< s
; i
++)
3682 if (group
->cost_map
[i
].cand
== cand
)
3683 return group
->cost_map
+ i
;
3684 else if (group
->cost_map
[i
].cand
== NULL
)
3690 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3692 produce_memory_decl_rtl (tree obj
, int *regno
)
3694 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (obj
));
3695 machine_mode address_mode
= targetm
.addr_space
.address_mode (as
);
3699 if (TREE_STATIC (obj
) || DECL_EXTERNAL (obj
))
3701 const char *name
= IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj
));
3702 x
= gen_rtx_SYMBOL_REF (address_mode
, name
);
3703 SET_SYMBOL_REF_DECL (x
, obj
);
3704 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3705 set_mem_addr_space (x
, as
);
3706 targetm
.encode_section_info (obj
, x
, true);
3710 x
= gen_raw_REG (address_mode
, (*regno
)++);
3711 x
= gen_rtx_MEM (DECL_MODE (obj
), x
);
3712 set_mem_addr_space (x
, as
);
3718 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3719 walk_tree. DATA contains the actual fake register number. */
3722 prepare_decl_rtl (tree
*expr_p
, int *ws
, void *data
)
3724 tree obj
= NULL_TREE
;
3726 int *regno
= (int *) data
;
3728 switch (TREE_CODE (*expr_p
))
3731 for (expr_p
= &TREE_OPERAND (*expr_p
, 0);
3732 handled_component_p (*expr_p
);
3733 expr_p
= &TREE_OPERAND (*expr_p
, 0))
3736 if (DECL_P (obj
) && HAS_RTL_P (obj
) && !DECL_RTL_SET_P (obj
))
3737 x
= produce_memory_decl_rtl (obj
, regno
);
3742 obj
= SSA_NAME_VAR (*expr_p
);
3743 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3746 if (!DECL_RTL_SET_P (obj
))
3747 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3756 if (DECL_RTL_SET_P (obj
))
3759 if (DECL_MODE (obj
) == BLKmode
)
3760 x
= produce_memory_decl_rtl (obj
, regno
);
3762 x
= gen_raw_REG (DECL_MODE (obj
), (*regno
)++);
3772 decl_rtl_to_reset
.safe_push (obj
);
3773 SET_DECL_RTL (obj
, x
);
3779 /* Determines cost of the computation of EXPR. */
3782 computation_cost (tree expr
, bool speed
)
3786 tree type
= TREE_TYPE (expr
);
3788 /* Avoid using hard regs in ways which may be unsupported. */
3789 int regno
= LAST_VIRTUAL_REGISTER
+ 1;
3790 struct cgraph_node
*node
= cgraph_node::get (current_function_decl
);
3791 enum node_frequency real_frequency
= node
->frequency
;
3793 node
->frequency
= NODE_FREQUENCY_NORMAL
;
3794 crtl
->maybe_hot_insn_p
= speed
;
3795 walk_tree (&expr
, prepare_decl_rtl
, ®no
, NULL
);
3797 rslt
= expand_expr (expr
, NULL_RTX
, TYPE_MODE (type
), EXPAND_NORMAL
);
3800 default_rtl_profile ();
3801 node
->frequency
= real_frequency
;
3803 cost
= seq_cost (seq
, speed
);
3805 cost
+= address_cost (XEXP (rslt
, 0), TYPE_MODE (type
),
3806 TYPE_ADDR_SPACE (type
), speed
);
3807 else if (!REG_P (rslt
))
3808 cost
+= set_src_cost (rslt
, TYPE_MODE (type
), speed
);
3813 /* Returns variable containing the value of candidate CAND at statement AT. */
3816 var_at_stmt (struct loop
*loop
, struct iv_cand
*cand
, gimple
*stmt
)
3818 if (stmt_after_increment (loop
, cand
, stmt
))
3819 return cand
->var_after
;
3821 return cand
->var_before
;
3824 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3825 same precision that is at least as wide as the precision of TYPE, stores
3826 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3830 determine_common_wider_type (tree
*a
, tree
*b
)
3832 tree wider_type
= NULL
;
3834 tree atype
= TREE_TYPE (*a
);
3836 if (CONVERT_EXPR_P (*a
))
3838 suba
= TREE_OPERAND (*a
, 0);
3839 wider_type
= TREE_TYPE (suba
);
3840 if (TYPE_PRECISION (wider_type
) < TYPE_PRECISION (atype
))
3846 if (CONVERT_EXPR_P (*b
))
3848 subb
= TREE_OPERAND (*b
, 0);
3849 if (TYPE_PRECISION (wider_type
) != TYPE_PRECISION (TREE_TYPE (subb
)))
3860 /* Determines the expression by that USE is expressed from induction variable
3861 CAND at statement AT in LOOP. The expression is stored in two parts in a
3862 decomposed form. The invariant part is stored in AFF_INV; while variant
3863 part in AFF_VAR. Store ratio of CAND.step over USE.step in PRAT if it's
3864 non-null. Returns false if USE cannot be expressed using CAND. */
3867 get_computation_aff_1 (struct loop
*loop
, gimple
*at
, struct iv_use
*use
,
3868 struct iv_cand
*cand
, struct aff_tree
*aff_inv
,
3869 struct aff_tree
*aff_var
, widest_int
*prat
= NULL
)
3871 tree ubase
= use
->iv
->base
, ustep
= use
->iv
->step
;
3872 tree cbase
= cand
->iv
->base
, cstep
= cand
->iv
->step
;
3873 tree common_type
, uutype
, var
, cstep_common
;
3874 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
3878 /* We must have a precision to express the values of use. */
3879 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
3882 var
= var_at_stmt (loop
, cand
, at
);
3883 uutype
= unsigned_type_for (utype
);
3885 /* If the conversion is not noop, perform it. */
3886 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
3888 if (cand
->orig_iv
!= NULL
&& CONVERT_EXPR_P (cbase
)
3889 && (CONVERT_EXPR_P (cstep
) || poly_int_tree_p (cstep
)))
3891 tree inner_base
, inner_step
, inner_type
;
3892 inner_base
= TREE_OPERAND (cbase
, 0);
3893 if (CONVERT_EXPR_P (cstep
))
3894 inner_step
= TREE_OPERAND (cstep
, 0);
3898 inner_type
= TREE_TYPE (inner_base
);
3899 /* If candidate is added from a biv whose type is smaller than
3900 ctype, we know both candidate and the biv won't overflow.
3901 In this case, it's safe to skip the conversion in candidate.
3902 As an example, (unsigned short)((unsigned long)A) equals to
3903 (unsigned short)A, if A has a type no larger than short. */
3904 if (TYPE_PRECISION (inner_type
) <= TYPE_PRECISION (uutype
))
3910 cbase
= fold_convert (uutype
, cbase
);
3911 cstep
= fold_convert (uutype
, cstep
);
3912 var
= fold_convert (uutype
, var
);
3915 /* Ratio is 1 when computing the value of biv cand by itself.
3916 We can't rely on constant_multiple_of in this case because the
3917 use is created after the original biv is selected. The call
3918 could fail because of inconsistent fold behavior. See PR68021
3919 for more information. */
3920 if (cand
->pos
== IP_ORIGINAL
&& cand
->incremented_at
== use
->stmt
)
3922 gcc_assert (is_gimple_assign (use
->stmt
));
3923 gcc_assert (use
->iv
->ssa_name
== cand
->var_after
);
3924 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
3927 else if (!constant_multiple_of (ustep
, cstep
, &rat
))
3933 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3934 type, we achieve better folding by computing their difference in this
3935 wider type, and cast the result to UUTYPE. We do not need to worry about
3936 overflows, as all the arithmetics will in the end be performed in UUTYPE
3938 common_type
= determine_common_wider_type (&ubase
, &cbase
);
3940 /* use = ubase - ratio * cbase + ratio * var. */
3941 tree_to_aff_combination (ubase
, common_type
, aff_inv
);
3942 tree_to_aff_combination (cbase
, common_type
, &aff_cbase
);
3943 tree_to_aff_combination (var
, uutype
, aff_var
);
3945 /* We need to shift the value if we are after the increment. */
3946 if (stmt_after_increment (loop
, cand
, at
))
3950 if (common_type
!= uutype
)
3951 cstep_common
= fold_convert (common_type
, cstep
);
3953 cstep_common
= cstep
;
3955 tree_to_aff_combination (cstep_common
, common_type
, &cstep_aff
);
3956 aff_combination_add (&aff_cbase
, &cstep_aff
);
3959 aff_combination_scale (&aff_cbase
, -rat
);
3960 aff_combination_add (aff_inv
, &aff_cbase
);
3961 if (common_type
!= uutype
)
3962 aff_combination_convert (aff_inv
, uutype
);
3964 aff_combination_scale (aff_var
, rat
);
3968 /* Determines the expression by which USE is expressed from induction variable
3969 CAND at statement AT in LOOP. The expression is stored in a decomposed
3970 form into AFF. Returns false if USE cannot be expressed using CAND. */
3973 get_computation_aff (struct loop
*loop
, gimple
*at
, struct iv_use
*use
,
3974 struct iv_cand
*cand
, struct aff_tree
*aff
)
3978 if (!get_computation_aff_1 (loop
, at
, use
, cand
, aff
, &aff_var
))
3981 aff_combination_add (aff
, &aff_var
);
3985 /* Return the type of USE. */
3988 get_use_type (struct iv_use
*use
)
3990 tree base_type
= TREE_TYPE (use
->iv
->base
);
3993 if (use
->type
== USE_REF_ADDRESS
)
3995 /* The base_type may be a void pointer. Create a pointer type based on
3996 the mem_ref instead. */
3997 type
= build_pointer_type (TREE_TYPE (*use
->op_p
));
3998 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type
))
3999 == TYPE_ADDR_SPACE (TREE_TYPE (base_type
)));
4007 /* Determines the expression by which USE is expressed from induction variable
4008 CAND at statement AT in LOOP. The computation is unshared. */
4011 get_computation_at (struct loop
*loop
, gimple
*at
,
4012 struct iv_use
*use
, struct iv_cand
*cand
)
4015 tree type
= get_use_type (use
);
4017 if (!get_computation_aff (loop
, at
, use
, cand
, &aff
))
4019 unshare_aff_combination (&aff
);
4020 return fold_convert (type
, aff_combination_to_tree (&aff
));
4023 /* Adjust the cost COST for being in loop setup rather than loop body.
4024 If we're optimizing for space, the loop setup overhead is constant;
4025 if we're optimizing for speed, amortize it over the per-iteration cost.
4026 If ROUND_UP_P is true, the result is rounded up rather than rounded to zero when
4027 optimizing for speed. */
4029 adjust_setup_cost (struct ivopts_data
*data
, unsigned cost
,
4030 bool round_up_p
= false)
4034 else if (optimize_loop_for_speed_p (data
->current_loop
))
4036 HOST_WIDE_INT niters
= avg_loop_niter (data
->current_loop
);
4037 return ((HOST_WIDE_INT
) cost
+ (round_up_p
? niters
- 1 : 0)) / niters
;
4043 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
4044 EXPR operand holding the shift. COST0 and COST1 are the costs for
4045 calculating the operands of EXPR. Returns true if successful, and returns
4046 the cost in COST. */
4049 get_shiftadd_cost (tree expr
, scalar_int_mode mode
, comp_cost cost0
,
4050 comp_cost cost1
, tree mult
, bool speed
, comp_cost
*cost
)
4053 tree op1
= TREE_OPERAND (expr
, 1);
4054 tree cst
= TREE_OPERAND (mult
, 1);
4055 tree multop
= TREE_OPERAND (mult
, 0);
4056 int m
= exact_log2 (int_cst_value (cst
));
4057 int maxm
= MIN (BITS_PER_WORD
, GET_MODE_BITSIZE (mode
));
4058 int as_cost
, sa_cost
;
4061 if (!(m
>= 0 && m
< maxm
))
4065 mult_in_op1
= operand_equal_p (op1
, mult
, 0);
4067 as_cost
= add_cost (speed
, mode
) + shift_cost (speed
, mode
, m
);
4069 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
4070 use that in preference to a shift insn followed by an add insn. */
4071 sa_cost
= (TREE_CODE (expr
) != MINUS_EXPR
4072 ? shiftadd_cost (speed
, mode
, m
)
4074 ? shiftsub1_cost (speed
, mode
, m
)
4075 : shiftsub0_cost (speed
, mode
, m
)));
4077 res
= comp_cost (MIN (as_cost
, sa_cost
), 0);
4078 res
+= (mult_in_op1
? cost0
: cost1
);
4080 STRIP_NOPS (multop
);
4081 if (!is_gimple_val (multop
))
4082 res
+= force_expr_to_var_cost (multop
, speed
);
4088 /* Estimates cost of forcing expression EXPR into a variable. */
4091 force_expr_to_var_cost (tree expr
, bool speed
)
4093 static bool costs_initialized
= false;
4094 static unsigned integer_cost
[2];
4095 static unsigned symbol_cost
[2];
4096 static unsigned address_cost
[2];
4098 comp_cost cost0
, cost1
, cost
;
4100 scalar_int_mode int_mode
;
4102 if (!costs_initialized
)
4104 tree type
= build_pointer_type (integer_type_node
);
4109 var
= create_tmp_var_raw (integer_type_node
, "test_var");
4110 TREE_STATIC (var
) = 1;
4111 x
= produce_memory_decl_rtl (var
, NULL
);
4112 SET_DECL_RTL (var
, x
);
4114 addr
= build1 (ADDR_EXPR
, type
, var
);
4117 for (i
= 0; i
< 2; i
++)
4119 integer_cost
[i
] = computation_cost (build_int_cst (integer_type_node
,
4122 symbol_cost
[i
] = computation_cost (addr
, i
) + 1;
4125 = computation_cost (fold_build_pointer_plus_hwi (addr
, 2000), i
) + 1;
4126 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4128 fprintf (dump_file
, "force_expr_to_var_cost %s costs:\n", i
? "speed" : "size");
4129 fprintf (dump_file
, " integer %d\n", (int) integer_cost
[i
]);
4130 fprintf (dump_file
, " symbol %d\n", (int) symbol_cost
[i
]);
4131 fprintf (dump_file
, " address %d\n", (int) address_cost
[i
]);
4132 fprintf (dump_file
, " other %d\n", (int) target_spill_cost
[i
]);
4133 fprintf (dump_file
, "\n");
4137 costs_initialized
= true;
4142 if (SSA_VAR_P (expr
))
4145 if (is_gimple_min_invariant (expr
))
4147 if (poly_int_tree_p (expr
))
4148 return comp_cost (integer_cost
[speed
], 0);
4150 if (TREE_CODE (expr
) == ADDR_EXPR
)
4152 tree obj
= TREE_OPERAND (expr
, 0);
4155 || TREE_CODE (obj
) == PARM_DECL
4156 || TREE_CODE (obj
) == RESULT_DECL
)
4157 return comp_cost (symbol_cost
[speed
], 0);
4160 return comp_cost (address_cost
[speed
], 0);
4163 switch (TREE_CODE (expr
))
4165 case POINTER_PLUS_EXPR
:
4169 case TRUNC_DIV_EXPR
:
4174 op0
= TREE_OPERAND (expr
, 0);
4175 op1
= TREE_OPERAND (expr
, 1);
4183 op0
= TREE_OPERAND (expr
, 0);
4189 /* Just an arbitrary value, FIXME. */
4190 return comp_cost (target_spill_cost
[speed
], 0);
4193 if (op0
== NULL_TREE
4194 || TREE_CODE (op0
) == SSA_NAME
|| CONSTANT_CLASS_P (op0
))
4197 cost0
= force_expr_to_var_cost (op0
, speed
);
4199 if (op1
== NULL_TREE
4200 || TREE_CODE (op1
) == SSA_NAME
|| CONSTANT_CLASS_P (op1
))
4203 cost1
= force_expr_to_var_cost (op1
, speed
);
4205 mode
= TYPE_MODE (TREE_TYPE (expr
));
4206 switch (TREE_CODE (expr
))
4208 case POINTER_PLUS_EXPR
:
4212 cost
= comp_cost (add_cost (speed
, mode
), 0);
4213 if (TREE_CODE (expr
) != NEGATE_EXPR
)
4215 tree mult
= NULL_TREE
;
4217 if (TREE_CODE (op1
) == MULT_EXPR
)
4219 else if (TREE_CODE (op0
) == MULT_EXPR
)
4222 if (mult
!= NULL_TREE
4223 && is_a
<scalar_int_mode
> (mode
, &int_mode
)
4224 && cst_and_fits_in_hwi (TREE_OPERAND (mult
, 1))
4225 && get_shiftadd_cost (expr
, int_mode
, cost0
, cost1
, mult
,
4233 tree inner_mode
, outer_mode
;
4234 outer_mode
= TREE_TYPE (expr
);
4235 inner_mode
= TREE_TYPE (op0
);
4236 cost
= comp_cost (convert_cost (TYPE_MODE (outer_mode
),
4237 TYPE_MODE (inner_mode
), speed
), 0);
4242 if (cst_and_fits_in_hwi (op0
))
4243 cost
= comp_cost (mult_by_coeff_cost (int_cst_value (op0
),
4245 else if (cst_and_fits_in_hwi (op1
))
4246 cost
= comp_cost (mult_by_coeff_cost (int_cst_value (op1
),
4249 return comp_cost (target_spill_cost
[speed
], 0);
4252 case TRUNC_DIV_EXPR
:
4253 /* Division by power of two is usually cheap, so we allow it. Forbid
4255 if (integer_pow2p (TREE_OPERAND (expr
, 1)))
4256 cost
= comp_cost (add_cost (speed
, mode
), 0);
4258 cost
= comp_cost (target_spill_cost
[speed
], 0);
4266 cost
= comp_cost (add_cost (speed
, mode
), 0);
4278 /* Estimates cost of forcing EXPR into a variable. INV_VARS is a set of the
4279 invariants the computation depends on. */
4282 force_var_cost (struct ivopts_data
*data
, tree expr
, bitmap
*inv_vars
)
4287 find_inv_vars (data
, &expr
, inv_vars
);
4288 return force_expr_to_var_cost (expr
, data
->speed
);
4291 /* Returns cost of auto-modifying address expression in shape base + offset.
4292 AINC_STEP is step size of the address IV. AINC_OFFSET is offset of the
4293 address expression. The address expression has ADDR_MODE in addr space
4294 AS. The memory access has MEM_MODE. SPEED means we are optimizing for
4299 AINC_PRE_INC
, /* Pre increment. */
4300 AINC_PRE_DEC
, /* Pre decrement. */
4301 AINC_POST_INC
, /* Post increment. */
4302 AINC_POST_DEC
, /* Post decrement. */
4303 AINC_NONE
/* Also the number of auto increment types. */
4306 struct ainc_cost_data
4308 unsigned costs
[AINC_NONE
];
4312 get_address_cost_ainc (poly_int64 ainc_step
, poly_int64 ainc_offset
,
4313 machine_mode addr_mode
, machine_mode mem_mode
,
4314 addr_space_t as
, bool speed
)
4316 if (!USE_LOAD_PRE_DECREMENT (mem_mode
)
4317 && !USE_STORE_PRE_DECREMENT (mem_mode
)
4318 && !USE_LOAD_POST_DECREMENT (mem_mode
)
4319 && !USE_STORE_POST_DECREMENT (mem_mode
)
4320 && !USE_LOAD_PRE_INCREMENT (mem_mode
)
4321 && !USE_STORE_PRE_INCREMENT (mem_mode
)
4322 && !USE_LOAD_POST_INCREMENT (mem_mode
)
4323 && !USE_STORE_POST_INCREMENT (mem_mode
))
4324 return infinite_cost
;
4326 static vec
<ainc_cost_data
*> ainc_cost_data_list
;
4327 unsigned idx
= (unsigned) as
* MAX_MACHINE_MODE
+ (unsigned) mem_mode
;
4328 if (idx
>= ainc_cost_data_list
.length ())
4330 unsigned nsize
= ((unsigned) as
+ 1) *MAX_MACHINE_MODE
;
4332 gcc_assert (nsize
> idx
);
4333 ainc_cost_data_list
.safe_grow_cleared (nsize
);
4336 ainc_cost_data
*data
= ainc_cost_data_list
[idx
];
4339 rtx reg
= gen_raw_REG (addr_mode
, LAST_VIRTUAL_REGISTER
+ 1);
4341 data
= (ainc_cost_data
*) xcalloc (1, sizeof (*data
));
4342 data
->costs
[AINC_PRE_DEC
] = INFTY
;
4343 data
->costs
[AINC_POST_DEC
] = INFTY
;
4344 data
->costs
[AINC_PRE_INC
] = INFTY
;
4345 data
->costs
[AINC_POST_INC
] = INFTY
;
4346 if (USE_LOAD_PRE_DECREMENT (mem_mode
)
4347 || USE_STORE_PRE_DECREMENT (mem_mode
))
4349 rtx addr
= gen_rtx_PRE_DEC (addr_mode
, reg
);
4351 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4352 data
->costs
[AINC_PRE_DEC
]
4353 = address_cost (addr
, mem_mode
, as
, speed
);
4355 if (USE_LOAD_POST_DECREMENT (mem_mode
)
4356 || USE_STORE_POST_DECREMENT (mem_mode
))
4358 rtx addr
= gen_rtx_POST_DEC (addr_mode
, reg
);
4360 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4361 data
->costs
[AINC_POST_DEC
]
4362 = address_cost (addr
, mem_mode
, as
, speed
);
4364 if (USE_LOAD_PRE_INCREMENT (mem_mode
)
4365 || USE_STORE_PRE_INCREMENT (mem_mode
))
4367 rtx addr
= gen_rtx_PRE_INC (addr_mode
, reg
);
4369 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4370 data
->costs
[AINC_PRE_INC
]
4371 = address_cost (addr
, mem_mode
, as
, speed
);
4373 if (USE_LOAD_POST_INCREMENT (mem_mode
)
4374 || USE_STORE_POST_INCREMENT (mem_mode
))
4376 rtx addr
= gen_rtx_POST_INC (addr_mode
, reg
);
4378 if (memory_address_addr_space_p (mem_mode
, addr
, as
))
4379 data
->costs
[AINC_POST_INC
]
4380 = address_cost (addr
, mem_mode
, as
, speed
);
4382 ainc_cost_data_list
[idx
] = data
;
4385 poly_int64 msize
= GET_MODE_SIZE (mem_mode
);
4386 if (known_eq (ainc_offset
, 0) && known_eq (msize
, ainc_step
))
4387 return comp_cost (data
->costs
[AINC_POST_INC
], 0);
4388 if (known_eq (ainc_offset
, 0) && known_eq (msize
, -ainc_step
))
4389 return comp_cost (data
->costs
[AINC_POST_DEC
], 0);
4390 if (known_eq (ainc_offset
, msize
) && known_eq (msize
, ainc_step
))
4391 return comp_cost (data
->costs
[AINC_PRE_INC
], 0);
4392 if (known_eq (ainc_offset
, -msize
) && known_eq (msize
, -ainc_step
))
4393 return comp_cost (data
->costs
[AINC_PRE_DEC
], 0);
4395 return infinite_cost
;
4398 /* Return cost of computing USE's address expression by using CAND.
4399 AFF_INV and AFF_VAR represent invariant and variant parts of the
4400 address expression, respectively. If AFF_INV is simple, store
4401 the loop invariant variables which are depended by it in INV_VARS;
4402 if AFF_INV is complicated, handle it as a new invariant expression
4403 and record it in INV_EXPR. RATIO indicates multiple times between
4404 steps of USE and CAND. If CAN_AUTOINC is non-NULL, store boolean
4405 value to it indicating if this is an auto-increment address. */
4408 get_address_cost (struct ivopts_data
*data
, struct iv_use
*use
,
4409 struct iv_cand
*cand
, aff_tree
*aff_inv
,
4410 aff_tree
*aff_var
, HOST_WIDE_INT ratio
,
4411 bitmap
*inv_vars
, iv_inv_expr_ent
**inv_expr
,
4412 bool *can_autoinc
, bool speed
)
4415 bool simple_inv
= true;
4416 tree comp_inv
= NULL_TREE
, type
= aff_var
->type
;
4417 comp_cost var_cost
= no_cost
, cost
= no_cost
;
4418 struct mem_address parts
= {NULL_TREE
, integer_one_node
,
4419 NULL_TREE
, NULL_TREE
, NULL_TREE
};
4420 machine_mode addr_mode
= TYPE_MODE (type
);
4421 machine_mode mem_mode
= TYPE_MODE (use
->mem_type
);
4422 addr_space_t as
= TYPE_ADDR_SPACE (TREE_TYPE (use
->iv
->base
));
4423 /* Only true if ratio != 1. */
4424 bool ok_with_ratio_p
= false;
4425 bool ok_without_ratio_p
= false;
4427 if (!aff_combination_const_p (aff_inv
))
4429 parts
.index
= integer_one_node
;
4430 /* Addressing mode "base + index". */
4431 ok_without_ratio_p
= valid_mem_ref_p (mem_mode
, as
, &parts
);
4434 parts
.step
= wide_int_to_tree (type
, ratio
);
4435 /* Addressing mode "base + index << scale". */
4436 ok_with_ratio_p
= valid_mem_ref_p (mem_mode
, as
, &parts
);
4437 if (!ok_with_ratio_p
)
4438 parts
.step
= NULL_TREE
;
4440 if (ok_with_ratio_p
|| ok_without_ratio_p
)
4442 if (maybe_ne (aff_inv
->offset
, 0))
4444 parts
.offset
= wide_int_to_tree (sizetype
, aff_inv
->offset
);
4445 /* Addressing mode "base + index [<< scale] + offset". */
4446 if (!valid_mem_ref_p (mem_mode
, as
, &parts
))
4447 parts
.offset
= NULL_TREE
;
4449 aff_inv
->offset
= 0;
4452 move_fixed_address_to_symbol (&parts
, aff_inv
);
4453 /* Base is fixed address and is moved to symbol part. */
4454 if (parts
.symbol
!= NULL_TREE
&& aff_combination_zero_p (aff_inv
))
4455 parts
.base
= NULL_TREE
;
4457 /* Addressing mode "symbol + base + index [<< scale] [+ offset]". */
4458 if (parts
.symbol
!= NULL_TREE
4459 && !valid_mem_ref_p (mem_mode
, as
, &parts
))
4461 aff_combination_add_elt (aff_inv
, parts
.symbol
, 1);
4462 parts
.symbol
= NULL_TREE
;
4463 /* Reset SIMPLE_INV since symbol address needs to be computed
4464 outside of address expression in this case. */
4466 /* Symbol part is moved back to base part, it can't be NULL. */
4467 parts
.base
= integer_one_node
;
4471 parts
.index
= NULL_TREE
;
4475 poly_int64 ainc_step
;
4478 && ptrdiff_tree_p (cand
->iv
->step
, &ainc_step
))
4480 poly_int64 ainc_offset
= (aff_inv
->offset
).force_shwi ();
4482 if (stmt_after_increment (data
->current_loop
, cand
, use
->stmt
))
4483 ainc_offset
+= ainc_step
;
4484 cost
= get_address_cost_ainc (ainc_step
, ainc_offset
,
4485 addr_mode
, mem_mode
, as
, speed
);
4486 if (!cost
.infinite_cost_p ())
4488 *can_autoinc
= true;
4493 if (!aff_combination_zero_p (aff_inv
))
4495 parts
.offset
= wide_int_to_tree (sizetype
, aff_inv
->offset
);
4496 /* Addressing mode "base + offset". */
4497 if (!valid_mem_ref_p (mem_mode
, as
, &parts
))
4498 parts
.offset
= NULL_TREE
;
4500 aff_inv
->offset
= 0;
4505 simple_inv
= (aff_inv
== NULL
4506 || aff_combination_const_p (aff_inv
)
4507 || aff_combination_singleton_var_p (aff_inv
));
4508 if (!aff_combination_zero_p (aff_inv
))
4509 comp_inv
= aff_combination_to_tree (aff_inv
);
4510 if (comp_inv
!= NULL_TREE
)
4511 cost
= force_var_cost (data
, comp_inv
, inv_vars
);
4512 if (ratio
!= 1 && parts
.step
== NULL_TREE
)
4513 var_cost
+= mult_by_coeff_cost (ratio
, addr_mode
, speed
);
4514 if (comp_inv
!= NULL_TREE
&& parts
.index
== NULL_TREE
)
4515 var_cost
+= add_cost (speed
, addr_mode
);
4517 if (comp_inv
&& inv_expr
&& !simple_inv
)
4519 *inv_expr
= get_loop_invariant_expr (data
, comp_inv
);
4520 /* Clear depends on. */
4521 if (*inv_expr
!= NULL
&& inv_vars
&& *inv_vars
)
4522 bitmap_clear (*inv_vars
);
4524 /* Cost of small invariant expression adjusted against loop niters
4525 is usually zero, which makes it difficult to be differentiated
4526 from candidate based on loop invariant variables. Secondly, the
4527 generated invariant expression may not be hoisted out of loop by
4528 following pass. We penalize the cost by rounding up in order to
4529 neutralize such effects. */
4530 cost
.cost
= adjust_setup_cost (data
, cost
.cost
, true);
4531 cost
.scratch
= cost
.cost
;
4535 addr
= addr_for_mem_ref (&parts
, as
, false);
4536 gcc_assert (memory_address_addr_space_p (mem_mode
, addr
, as
));
4537 cost
+= address_cost (addr
, mem_mode
, as
, speed
);
4539 if (parts
.symbol
!= NULL_TREE
)
4540 cost
.complexity
+= 1;
4541 /* Don't increase the complexity of adding a scaled index if it's
4542 the only kind of index that the target allows. */
4543 if (parts
.step
!= NULL_TREE
&& ok_without_ratio_p
)
4544 cost
.complexity
+= 1;
4545 if (parts
.base
!= NULL_TREE
&& parts
.index
!= NULL_TREE
)
4546 cost
.complexity
+= 1;
4547 if (parts
.offset
!= NULL_TREE
&& !integer_zerop (parts
.offset
))
4548 cost
.complexity
+= 1;
4553 /* Scale (multiply) the computed COST (except scratch part that should be
4554 hoisted out a loop) by header->frequency / AT->frequency, which makes
4555 expected cost more accurate. */
4558 get_scaled_computation_cost_at (ivopts_data
*data
, gimple
*at
, comp_cost cost
)
4561 && data
->current_loop
->header
->count
.to_frequency (cfun
) > 0)
4563 basic_block bb
= gimple_bb (at
);
4564 gcc_assert (cost
.scratch
<= cost
.cost
);
4565 int scale_factor
= (int)(intptr_t) bb
->aux
;
4566 if (scale_factor
== 1)
4570 = cost
.scratch
+ (cost
.cost
- cost
.scratch
) * scale_factor
;
4572 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
4573 fprintf (dump_file
, "Scaling cost based on bb prob "
4574 "by %2.2f: %d (scratch: %d) -> %d\n",
4575 1.0f
* scale_factor
, cost
.cost
, cost
.scratch
, scaled_cost
);
4577 cost
.cost
= scaled_cost
;
4583 /* Determines the cost of the computation by that USE is expressed
4584 from induction variable CAND. If ADDRESS_P is true, we just need
4585 to create an address from it, otherwise we want to get it into
4586 register. A set of invariants we depend on is stored in INV_VARS.
4587 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4588 addressing is likely. If INV_EXPR is nonnull, record invariant
4589 expr entry in it. */
4592 get_computation_cost (struct ivopts_data
*data
, struct iv_use
*use
,
4593 struct iv_cand
*cand
, bool address_p
, bitmap
*inv_vars
,
4594 bool *can_autoinc
, iv_inv_expr_ent
**inv_expr
)
4596 gimple
*at
= use
->stmt
;
4597 tree ubase
= use
->iv
->base
, cbase
= cand
->iv
->base
;
4598 tree utype
= TREE_TYPE (ubase
), ctype
= TREE_TYPE (cbase
);
4599 tree comp_inv
= NULL_TREE
;
4600 HOST_WIDE_INT ratio
, aratio
;
4603 aff_tree aff_inv
, aff_var
;
4604 bool speed
= optimize_bb_for_speed_p (gimple_bb (at
));
4609 *can_autoinc
= false;
4613 /* Check if we have enough precision to express the values of use. */
4614 if (TYPE_PRECISION (utype
) > TYPE_PRECISION (ctype
))
4615 return infinite_cost
;
4618 || (use
->iv
->base_object
4619 && cand
->iv
->base_object
4620 && POINTER_TYPE_P (TREE_TYPE (use
->iv
->base_object
))
4621 && POINTER_TYPE_P (TREE_TYPE (cand
->iv
->base_object
))))
4623 /* Do not try to express address of an object with computation based
4624 on address of a different object. This may cause problems in rtl
4625 level alias analysis (that does not expect this to be happening,
4626 as this is illegal in C), and would be unlikely to be useful
4628 if (use
->iv
->base_object
4629 && cand
->iv
->base_object
4630 && !operand_equal_p (use
->iv
->base_object
, cand
->iv
->base_object
, 0))
4631 return infinite_cost
;
4634 if (!get_computation_aff_1 (data
->current_loop
, at
, use
,
4635 cand
, &aff_inv
, &aff_var
, &rat
)
4636 || !wi::fits_shwi_p (rat
))
4637 return infinite_cost
;
4639 ratio
= rat
.to_shwi ();
4642 cost
= get_address_cost (data
, use
, cand
, &aff_inv
, &aff_var
, ratio
,
4643 inv_vars
, inv_expr
, can_autoinc
, speed
);
4644 return get_scaled_computation_cost_at (data
, at
, cost
);
4647 bool simple_inv
= (aff_combination_const_p (&aff_inv
)
4648 || aff_combination_singleton_var_p (&aff_inv
));
4649 tree signed_type
= signed_type_for (aff_combination_type (&aff_inv
));
4650 aff_combination_convert (&aff_inv
, signed_type
);
4651 if (!aff_combination_zero_p (&aff_inv
))
4652 comp_inv
= aff_combination_to_tree (&aff_inv
);
4654 cost
= force_var_cost (data
, comp_inv
, inv_vars
);
4655 if (comp_inv
&& inv_expr
&& !simple_inv
)
4657 *inv_expr
= get_loop_invariant_expr (data
, comp_inv
);
4658 /* Clear depends on. */
4659 if (*inv_expr
!= NULL
&& inv_vars
&& *inv_vars
)
4660 bitmap_clear (*inv_vars
);
4662 cost
.cost
= adjust_setup_cost (data
, cost
.cost
);
4663 /* Record setup cost in scratch field. */
4664 cost
.scratch
= cost
.cost
;
4666 /* Cost of constant integer can be covered when adding invariant part to
4668 else if (comp_inv
&& CONSTANT_CLASS_P (comp_inv
))
4671 /* Need type narrowing to represent use with cand. */
4672 if (TYPE_PRECISION (utype
) < TYPE_PRECISION (ctype
))
4674 machine_mode outer_mode
= TYPE_MODE (utype
);
4675 machine_mode inner_mode
= TYPE_MODE (ctype
);
4676 cost
+= comp_cost (convert_cost (outer_mode
, inner_mode
, speed
), 0);
4679 /* Turn a + i * (-c) into a - i * c. */
4680 if (ratio
< 0 && comp_inv
&& !integer_zerop (comp_inv
))
4686 cost
+= mult_by_coeff_cost (aratio
, TYPE_MODE (utype
), speed
);
4688 /* TODO: We may also need to check if we can compute a + i * 4 in one
4690 /* Need to add up the invariant and variant parts. */
4691 if (comp_inv
&& !integer_zerop (comp_inv
))
4692 cost
+= add_cost (speed
, TYPE_MODE (utype
));
4694 return get_scaled_computation_cost_at (data
, at
, cost
);
4697 /* Determines cost of computing the use in GROUP with CAND in a generic
4701 determine_group_iv_cost_generic (struct ivopts_data
*data
,
4702 struct iv_group
*group
, struct iv_cand
*cand
)
4705 iv_inv_expr_ent
*inv_expr
= NULL
;
4706 bitmap inv_vars
= NULL
, inv_exprs
= NULL
;
4707 struct iv_use
*use
= group
->vuses
[0];
4709 /* The simple case first -- if we need to express value of the preserved
4710 original biv, the cost is 0. This also prevents us from counting the
4711 cost of increment twice -- once at this use and once in the cost of
4713 if (cand
->pos
== IP_ORIGINAL
&& cand
->incremented_at
== use
->stmt
)
4716 cost
= get_computation_cost (data
, use
, cand
, false,
4717 &inv_vars
, NULL
, &inv_expr
);
4721 inv_exprs
= BITMAP_ALLOC (NULL
);
4722 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
4724 set_group_iv_cost (data
, group
, cand
, cost
, inv_vars
,
4725 NULL_TREE
, ERROR_MARK
, inv_exprs
);
4726 return !cost
.infinite_cost_p ();
4729 /* Determines cost of computing uses in GROUP with CAND in addresses. */
4732 determine_group_iv_cost_address (struct ivopts_data
*data
,
4733 struct iv_group
*group
, struct iv_cand
*cand
)
4736 bitmap inv_vars
= NULL
, inv_exprs
= NULL
;
4738 iv_inv_expr_ent
*inv_expr
= NULL
;
4739 struct iv_use
*use
= group
->vuses
[0];
4740 comp_cost sum_cost
= no_cost
, cost
;
4742 cost
= get_computation_cost (data
, use
, cand
, true,
4743 &inv_vars
, &can_autoinc
, &inv_expr
);
4747 inv_exprs
= BITMAP_ALLOC (NULL
);
4748 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
4751 if (!sum_cost
.infinite_cost_p () && cand
->ainc_use
== use
)
4754 sum_cost
-= cand
->cost_step
;
4755 /* If we generated the candidate solely for exploiting autoincrement
4756 opportunities, and it turns out it can't be used, set the cost to
4757 infinity to make sure we ignore it. */
4758 else if (cand
->pos
== IP_AFTER_USE
|| cand
->pos
== IP_BEFORE_USE
)
4759 sum_cost
= infinite_cost
;
4762 /* Uses in a group can share setup code, so only add setup cost once. */
4763 cost
-= cost
.scratch
;
4764 /* Compute and add costs for the remaining uses of this group. */
4765 for (i
= 1; i
< group
->vuses
.length () && !sum_cost
.infinite_cost_p (); i
++)
4767 struct iv_use
*next
= group
->vuses
[i
];
4769 /* TODO: We could skip computing cost for sub iv_use when it has the
4770 same cost as the first iv_use, but the cost really depends on the
4771 offset and where the iv_use is. */
4772 cost
= get_computation_cost (data
, next
, cand
, true,
4773 NULL
, &can_autoinc
, &inv_expr
);
4777 inv_exprs
= BITMAP_ALLOC (NULL
);
4779 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
4783 set_group_iv_cost (data
, group
, cand
, sum_cost
, inv_vars
,
4784 NULL_TREE
, ERROR_MARK
, inv_exprs
);
4786 return !sum_cost
.infinite_cost_p ();
4789 /* Computes value of candidate CAND at position AT in iteration NITER, and
4790 stores it to VAL. */
4793 cand_value_at (struct loop
*loop
, struct iv_cand
*cand
, gimple
*at
, tree niter
,
4796 aff_tree step
, delta
, nit
;
4797 struct iv
*iv
= cand
->iv
;
4798 tree type
= TREE_TYPE (iv
->base
);
4800 if (POINTER_TYPE_P (type
))
4801 steptype
= sizetype
;
4803 steptype
= unsigned_type_for (type
);
4805 tree_to_aff_combination (iv
->step
, TREE_TYPE (iv
->step
), &step
);
4806 aff_combination_convert (&step
, steptype
);
4807 tree_to_aff_combination (niter
, TREE_TYPE (niter
), &nit
);
4808 aff_combination_convert (&nit
, steptype
);
4809 aff_combination_mult (&nit
, &step
, &delta
);
4810 if (stmt_after_increment (loop
, cand
, at
))
4811 aff_combination_add (&delta
, &step
);
4813 tree_to_aff_combination (iv
->base
, type
, val
);
4814 if (!POINTER_TYPE_P (type
))
4815 aff_combination_convert (val
, steptype
);
4816 aff_combination_add (val
, &delta
);
4819 /* Returns period of induction variable iv. */
4822 iv_period (struct iv
*iv
)
4824 tree step
= iv
->step
, period
, type
;
4827 gcc_assert (step
&& TREE_CODE (step
) == INTEGER_CST
);
4829 type
= unsigned_type_for (TREE_TYPE (step
));
4830 /* Period of the iv is lcm (step, type_range)/step -1,
4831 i.e., N*type_range/step - 1. Since type range is power
4832 of two, N == (step >> num_of_ending_zeros_binary (step),
4833 so the final result is
4835 (type_range >> num_of_ending_zeros_binary (step)) - 1
4838 pow2div
= num_ending_zeros (step
);
4840 period
= build_low_bits_mask (type
,
4841 (TYPE_PRECISION (type
)
4842 - tree_to_uhwi (pow2div
)));
4847 /* Returns the comparison operator used when eliminating the iv USE. */
4849 static enum tree_code
4850 iv_elimination_compare (struct ivopts_data
*data
, struct iv_use
*use
)
4852 struct loop
*loop
= data
->current_loop
;
4856 ex_bb
= gimple_bb (use
->stmt
);
4857 exit
= EDGE_SUCC (ex_bb
, 0);
4858 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
4859 exit
= EDGE_SUCC (ex_bb
, 1);
4861 return (exit
->flags
& EDGE_TRUE_VALUE
? EQ_EXPR
: NE_EXPR
);
4864 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4865 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4866 calculation is performed in non-wrapping type.
4868 TODO: More generally, we could test for the situation that
4869 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4870 This would require knowing the sign of OFFSET. */
4873 difference_cannot_overflow_p (struct ivopts_data
*data
, tree base
, tree offset
)
4875 enum tree_code code
;
4877 aff_tree aff_e1
, aff_e2
, aff_offset
;
4879 if (!nowrap_type_p (TREE_TYPE (base
)))
4882 base
= expand_simple_operations (base
);
4884 if (TREE_CODE (base
) == SSA_NAME
)
4886 gimple
*stmt
= SSA_NAME_DEF_STMT (base
);
4888 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
4891 code
= gimple_assign_rhs_code (stmt
);
4892 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4895 e1
= gimple_assign_rhs1 (stmt
);
4896 e2
= gimple_assign_rhs2 (stmt
);
4900 code
= TREE_CODE (base
);
4901 if (get_gimple_rhs_class (code
) != GIMPLE_BINARY_RHS
)
4903 e1
= TREE_OPERAND (base
, 0);
4904 e2
= TREE_OPERAND (base
, 1);
4907 /* Use affine expansion as deeper inspection to prove the equality. */
4908 tree_to_aff_combination_expand (e2
, TREE_TYPE (e2
),
4909 &aff_e2
, &data
->name_expansion_cache
);
4910 tree_to_aff_combination_expand (offset
, TREE_TYPE (offset
),
4911 &aff_offset
, &data
->name_expansion_cache
);
4912 aff_combination_scale (&aff_offset
, -1);
4916 aff_combination_add (&aff_e2
, &aff_offset
);
4917 if (aff_combination_zero_p (&aff_e2
))
4920 tree_to_aff_combination_expand (e1
, TREE_TYPE (e1
),
4921 &aff_e1
, &data
->name_expansion_cache
);
4922 aff_combination_add (&aff_e1
, &aff_offset
);
4923 return aff_combination_zero_p (&aff_e1
);
4925 case POINTER_PLUS_EXPR
:
4926 aff_combination_add (&aff_e2
, &aff_offset
);
4927 return aff_combination_zero_p (&aff_e2
);
4934 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4935 comparison with CAND. NITER describes the number of iterations of
4936 the loops. If successful, the comparison in COMP_P is altered accordingly.
4938 We aim to handle the following situation:
4954 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4955 We aim to optimize this to
4963 while (p < p_0 - a + b);
4965 This preserves the correctness, since the pointer arithmetics does not
4966 overflow. More precisely:
4968 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4969 overflow in computing it or the values of p.
4970 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4971 overflow. To prove this, we use the fact that p_0 = base + a. */
4974 iv_elimination_compare_lt (struct ivopts_data
*data
,
4975 struct iv_cand
*cand
, enum tree_code
*comp_p
,
4976 struct tree_niter_desc
*niter
)
4978 tree cand_type
, a
, b
, mbz
, nit_type
= TREE_TYPE (niter
->niter
), offset
;
4979 struct aff_tree nit
, tmpa
, tmpb
;
4980 enum tree_code comp
;
4983 /* We need to know that the candidate induction variable does not overflow.
4984 While more complex analysis may be used to prove this, for now just
4985 check that the variable appears in the original program and that it
4986 is computed in a type that guarantees no overflows. */
4987 cand_type
= TREE_TYPE (cand
->iv
->base
);
4988 if (cand
->pos
!= IP_ORIGINAL
|| !nowrap_type_p (cand_type
))
4991 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4992 the calculation of the BOUND could overflow, making the comparison
4994 if (!data
->loop_single_exit_p
)
4997 /* We need to be able to decide whether candidate is increasing or decreasing
4998 in order to choose the right comparison operator. */
4999 if (!cst_and_fits_in_hwi (cand
->iv
->step
))
5001 step
= int_cst_value (cand
->iv
->step
);
5003 /* Check that the number of iterations matches the expected pattern:
5004 a + 1 > b ? 0 : b - a - 1. */
5005 mbz
= niter
->may_be_zero
;
5006 if (TREE_CODE (mbz
) == GT_EXPR
)
5008 /* Handle a + 1 > b. */
5009 tree op0
= TREE_OPERAND (mbz
, 0);
5010 if (TREE_CODE (op0
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op0
, 1)))
5012 a
= TREE_OPERAND (op0
, 0);
5013 b
= TREE_OPERAND (mbz
, 1);
5018 else if (TREE_CODE (mbz
) == LT_EXPR
)
5020 tree op1
= TREE_OPERAND (mbz
, 1);
5022 /* Handle b < a + 1. */
5023 if (TREE_CODE (op1
) == PLUS_EXPR
&& integer_onep (TREE_OPERAND (op1
, 1)))
5025 a
= TREE_OPERAND (op1
, 0);
5026 b
= TREE_OPERAND (mbz
, 0);
5034 /* Expected number of iterations is B - A - 1. Check that it matches
5035 the actual number, i.e., that B - A - NITER = 1. */
5036 tree_to_aff_combination (niter
->niter
, nit_type
, &nit
);
5037 tree_to_aff_combination (fold_convert (nit_type
, a
), nit_type
, &tmpa
);
5038 tree_to_aff_combination (fold_convert (nit_type
, b
), nit_type
, &tmpb
);
5039 aff_combination_scale (&nit
, -1);
5040 aff_combination_scale (&tmpa
, -1);
5041 aff_combination_add (&tmpb
, &tmpa
);
5042 aff_combination_add (&tmpb
, &nit
);
5043 if (tmpb
.n
!= 0 || maybe_ne (tmpb
.offset
, 1))
5046 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
5048 offset
= fold_build2 (MULT_EXPR
, TREE_TYPE (cand
->iv
->step
),
5050 fold_convert (TREE_TYPE (cand
->iv
->step
), a
));
5051 if (!difference_cannot_overflow_p (data
, cand
->iv
->base
, offset
))
5054 /* Determine the new comparison operator. */
5055 comp
= step
< 0 ? GT_EXPR
: LT_EXPR
;
5056 if (*comp_p
== NE_EXPR
)
5058 else if (*comp_p
== EQ_EXPR
)
5059 *comp_p
= invert_tree_comparison (comp
, false);
5066 /* Check whether it is possible to express the condition in USE by comparison
5067 of candidate CAND. If so, store the value compared with to BOUND, and the
5068 comparison operator to COMP. */
5071 may_eliminate_iv (struct ivopts_data
*data
,
5072 struct iv_use
*use
, struct iv_cand
*cand
, tree
*bound
,
5073 enum tree_code
*comp
)
5078 struct loop
*loop
= data
->current_loop
;
5080 struct tree_niter_desc
*desc
= NULL
;
5082 if (TREE_CODE (cand
->iv
->step
) != INTEGER_CST
)
5085 /* For now works only for exits that dominate the loop latch.
5086 TODO: extend to other conditions inside loop body. */
5087 ex_bb
= gimple_bb (use
->stmt
);
5088 if (use
->stmt
!= last_stmt (ex_bb
)
5089 || gimple_code (use
->stmt
) != GIMPLE_COND
5090 || !dominated_by_p (CDI_DOMINATORS
, loop
->latch
, ex_bb
))
5093 exit
= EDGE_SUCC (ex_bb
, 0);
5094 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5095 exit
= EDGE_SUCC (ex_bb
, 1);
5096 if (flow_bb_inside_loop_p (loop
, exit
->dest
))
5099 desc
= niter_for_exit (data
, exit
);
5103 /* Determine whether we can use the variable to test the exit condition.
5104 This is the case iff the period of the induction variable is greater
5105 than the number of iterations for which the exit condition is true. */
5106 period
= iv_period (cand
->iv
);
5108 /* If the number of iterations is constant, compare against it directly. */
5109 if (TREE_CODE (desc
->niter
) == INTEGER_CST
)
5111 /* See cand_value_at. */
5112 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5114 if (!tree_int_cst_lt (desc
->niter
, period
))
5119 if (tree_int_cst_lt (period
, desc
->niter
))
5124 /* If not, and if this is the only possible exit of the loop, see whether
5125 we can get a conservative estimate on the number of iterations of the
5126 entire loop and compare against that instead. */
5129 widest_int period_value
, max_niter
;
5131 max_niter
= desc
->max
;
5132 if (stmt_after_increment (loop
, cand
, use
->stmt
))
5134 period_value
= wi::to_widest (period
);
5135 if (wi::gtu_p (max_niter
, period_value
))
5137 /* See if we can take advantage of inferred loop bound
5139 if (data
->loop_single_exit_p
)
5141 if (!max_loop_iterations (loop
, &max_niter
))
5143 /* The loop bound is already adjusted by adding 1. */
5144 if (wi::gtu_p (max_niter
, period_value
))
5152 cand_value_at (loop
, cand
, use
->stmt
, desc
->niter
, &bnd
);
5154 *bound
= fold_convert (TREE_TYPE (cand
->iv
->base
),
5155 aff_combination_to_tree (&bnd
));
5156 *comp
= iv_elimination_compare (data
, use
);
5158 /* It is unlikely that computing the number of iterations using division
5159 would be more profitable than keeping the original induction variable. */
5160 if (expression_expensive_p (*bound
))
5163 /* Sometimes, it is possible to handle the situation that the number of
5164 iterations may be zero unless additional assumptions by using <
5165 instead of != in the exit condition.
5167 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5168 base the exit condition on it. However, that is often too
5170 if (!integer_zerop (desc
->may_be_zero
))
5171 return iv_elimination_compare_lt (data
, cand
, comp
, desc
);
5176 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5177 be copied, if it is used in the loop body and DATA->body_includes_call. */
5180 parm_decl_cost (struct ivopts_data
*data
, tree bound
)
5182 tree sbound
= bound
;
5183 STRIP_NOPS (sbound
);
5185 if (TREE_CODE (sbound
) == SSA_NAME
5186 && SSA_NAME_IS_DEFAULT_DEF (sbound
)
5187 && TREE_CODE (SSA_NAME_VAR (sbound
)) == PARM_DECL
5188 && data
->body_includes_call
)
5189 return COSTS_N_INSNS (1);
5194 /* Determines cost of computing the use in GROUP with CAND in a condition. */
5197 determine_group_iv_cost_cond (struct ivopts_data
*data
,
5198 struct iv_group
*group
, struct iv_cand
*cand
)
5200 tree bound
= NULL_TREE
;
5202 bitmap inv_exprs
= NULL
;
5203 bitmap inv_vars_elim
= NULL
, inv_vars_express
= NULL
, inv_vars
;
5204 comp_cost elim_cost
= infinite_cost
, express_cost
, cost
, bound_cost
;
5205 enum comp_iv_rewrite rewrite_type
;
5206 iv_inv_expr_ent
*inv_expr_elim
= NULL
, *inv_expr_express
= NULL
, *inv_expr
;
5207 tree
*control_var
, *bound_cst
;
5208 enum tree_code comp
= ERROR_MARK
;
5209 struct iv_use
*use
= group
->vuses
[0];
5211 /* Extract condition operands. */
5212 rewrite_type
= extract_cond_operands (data
, use
->stmt
, &control_var
,
5213 &bound_cst
, NULL
, &cmp_iv
);
5214 gcc_assert (rewrite_type
!= COMP_IV_NA
);
5216 /* Try iv elimination. */
5217 if (rewrite_type
== COMP_IV_ELIM
5218 && may_eliminate_iv (data
, use
, cand
, &bound
, &comp
))
5220 elim_cost
= force_var_cost (data
, bound
, &inv_vars_elim
);
5221 if (elim_cost
.cost
== 0)
5222 elim_cost
.cost
= parm_decl_cost (data
, bound
);
5223 else if (TREE_CODE (bound
) == INTEGER_CST
)
5225 /* If we replace a loop condition 'i < n' with 'p < base + n',
5226 inv_vars_elim will have 'base' and 'n' set, which implies that both
5227 'base' and 'n' will be live during the loop. More likely,
5228 'base + n' will be loop invariant, resulting in only one live value
5229 during the loop. So in that case we clear inv_vars_elim and set
5230 inv_expr_elim instead. */
5231 if (inv_vars_elim
&& bitmap_count_bits (inv_vars_elim
) > 1)
5233 inv_expr_elim
= get_loop_invariant_expr (data
, bound
);
5234 bitmap_clear (inv_vars_elim
);
5236 /* The bound is a loop invariant, so it will be only computed
5238 elim_cost
.cost
= adjust_setup_cost (data
, elim_cost
.cost
);
5241 /* When the condition is a comparison of the candidate IV against
5242 zero, prefer this IV.
5244 TODO: The constant that we're subtracting from the cost should
5245 be target-dependent. This information should be added to the
5246 target costs for each backend. */
5247 if (!elim_cost
.infinite_cost_p () /* Do not try to decrease infinite! */
5248 && integer_zerop (*bound_cst
)
5249 && (operand_equal_p (*control_var
, cand
->var_after
, 0)
5250 || operand_equal_p (*control_var
, cand
->var_before
, 0)))
5253 express_cost
= get_computation_cost (data
, use
, cand
, false,
5254 &inv_vars_express
, NULL
,
5257 find_inv_vars (data
, &cmp_iv
->base
, &inv_vars_express
);
5259 /* Count the cost of the original bound as well. */
5260 bound_cost
= force_var_cost (data
, *bound_cst
, NULL
);
5261 if (bound_cost
.cost
== 0)
5262 bound_cost
.cost
= parm_decl_cost (data
, *bound_cst
);
5263 else if (TREE_CODE (*bound_cst
) == INTEGER_CST
)
5264 bound_cost
.cost
= 0;
5265 express_cost
+= bound_cost
;
5267 /* Choose the better approach, preferring the eliminated IV. */
5268 if (elim_cost
<= express_cost
)
5271 inv_vars
= inv_vars_elim
;
5272 inv_vars_elim
= NULL
;
5273 inv_expr
= inv_expr_elim
;
5277 cost
= express_cost
;
5278 inv_vars
= inv_vars_express
;
5279 inv_vars_express
= NULL
;
5282 inv_expr
= inv_expr_express
;
5287 inv_exprs
= BITMAP_ALLOC (NULL
);
5288 bitmap_set_bit (inv_exprs
, inv_expr
->id
);
5290 set_group_iv_cost (data
, group
, cand
, cost
,
5291 inv_vars
, bound
, comp
, inv_exprs
);
5294 BITMAP_FREE (inv_vars_elim
);
5295 if (inv_vars_express
)
5296 BITMAP_FREE (inv_vars_express
);
5298 return !cost
.infinite_cost_p ();
5301 /* Determines cost of computing uses in GROUP with CAND. Returns false
5302 if USE cannot be represented with CAND. */
5305 determine_group_iv_cost (struct ivopts_data
*data
,
5306 struct iv_group
*group
, struct iv_cand
*cand
)
5308 switch (group
->type
)
5310 case USE_NONLINEAR_EXPR
:
5311 return determine_group_iv_cost_generic (data
, group
, cand
);
5313 case USE_REF_ADDRESS
:
5314 case USE_PTR_ADDRESS
:
5315 return determine_group_iv_cost_address (data
, group
, cand
);
5318 return determine_group_iv_cost_cond (data
, group
, cand
);
5325 /* Return true if get_computation_cost indicates that autoincrement is
5326 a possibility for the pair of USE and CAND, false otherwise. */
5329 autoinc_possible_for_pair (struct ivopts_data
*data
, struct iv_use
*use
,
5330 struct iv_cand
*cand
)
5332 if (!address_p (use
->type
))
5335 bool can_autoinc
= false;
5336 get_computation_cost (data
, use
, cand
, true, NULL
, &can_autoinc
, NULL
);
5340 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5341 use that allows autoincrement, and set their AINC_USE if possible. */
5344 set_autoinc_for_original_candidates (struct ivopts_data
*data
)
5348 for (i
= 0; i
< data
->vcands
.length (); i
++)
5350 struct iv_cand
*cand
= data
->vcands
[i
];
5351 struct iv_use
*closest_before
= NULL
;
5352 struct iv_use
*closest_after
= NULL
;
5353 if (cand
->pos
!= IP_ORIGINAL
)
5356 for (j
= 0; j
< data
->vgroups
.length (); j
++)
5358 struct iv_group
*group
= data
->vgroups
[j
];
5359 struct iv_use
*use
= group
->vuses
[0];
5360 unsigned uid
= gimple_uid (use
->stmt
);
5362 if (gimple_bb (use
->stmt
) != gimple_bb (cand
->incremented_at
))
5365 if (uid
< gimple_uid (cand
->incremented_at
)
5366 && (closest_before
== NULL
5367 || uid
> gimple_uid (closest_before
->stmt
)))
5368 closest_before
= use
;
5370 if (uid
> gimple_uid (cand
->incremented_at
)
5371 && (closest_after
== NULL
5372 || uid
< gimple_uid (closest_after
->stmt
)))
5373 closest_after
= use
;
5376 if (closest_before
!= NULL
5377 && autoinc_possible_for_pair (data
, closest_before
, cand
))
5378 cand
->ainc_use
= closest_before
;
5379 else if (closest_after
!= NULL
5380 && autoinc_possible_for_pair (data
, closest_after
, cand
))
5381 cand
->ainc_use
= closest_after
;
5385 /* Relate compare use with all candidates. */
5388 relate_compare_use_with_all_cands (struct ivopts_data
*data
)
5390 unsigned i
, count
= data
->vcands
.length ();
5391 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5393 struct iv_group
*group
= data
->vgroups
[i
];
5395 if (group
->type
== USE_COMPARE
)
5396 bitmap_set_range (group
->related_cands
, 0, count
);
5400 /* Finds the candidates for the induction variables. */
5403 find_iv_candidates (struct ivopts_data
*data
)
5405 /* Add commonly used ivs. */
5406 add_standard_iv_candidates (data
);
5408 /* Add old induction variables. */
5409 add_iv_candidate_for_bivs (data
);
5411 /* Add induction variables derived from uses. */
5412 add_iv_candidate_for_groups (data
);
5414 set_autoinc_for_original_candidates (data
);
5416 /* Record the important candidates. */
5417 record_important_candidates (data
);
5419 /* Relate compare iv_use with all candidates. */
5420 if (!data
->consider_all_candidates
)
5421 relate_compare_use_with_all_cands (data
);
5423 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5427 fprintf (dump_file
, "\n<Important Candidates>:\t");
5428 for (i
= 0; i
< data
->vcands
.length (); i
++)
5429 if (data
->vcands
[i
]->important
)
5430 fprintf (dump_file
, " %d,", data
->vcands
[i
]->id
);
5431 fprintf (dump_file
, "\n");
5433 fprintf (dump_file
, "\n<Group, Cand> Related:\n");
5434 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5436 struct iv_group
*group
= data
->vgroups
[i
];
5438 if (group
->related_cands
)
5440 fprintf (dump_file
, " Group %d:\t", group
->id
);
5441 dump_bitmap (dump_file
, group
->related_cands
);
5444 fprintf (dump_file
, "\n");
5448 /* Determines costs of computing use of iv with an iv candidate. */
5451 determine_group_iv_costs (struct ivopts_data
*data
)
5454 struct iv_cand
*cand
;
5455 struct iv_group
*group
;
5456 bitmap to_clear
= BITMAP_ALLOC (NULL
);
5458 alloc_use_cost_map (data
);
5460 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5462 group
= data
->vgroups
[i
];
5464 if (data
->consider_all_candidates
)
5466 for (j
= 0; j
< data
->vcands
.length (); j
++)
5468 cand
= data
->vcands
[j
];
5469 determine_group_iv_cost (data
, group
, cand
);
5476 EXECUTE_IF_SET_IN_BITMAP (group
->related_cands
, 0, j
, bi
)
5478 cand
= data
->vcands
[j
];
5479 if (!determine_group_iv_cost (data
, group
, cand
))
5480 bitmap_set_bit (to_clear
, j
);
5483 /* Remove the candidates for that the cost is infinite from
5484 the list of related candidates. */
5485 bitmap_and_compl_into (group
->related_cands
, to_clear
);
5486 bitmap_clear (to_clear
);
5490 BITMAP_FREE (to_clear
);
5492 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5496 /* Dump invariant variables. */
5497 fprintf (dump_file
, "\n<Invariant Vars>:\n");
5498 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
5500 struct version_info
*info
= ver_info (data
, i
);
5503 fprintf (dump_file
, "Inv %d:\t", info
->inv_id
);
5504 print_generic_expr (dump_file
, info
->name
, TDF_SLIM
);
5505 fprintf (dump_file
, "%s\n",
5506 info
->has_nonlin_use
? "" : "\t(eliminable)");
5510 /* Dump invariant expressions. */
5511 fprintf (dump_file
, "\n<Invariant Expressions>:\n");
5512 auto_vec
<iv_inv_expr_ent
*> list (data
->inv_expr_tab
->elements ());
5514 for (hash_table
<iv_inv_expr_hasher
>::iterator it
5515 = data
->inv_expr_tab
->begin (); it
!= data
->inv_expr_tab
->end ();
5517 list
.safe_push (*it
);
5519 list
.qsort (sort_iv_inv_expr_ent
);
5521 for (i
= 0; i
< list
.length (); ++i
)
5523 fprintf (dump_file
, "inv_expr %d: \t", list
[i
]->id
);
5524 print_generic_expr (dump_file
, list
[i
]->expr
, TDF_SLIM
);
5525 fprintf (dump_file
, "\n");
5528 fprintf (dump_file
, "\n<Group-candidate Costs>:\n");
5530 for (i
= 0; i
< data
->vgroups
.length (); i
++)
5532 group
= data
->vgroups
[i
];
5534 fprintf (dump_file
, "Group %d:\n", i
);
5535 fprintf (dump_file
, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
5536 for (j
= 0; j
< group
->n_map_members
; j
++)
5538 if (!group
->cost_map
[j
].cand
5539 || group
->cost_map
[j
].cost
.infinite_cost_p ())
5542 fprintf (dump_file
, " %d\t%d\t%d\t",
5543 group
->cost_map
[j
].cand
->id
,
5544 group
->cost_map
[j
].cost
.cost
,
5545 group
->cost_map
[j
].cost
.complexity
);
5546 if (!group
->cost_map
[j
].inv_exprs
5547 || bitmap_empty_p (group
->cost_map
[j
].inv_exprs
))
5548 fprintf (dump_file
, "NIL;\t");
5550 bitmap_print (dump_file
,
5551 group
->cost_map
[j
].inv_exprs
, "", ";\t");
5552 if (!group
->cost_map
[j
].inv_vars
5553 || bitmap_empty_p (group
->cost_map
[j
].inv_vars
))
5554 fprintf (dump_file
, "NIL;\n");
5556 bitmap_print (dump_file
,
5557 group
->cost_map
[j
].inv_vars
, "", "\n");
5560 fprintf (dump_file
, "\n");
5562 fprintf (dump_file
, "\n");
5566 /* Determines cost of the candidate CAND. */
5569 determine_iv_cost (struct ivopts_data
*data
, struct iv_cand
*cand
)
5571 comp_cost cost_base
;
5572 unsigned cost
, cost_step
;
5575 gcc_assert (cand
->iv
!= NULL
);
5577 /* There are two costs associated with the candidate -- its increment
5578 and its initialization. The second is almost negligible for any loop
5579 that rolls enough, so we take it just very little into account. */
5581 base
= cand
->iv
->base
;
5582 cost_base
= force_var_cost (data
, base
, NULL
);
5583 /* It will be exceptional that the iv register happens to be initialized with
5584 the proper value at no cost. In general, there will at least be a regcopy
5586 if (cost_base
.cost
== 0)
5587 cost_base
.cost
= COSTS_N_INSNS (1);
5588 cost_step
= add_cost (data
->speed
, TYPE_MODE (TREE_TYPE (base
)));
5590 cost
= cost_step
+ adjust_setup_cost (data
, cost_base
.cost
);
5592 /* Prefer the original ivs unless we may gain something by replacing it.
5593 The reason is to make debugging simpler; so this is not relevant for
5594 artificial ivs created by other optimization passes. */
5595 if (cand
->pos
!= IP_ORIGINAL
5596 || !SSA_NAME_VAR (cand
->var_before
)
5597 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand
->var_before
)))
5600 /* Prefer not to insert statements into latch unless there are some
5601 already (so that we do not create unnecessary jumps). */
5602 if (cand
->pos
== IP_END
5603 && empty_block_p (ip_end_pos (data
->current_loop
)))
5607 cand
->cost_step
= cost_step
;
5610 /* Determines costs of computation of the candidates. */
5613 determine_iv_costs (struct ivopts_data
*data
)
5617 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5619 fprintf (dump_file
, "<Candidate Costs>:\n");
5620 fprintf (dump_file
, " cand\tcost\n");
5623 for (i
= 0; i
< data
->vcands
.length (); i
++)
5625 struct iv_cand
*cand
= data
->vcands
[i
];
5627 determine_iv_cost (data
, cand
);
5629 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5630 fprintf (dump_file
, " %d\t%d\n", i
, cand
->cost
);
5633 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5634 fprintf (dump_file
, "\n");
5637 /* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5638 induction variables. Note N_INVS includes both invariant variables and
5639 invariant expressions. */
5642 ivopts_estimate_reg_pressure (struct ivopts_data
*data
, unsigned n_invs
,
5646 unsigned n_old
= data
->regs_used
, n_new
= n_invs
+ n_cands
;
5647 unsigned regs_needed
= n_new
+ n_old
, available_regs
= target_avail_regs
;
5648 bool speed
= data
->speed
;
5650 /* If there is a call in the loop body, the call-clobbered registers
5651 are not available for loop invariants. */
5652 if (data
->body_includes_call
)
5653 available_regs
= available_regs
- target_clobbered_regs
;
5655 /* If we have enough registers. */
5656 if (regs_needed
+ target_res_regs
< available_regs
)
5658 /* If close to running out of registers, try to preserve them. */
5659 else if (regs_needed
<= available_regs
)
5660 cost
= target_reg_cost
[speed
] * regs_needed
;
5661 /* If we run out of available registers but the number of candidates
5662 does not, we penalize extra registers using target_spill_cost. */
5663 else if (n_cands
<= available_regs
)
5664 cost
= target_reg_cost
[speed
] * available_regs
5665 + target_spill_cost
[speed
] * (regs_needed
- available_regs
);
5666 /* If the number of candidates runs out available registers, we penalize
5667 extra candidate registers using target_spill_cost * 2. Because it is
5668 more expensive to spill induction variable than invariant. */
5670 cost
= target_reg_cost
[speed
] * available_regs
5671 + target_spill_cost
[speed
] * (n_cands
- available_regs
) * 2
5672 + target_spill_cost
[speed
] * (regs_needed
- n_cands
);
5674 /* Finally, add the number of candidates, so that we prefer eliminating
5675 induction variables if possible. */
5676 return cost
+ n_cands
;
5679 /* For each size of the induction variable set determine the penalty. */
5682 determine_set_costs (struct ivopts_data
*data
)
5688 struct loop
*loop
= data
->current_loop
;
5691 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5693 fprintf (dump_file
, "<Global Costs>:\n");
5694 fprintf (dump_file
, " target_avail_regs %d\n", target_avail_regs
);
5695 fprintf (dump_file
, " target_clobbered_regs %d\n", target_clobbered_regs
);
5696 fprintf (dump_file
, " target_reg_cost %d\n", target_reg_cost
[data
->speed
]);
5697 fprintf (dump_file
, " target_spill_cost %d\n", target_spill_cost
[data
->speed
]);
5701 for (psi
= gsi_start_phis (loop
->header
); !gsi_end_p (psi
); gsi_next (&psi
))
5704 op
= PHI_RESULT (phi
);
5706 if (virtual_operand_p (op
))
5709 if (get_iv (data
, op
))
5712 if (!POINTER_TYPE_P (TREE_TYPE (op
))
5713 && !INTEGRAL_TYPE_P (TREE_TYPE (op
)))
5719 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
5721 struct version_info
*info
= ver_info (data
, j
);
5723 if (info
->inv_id
&& info
->has_nonlin_use
)
5727 data
->regs_used
= n
;
5728 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5729 fprintf (dump_file
, " regs_used %d\n", n
);
5731 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
5733 fprintf (dump_file
, " cost for size:\n");
5734 fprintf (dump_file
, " ivs\tcost\n");
5735 for (j
= 0; j
<= 2 * target_avail_regs
; j
++)
5736 fprintf (dump_file
, " %d\t%d\n", j
,
5737 ivopts_estimate_reg_pressure (data
, 0, j
));
5738 fprintf (dump_file
, "\n");
5742 /* Returns true if A is a cheaper cost pair than B. */
5745 cheaper_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5753 if (a
->cost
< b
->cost
)
5756 if (b
->cost
< a
->cost
)
5759 /* In case the costs are the same, prefer the cheaper candidate. */
5760 if (a
->cand
->cost
< b
->cand
->cost
)
5766 /* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5767 for more expensive, equal and cheaper respectively. */
5770 compare_cost_pair (struct cost_pair
*a
, struct cost_pair
*b
)
5772 if (cheaper_cost_pair (a
, b
))
5774 if (cheaper_cost_pair (b
, a
))
5780 /* Returns candidate by that USE is expressed in IVS. */
5782 static struct cost_pair
*
5783 iv_ca_cand_for_group (struct iv_ca
*ivs
, struct iv_group
*group
)
5785 return ivs
->cand_for_group
[group
->id
];
5788 /* Computes the cost field of IVS structure. */
5791 iv_ca_recount_cost (struct ivopts_data
*data
, struct iv_ca
*ivs
)
5793 comp_cost cost
= ivs
->cand_use_cost
;
5795 cost
+= ivs
->cand_cost
;
5796 cost
+= ivopts_estimate_reg_pressure (data
, ivs
->n_invs
, ivs
->n_cands
);
5800 /* Remove use of invariants in set INVS by decreasing counter in N_INV_USES
5804 iv_ca_set_remove_invs (struct iv_ca
*ivs
, bitmap invs
, unsigned *n_inv_uses
)
5812 gcc_assert (n_inv_uses
!= NULL
);
5813 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5816 if (n_inv_uses
[iid
] == 0)
5821 /* Set USE not to be expressed by any candidate in IVS. */
5824 iv_ca_set_no_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5825 struct iv_group
*group
)
5827 unsigned gid
= group
->id
, cid
;
5828 struct cost_pair
*cp
;
5830 cp
= ivs
->cand_for_group
[gid
];
5836 ivs
->cand_for_group
[gid
] = NULL
;
5837 ivs
->n_cand_uses
[cid
]--;
5839 if (ivs
->n_cand_uses
[cid
] == 0)
5841 bitmap_clear_bit (ivs
->cands
, cid
);
5843 ivs
->cand_cost
-= cp
->cand
->cost
;
5844 iv_ca_set_remove_invs (ivs
, cp
->cand
->inv_vars
, ivs
->n_inv_var_uses
);
5845 iv_ca_set_remove_invs (ivs
, cp
->cand
->inv_exprs
, ivs
->n_inv_expr_uses
);
5848 ivs
->cand_use_cost
-= cp
->cost
;
5849 iv_ca_set_remove_invs (ivs
, cp
->inv_vars
, ivs
->n_inv_var_uses
);
5850 iv_ca_set_remove_invs (ivs
, cp
->inv_exprs
, ivs
->n_inv_expr_uses
);
5851 iv_ca_recount_cost (data
, ivs
);
5854 /* Add use of invariants in set INVS by increasing counter in N_INV_USES and
5858 iv_ca_set_add_invs (struct iv_ca
*ivs
, bitmap invs
, unsigned *n_inv_uses
)
5866 gcc_assert (n_inv_uses
!= NULL
);
5867 EXECUTE_IF_SET_IN_BITMAP (invs
, 0, iid
, bi
)
5870 if (n_inv_uses
[iid
] == 1)
5875 /* Set cost pair for GROUP in set IVS to CP. */
5878 iv_ca_set_cp (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5879 struct iv_group
*group
, struct cost_pair
*cp
)
5881 unsigned gid
= group
->id
, cid
;
5883 if (ivs
->cand_for_group
[gid
] == cp
)
5886 if (ivs
->cand_for_group
[gid
])
5887 iv_ca_set_no_cp (data
, ivs
, group
);
5894 ivs
->cand_for_group
[gid
] = cp
;
5895 ivs
->n_cand_uses
[cid
]++;
5896 if (ivs
->n_cand_uses
[cid
] == 1)
5898 bitmap_set_bit (ivs
->cands
, cid
);
5900 ivs
->cand_cost
+= cp
->cand
->cost
;
5901 iv_ca_set_add_invs (ivs
, cp
->cand
->inv_vars
, ivs
->n_inv_var_uses
);
5902 iv_ca_set_add_invs (ivs
, cp
->cand
->inv_exprs
, ivs
->n_inv_expr_uses
);
5905 ivs
->cand_use_cost
+= cp
->cost
;
5906 iv_ca_set_add_invs (ivs
, cp
->inv_vars
, ivs
->n_inv_var_uses
);
5907 iv_ca_set_add_invs (ivs
, cp
->inv_exprs
, ivs
->n_inv_expr_uses
);
5908 iv_ca_recount_cost (data
, ivs
);
5912 /* Extend set IVS by expressing USE by some of the candidates in it
5913 if possible. Consider all important candidates if candidates in
5914 set IVS don't give any result. */
5917 iv_ca_add_group (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5918 struct iv_group
*group
)
5920 struct cost_pair
*best_cp
= NULL
, *cp
;
5923 struct iv_cand
*cand
;
5925 gcc_assert (ivs
->upto
>= group
->id
);
5929 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
5931 cand
= data
->vcands
[i
];
5932 cp
= get_group_iv_cost (data
, group
, cand
);
5933 if (cheaper_cost_pair (cp
, best_cp
))
5937 if (best_cp
== NULL
)
5939 EXECUTE_IF_SET_IN_BITMAP (data
->important_candidates
, 0, i
, bi
)
5941 cand
= data
->vcands
[i
];
5942 cp
= get_group_iv_cost (data
, group
, cand
);
5943 if (cheaper_cost_pair (cp
, best_cp
))
5948 iv_ca_set_cp (data
, ivs
, group
, best_cp
);
5951 /* Get cost for assignment IVS. */
5954 iv_ca_cost (struct iv_ca
*ivs
)
5956 /* This was a conditional expression but it triggered a bug in
5958 if (ivs
->bad_groups
)
5959 return infinite_cost
;
5964 /* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5965 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5969 iv_ca_compare_deps (struct ivopts_data
*data
, struct iv_ca
*ivs
,
5970 struct iv_group
*group
, struct cost_pair
*old_cp
,
5971 struct cost_pair
*new_cp
)
5973 gcc_assert (old_cp
&& new_cp
&& old_cp
!= new_cp
);
5974 unsigned old_n_invs
= ivs
->n_invs
;
5975 iv_ca_set_cp (data
, ivs
, group
, new_cp
);
5976 unsigned new_n_invs
= ivs
->n_invs
;
5977 iv_ca_set_cp (data
, ivs
, group
, old_cp
);
5979 return new_n_invs
> old_n_invs
? 1 : (new_n_invs
< old_n_invs
? -1 : 0);
5982 /* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5985 static struct iv_ca_delta
*
5986 iv_ca_delta_add (struct iv_group
*group
, struct cost_pair
*old_cp
,
5987 struct cost_pair
*new_cp
, struct iv_ca_delta
*next
)
5989 struct iv_ca_delta
*change
= XNEW (struct iv_ca_delta
);
5991 change
->group
= group
;
5992 change
->old_cp
= old_cp
;
5993 change
->new_cp
= new_cp
;
5994 change
->next
= next
;
5999 /* Joins two lists of changes L1 and L2. Destructive -- old lists
6002 static struct iv_ca_delta
*
6003 iv_ca_delta_join (struct iv_ca_delta
*l1
, struct iv_ca_delta
*l2
)
6005 struct iv_ca_delta
*last
;
6013 for (last
= l1
; last
->next
; last
= last
->next
)
6020 /* Reverse the list of changes DELTA, forming the inverse to it. */
6022 static struct iv_ca_delta
*
6023 iv_ca_delta_reverse (struct iv_ca_delta
*delta
)
6025 struct iv_ca_delta
*act
, *next
, *prev
= NULL
;
6027 for (act
= delta
; act
; act
= next
)
6033 std::swap (act
->old_cp
, act
->new_cp
);
6039 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
6040 reverted instead. */
6043 iv_ca_delta_commit (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6044 struct iv_ca_delta
*delta
, bool forward
)
6046 struct cost_pair
*from
, *to
;
6047 struct iv_ca_delta
*act
;
6050 delta
= iv_ca_delta_reverse (delta
);
6052 for (act
= delta
; act
; act
= act
->next
)
6056 gcc_assert (iv_ca_cand_for_group (ivs
, act
->group
) == from
);
6057 iv_ca_set_cp (data
, ivs
, act
->group
, to
);
6061 iv_ca_delta_reverse (delta
);
6064 /* Returns true if CAND is used in IVS. */
6067 iv_ca_cand_used_p (struct iv_ca
*ivs
, struct iv_cand
*cand
)
6069 return ivs
->n_cand_uses
[cand
->id
] > 0;
6072 /* Returns number of induction variable candidates in the set IVS. */
6075 iv_ca_n_cands (struct iv_ca
*ivs
)
6077 return ivs
->n_cands
;
6080 /* Free the list of changes DELTA. */
6083 iv_ca_delta_free (struct iv_ca_delta
**delta
)
6085 struct iv_ca_delta
*act
, *next
;
6087 for (act
= *delta
; act
; act
= next
)
6096 /* Allocates new iv candidates assignment. */
6098 static struct iv_ca
*
6099 iv_ca_new (struct ivopts_data
*data
)
6101 struct iv_ca
*nw
= XNEW (struct iv_ca
);
6105 nw
->cand_for_group
= XCNEWVEC (struct cost_pair
*,
6106 data
->vgroups
.length ());
6107 nw
->n_cand_uses
= XCNEWVEC (unsigned, data
->vcands
.length ());
6108 nw
->cands
= BITMAP_ALLOC (NULL
);
6111 nw
->cand_use_cost
= no_cost
;
6113 nw
->n_inv_var_uses
= XCNEWVEC (unsigned, data
->max_inv_var_id
+ 1);
6114 nw
->n_inv_expr_uses
= XCNEWVEC (unsigned, data
->max_inv_expr_id
+ 1);
6120 /* Free memory occupied by the set IVS. */
6123 iv_ca_free (struct iv_ca
**ivs
)
6125 free ((*ivs
)->cand_for_group
);
6126 free ((*ivs
)->n_cand_uses
);
6127 BITMAP_FREE ((*ivs
)->cands
);
6128 free ((*ivs
)->n_inv_var_uses
);
6129 free ((*ivs
)->n_inv_expr_uses
);
6134 /* Dumps IVS to FILE. */
6137 iv_ca_dump (struct ivopts_data
*data
, FILE *file
, struct iv_ca
*ivs
)
6140 comp_cost cost
= iv_ca_cost (ivs
);
6142 fprintf (file
, " cost: %d (complexity %d)\n", cost
.cost
,
6144 fprintf (file
, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
6145 ivs
->cand_cost
, ivs
->cand_use_cost
.cost
,
6146 ivs
->cand_use_cost
.complexity
);
6147 bitmap_print (file
, ivs
->cands
, " candidates: ","\n");
6149 for (i
= 0; i
< ivs
->upto
; i
++)
6151 struct iv_group
*group
= data
->vgroups
[i
];
6152 struct cost_pair
*cp
= iv_ca_cand_for_group (ivs
, group
);
6154 fprintf (file
, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6155 group
->id
, cp
->cand
->id
, cp
->cost
.cost
,
6156 cp
->cost
.complexity
);
6158 fprintf (file
, " group:%d --> ??\n", group
->id
);
6161 const char *pref
= "";
6162 fprintf (file
, " invariant variables: ");
6163 for (i
= 1; i
<= data
->max_inv_var_id
; i
++)
6164 if (ivs
->n_inv_var_uses
[i
])
6166 fprintf (file
, "%s%d", pref
, i
);
6171 fprintf (file
, "\n invariant expressions: ");
6172 for (i
= 1; i
<= data
->max_inv_expr_id
; i
++)
6173 if (ivs
->n_inv_expr_uses
[i
])
6175 fprintf (file
, "%s%d", pref
, i
);
6179 fprintf (file
, "\n\n");
6182 /* Try changing candidate in IVS to CAND for each use. Return cost of the
6183 new set, and store differences in DELTA. Number of induction variables
6184 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6185 the function will try to find a solution with mimimal iv candidates. */
6188 iv_ca_extend (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6189 struct iv_cand
*cand
, struct iv_ca_delta
**delta
,
6190 unsigned *n_ivs
, bool min_ncand
)
6194 struct iv_group
*group
;
6195 struct cost_pair
*old_cp
, *new_cp
;
6198 for (i
= 0; i
< ivs
->upto
; i
++)
6200 group
= data
->vgroups
[i
];
6201 old_cp
= iv_ca_cand_for_group (ivs
, group
);
6204 && old_cp
->cand
== cand
)
6207 new_cp
= get_group_iv_cost (data
, group
, cand
);
6213 int cmp_invs
= iv_ca_compare_deps (data
, ivs
, group
, old_cp
, new_cp
);
6214 /* Skip if new_cp depends on more invariants. */
6218 int cmp_cost
= compare_cost_pair (new_cp
, old_cp
);
6219 /* Skip if new_cp is not cheaper. */
6220 if (cmp_cost
> 0 || (cmp_cost
== 0 && cmp_invs
== 0))
6224 *delta
= iv_ca_delta_add (group
, old_cp
, new_cp
, *delta
);
6227 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6228 cost
= iv_ca_cost (ivs
);
6230 *n_ivs
= iv_ca_n_cands (ivs
);
6231 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6236 /* Try narrowing set IVS by removing CAND. Return the cost of
6237 the new set and store the differences in DELTA. START is
6238 the candidate with which we start narrowing. */
6241 iv_ca_narrow (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6242 struct iv_cand
*cand
, struct iv_cand
*start
,
6243 struct iv_ca_delta
**delta
)
6246 struct iv_group
*group
;
6247 struct cost_pair
*old_cp
, *new_cp
, *cp
;
6249 struct iv_cand
*cnd
;
6250 comp_cost cost
, best_cost
, acost
;
6253 for (i
= 0; i
< data
->vgroups
.length (); i
++)
6255 group
= data
->vgroups
[i
];
6257 old_cp
= iv_ca_cand_for_group (ivs
, group
);
6258 if (old_cp
->cand
!= cand
)
6261 best_cost
= iv_ca_cost (ivs
);
6262 /* Start narrowing with START. */
6263 new_cp
= get_group_iv_cost (data
, group
, start
);
6265 if (data
->consider_all_candidates
)
6267 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, ci
, bi
)
6269 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6272 cnd
= data
->vcands
[ci
];
6274 cp
= get_group_iv_cost (data
, group
, cnd
);
6278 iv_ca_set_cp (data
, ivs
, group
, cp
);
6279 acost
= iv_ca_cost (ivs
);
6281 if (acost
< best_cost
)
6290 EXECUTE_IF_AND_IN_BITMAP (group
->related_cands
, ivs
->cands
, 0, ci
, bi
)
6292 if (ci
== cand
->id
|| (start
&& ci
== start
->id
))
6295 cnd
= data
->vcands
[ci
];
6297 cp
= get_group_iv_cost (data
, group
, cnd
);
6301 iv_ca_set_cp (data
, ivs
, group
, cp
);
6302 acost
= iv_ca_cost (ivs
);
6304 if (acost
< best_cost
)
6311 /* Restore to old cp for use. */
6312 iv_ca_set_cp (data
, ivs
, group
, old_cp
);
6316 iv_ca_delta_free (delta
);
6317 return infinite_cost
;
6320 *delta
= iv_ca_delta_add (group
, old_cp
, new_cp
, *delta
);
6323 iv_ca_delta_commit (data
, ivs
, *delta
, true);
6324 cost
= iv_ca_cost (ivs
);
6325 iv_ca_delta_commit (data
, ivs
, *delta
, false);
6330 /* Try optimizing the set of candidates IVS by removing candidates different
6331 from to EXCEPT_CAND from it. Return cost of the new set, and store
6332 differences in DELTA. */
6335 iv_ca_prune (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6336 struct iv_cand
*except_cand
, struct iv_ca_delta
**delta
)
6339 struct iv_ca_delta
*act_delta
, *best_delta
;
6341 comp_cost best_cost
, acost
;
6342 struct iv_cand
*cand
;
6345 best_cost
= iv_ca_cost (ivs
);
6347 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6349 cand
= data
->vcands
[i
];
6351 if (cand
== except_cand
)
6354 acost
= iv_ca_narrow (data
, ivs
, cand
, except_cand
, &act_delta
);
6356 if (acost
< best_cost
)
6359 iv_ca_delta_free (&best_delta
);
6360 best_delta
= act_delta
;
6363 iv_ca_delta_free (&act_delta
);
6372 /* Recurse to possibly remove other unnecessary ivs. */
6373 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6374 best_cost
= iv_ca_prune (data
, ivs
, except_cand
, delta
);
6375 iv_ca_delta_commit (data
, ivs
, best_delta
, false);
6376 *delta
= iv_ca_delta_join (best_delta
, *delta
);
6380 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6381 cheaper local cost for GROUP than BEST_CP. Return pointer to
6382 the corresponding cost_pair, otherwise just return BEST_CP. */
6384 static struct cost_pair
*
6385 cheaper_cost_with_cand (struct ivopts_data
*data
, struct iv_group
*group
,
6386 unsigned int cand_idx
, struct iv_cand
*old_cand
,
6387 struct cost_pair
*best_cp
)
6389 struct iv_cand
*cand
;
6390 struct cost_pair
*cp
;
6392 gcc_assert (old_cand
!= NULL
&& best_cp
!= NULL
);
6393 if (cand_idx
== old_cand
->id
)
6396 cand
= data
->vcands
[cand_idx
];
6397 cp
= get_group_iv_cost (data
, group
, cand
);
6398 if (cp
!= NULL
&& cheaper_cost_pair (cp
, best_cp
))
6404 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6405 which are used by more than one iv uses. For each of those candidates,
6406 this function tries to represent iv uses under that candidate using
6407 other ones with lower local cost, then tries to prune the new set.
6408 If the new set has lower cost, It returns the new cost after recording
6409 candidate replacement in list DELTA. */
6412 iv_ca_replace (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6413 struct iv_ca_delta
**delta
)
6415 bitmap_iterator bi
, bj
;
6416 unsigned int i
, j
, k
;
6417 struct iv_cand
*cand
;
6418 comp_cost orig_cost
, acost
;
6419 struct iv_ca_delta
*act_delta
, *tmp_delta
;
6420 struct cost_pair
*old_cp
, *best_cp
= NULL
;
6423 orig_cost
= iv_ca_cost (ivs
);
6425 EXECUTE_IF_SET_IN_BITMAP (ivs
->cands
, 0, i
, bi
)
6427 if (ivs
->n_cand_uses
[i
] == 1
6428 || ivs
->n_cand_uses
[i
] > ALWAYS_PRUNE_CAND_SET_BOUND
)
6431 cand
= data
->vcands
[i
];
6434 /* Represent uses under current candidate using other ones with
6435 lower local cost. */
6436 for (j
= 0; j
< ivs
->upto
; j
++)
6438 struct iv_group
*group
= data
->vgroups
[j
];
6439 old_cp
= iv_ca_cand_for_group (ivs
, group
);
6441 if (old_cp
->cand
!= cand
)
6445 if (data
->consider_all_candidates
)
6446 for (k
= 0; k
< data
->vcands
.length (); k
++)
6447 best_cp
= cheaper_cost_with_cand (data
, group
, k
,
6448 old_cp
->cand
, best_cp
);
6450 EXECUTE_IF_SET_IN_BITMAP (group
->related_cands
, 0, k
, bj
)
6451 best_cp
= cheaper_cost_with_cand (data
, group
, k
,
6452 old_cp
->cand
, best_cp
);
6454 if (best_cp
== old_cp
)
6457 act_delta
= iv_ca_delta_add (group
, old_cp
, best_cp
, act_delta
);
6459 /* No need for further prune. */
6463 /* Prune the new candidate set. */
6464 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6465 acost
= iv_ca_prune (data
, ivs
, NULL
, &tmp_delta
);
6466 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6467 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6469 if (acost
< orig_cost
)
6475 iv_ca_delta_free (&act_delta
);
6481 /* Tries to extend the sets IVS in the best possible way in order to
6482 express the GROUP. If ORIGINALP is true, prefer candidates from
6483 the original set of IVs, otherwise favor important candidates not
6484 based on any memory object. */
6487 try_add_cand_for (struct ivopts_data
*data
, struct iv_ca
*ivs
,
6488 struct iv_group
*group
, bool originalp
)
6490 comp_cost best_cost
, act_cost
;
6493 struct iv_cand
*cand
;
6494 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
;
6495 struct cost_pair
*cp
;
6497 iv_ca_add_group (data
, ivs
, group
);
6498 best_cost
= iv_ca_cost (ivs
);
6499 cp
= iv_ca_cand_for_group (ivs
, group
);
6502 best_delta
= iv_ca_delta_add (group
, NULL
, cp
, NULL
);
6503 iv_ca_set_no_cp (data
, ivs
, group
);
6506 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6507 first try important candidates not based on any memory object. Only if
6508 this fails, try the specific ones. Rationale -- in loops with many
6509 variables the best choice often is to use just one generic biv. If we
6510 added here many ivs specific to the uses, the optimization algorithm later
6511 would be likely to get stuck in a local minimum, thus causing us to create
6512 too many ivs. The approach from few ivs to more seems more likely to be
6513 successful -- starting from few ivs, replacing an expensive use by a
6514 specific iv should always be a win. */
6515 EXECUTE_IF_SET_IN_BITMAP (group
->related_cands
, 0, i
, bi
)
6517 cand
= data
->vcands
[i
];
6519 if (originalp
&& cand
->pos
!=IP_ORIGINAL
)
6522 if (!originalp
&& cand
->iv
->base_object
!= NULL_TREE
)
6525 if (iv_ca_cand_used_p (ivs
, cand
))
6528 cp
= get_group_iv_cost (data
, group
, cand
);
6532 iv_ca_set_cp (data
, ivs
, group
, cp
);
6533 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
,
6535 iv_ca_set_no_cp (data
, ivs
, group
);
6536 act_delta
= iv_ca_delta_add (group
, NULL
, cp
, act_delta
);
6538 if (act_cost
< best_cost
)
6540 best_cost
= act_cost
;
6542 iv_ca_delta_free (&best_delta
);
6543 best_delta
= act_delta
;
6546 iv_ca_delta_free (&act_delta
);
6549 if (best_cost
.infinite_cost_p ())
6551 for (i
= 0; i
< group
->n_map_members
; i
++)
6553 cp
= group
->cost_map
+ i
;
6558 /* Already tried this. */
6559 if (cand
->important
)
6561 if (originalp
&& cand
->pos
== IP_ORIGINAL
)
6563 if (!originalp
&& cand
->iv
->base_object
== NULL_TREE
)
6567 if (iv_ca_cand_used_p (ivs
, cand
))
6571 iv_ca_set_cp (data
, ivs
, group
, cp
);
6572 act_cost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, NULL
, true);
6573 iv_ca_set_no_cp (data
, ivs
, group
);
6574 act_delta
= iv_ca_delta_add (group
,
6575 iv_ca_cand_for_group (ivs
, group
),
6578 if (act_cost
< best_cost
)
6580 best_cost
= act_cost
;
6583 iv_ca_delta_free (&best_delta
);
6584 best_delta
= act_delta
;
6587 iv_ca_delta_free (&act_delta
);
6591 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6592 iv_ca_delta_free (&best_delta
);
6594 return !best_cost
.infinite_cost_p ();
6597 /* Finds an initial assignment of candidates to uses. */
6599 static struct iv_ca
*
6600 get_initial_solution (struct ivopts_data
*data
, bool originalp
)
6603 struct iv_ca
*ivs
= iv_ca_new (data
);
6605 for (i
= 0; i
< data
->vgroups
.length (); i
++)
6606 if (!try_add_cand_for (data
, ivs
, data
->vgroups
[i
], originalp
))
6615 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6616 points to a bool variable, this function tries to break local
6617 optimal fixed-point by replacing candidates in IVS if it's true. */
6620 try_improve_iv_set (struct ivopts_data
*data
,
6621 struct iv_ca
*ivs
, bool *try_replace_p
)
6624 comp_cost acost
, best_cost
= iv_ca_cost (ivs
);
6625 struct iv_ca_delta
*best_delta
= NULL
, *act_delta
, *tmp_delta
;
6626 struct iv_cand
*cand
;
6628 /* Try extending the set of induction variables by one. */
6629 for (i
= 0; i
< data
->vcands
.length (); i
++)
6631 cand
= data
->vcands
[i
];
6633 if (iv_ca_cand_used_p (ivs
, cand
))
6636 acost
= iv_ca_extend (data
, ivs
, cand
, &act_delta
, &n_ivs
, false);
6640 /* If we successfully added the candidate and the set is small enough,
6641 try optimizing it by removing other candidates. */
6642 if (n_ivs
<= ALWAYS_PRUNE_CAND_SET_BOUND
)
6644 iv_ca_delta_commit (data
, ivs
, act_delta
, true);
6645 acost
= iv_ca_prune (data
, ivs
, cand
, &tmp_delta
);
6646 iv_ca_delta_commit (data
, ivs
, act_delta
, false);
6647 act_delta
= iv_ca_delta_join (act_delta
, tmp_delta
);
6650 if (acost
< best_cost
)
6653 iv_ca_delta_free (&best_delta
);
6654 best_delta
= act_delta
;
6657 iv_ca_delta_free (&act_delta
);
6662 /* Try removing the candidates from the set instead. */
6663 best_cost
= iv_ca_prune (data
, ivs
, NULL
, &best_delta
);
6665 if (!best_delta
&& *try_replace_p
)
6667 *try_replace_p
= false;
6668 /* So far candidate selecting algorithm tends to choose fewer IVs
6669 so that it can handle cases in which loops have many variables
6670 but the best choice is often to use only one general biv. One
6671 weakness is it can't handle opposite cases, in which different
6672 candidates should be chosen with respect to each use. To solve
6673 the problem, we replace candidates in a manner described by the
6674 comments of iv_ca_replace, thus give general algorithm a chance
6675 to break local optimal fixed-point in these cases. */
6676 best_cost
= iv_ca_replace (data
, ivs
, &best_delta
);
6683 iv_ca_delta_commit (data
, ivs
, best_delta
, true);
6684 iv_ca_delta_free (&best_delta
);
6685 return best_cost
== iv_ca_cost (ivs
);
6688 /* Attempts to find the optimal set of induction variables. We do simple
6689 greedy heuristic -- we try to replace at most one candidate in the selected
6690 solution and remove the unused ivs while this improves the cost. */
6692 static struct iv_ca
*
6693 find_optimal_iv_set_1 (struct ivopts_data
*data
, bool originalp
)
6696 bool try_replace_p
= true;
6698 /* Get the initial solution. */
6699 set
= get_initial_solution (data
, originalp
);
6702 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6703 fprintf (dump_file
, "Unable to substitute for ivs, failed.\n");
6707 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6709 fprintf (dump_file
, "Initial set of candidates:\n");
6710 iv_ca_dump (data
, dump_file
, set
);
6713 while (try_improve_iv_set (data
, set
, &try_replace_p
))
6715 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6717 fprintf (dump_file
, "Improved to:\n");
6718 iv_ca_dump (data
, dump_file
, set
);
6722 /* If the set has infinite_cost, it can't be optimal. */
6723 if (iv_ca_cost (set
).infinite_cost_p ())
6725 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6727 "Overflow to infinite cost in try_improve_iv_set.\n");
6733 static struct iv_ca
*
6734 find_optimal_iv_set (struct ivopts_data
*data
)
6737 comp_cost cost
, origcost
;
6738 struct iv_ca
*set
, *origset
;
6740 /* Determine the cost based on a strategy that starts with original IVs,
6741 and try again using a strategy that prefers candidates not based
6743 origset
= find_optimal_iv_set_1 (data
, true);
6744 set
= find_optimal_iv_set_1 (data
, false);
6746 if (!origset
&& !set
)
6749 origcost
= origset
? iv_ca_cost (origset
) : infinite_cost
;
6750 cost
= set
? iv_ca_cost (set
) : infinite_cost
;
6752 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6754 fprintf (dump_file
, "Original cost %d (complexity %d)\n\n",
6755 origcost
.cost
, origcost
.complexity
);
6756 fprintf (dump_file
, "Final cost %d (complexity %d)\n\n",
6757 cost
.cost
, cost
.complexity
);
6760 /* Choose the one with the best cost. */
6761 if (origcost
<= cost
)
6768 iv_ca_free (&origset
);
6770 for (i
= 0; i
< data
->vgroups
.length (); i
++)
6772 struct iv_group
*group
= data
->vgroups
[i
];
6773 group
->selected
= iv_ca_cand_for_group (set
, group
)->cand
;
6779 /* Creates a new induction variable corresponding to CAND. */
6782 create_new_iv (struct ivopts_data
*data
, struct iv_cand
*cand
)
6784 gimple_stmt_iterator incr_pos
;
6787 struct iv_group
*group
;
6790 gcc_assert (cand
->iv
!= NULL
);
6795 incr_pos
= gsi_last_bb (ip_normal_pos (data
->current_loop
));
6799 incr_pos
= gsi_last_bb (ip_end_pos (data
->current_loop
));
6807 incr_pos
= gsi_for_stmt (cand
->incremented_at
);
6811 /* Mark that the iv is preserved. */
6812 name_info (data
, cand
->var_before
)->preserve_biv
= true;
6813 name_info (data
, cand
->var_after
)->preserve_biv
= true;
6815 /* Rewrite the increment so that it uses var_before directly. */
6816 use
= find_interesting_uses_op (data
, cand
->var_after
);
6817 group
= data
->vgroups
[use
->group_id
];
6818 group
->selected
= cand
;
6822 gimple_add_tmp_var (cand
->var_before
);
6824 base
= unshare_expr (cand
->iv
->base
);
6826 create_iv (base
, unshare_expr (cand
->iv
->step
),
6827 cand
->var_before
, data
->current_loop
,
6828 &incr_pos
, after
, &cand
->var_before
, &cand
->var_after
);
6831 /* Creates new induction variables described in SET. */
6834 create_new_ivs (struct ivopts_data
*data
, struct iv_ca
*set
)
6837 struct iv_cand
*cand
;
6840 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6842 cand
= data
->vcands
[i
];
6843 create_new_iv (data
, cand
);
6846 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
6848 fprintf (dump_file
, "Selected IV set for loop %d",
6849 data
->current_loop
->num
);
6850 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
6851 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
6852 LOCATION_LINE (data
->loop_loc
));
6853 fprintf (dump_file
, ", " HOST_WIDE_INT_PRINT_DEC
" avg niters",
6854 avg_loop_niter (data
->current_loop
));
6855 fprintf (dump_file
, ", %lu IVs:\n", bitmap_count_bits (set
->cands
));
6856 EXECUTE_IF_SET_IN_BITMAP (set
->cands
, 0, i
, bi
)
6858 cand
= data
->vcands
[i
];
6859 dump_cand (dump_file
, cand
);
6861 fprintf (dump_file
, "\n");
6865 /* Rewrites USE (definition of iv used in a nonlinear expression)
6866 using candidate CAND. */
6869 rewrite_use_nonlinear_expr (struct ivopts_data
*data
,
6870 struct iv_use
*use
, struct iv_cand
*cand
)
6873 gimple_stmt_iterator bsi
;
6874 tree comp
, type
= get_use_type (use
), tgt
;
6876 /* An important special case -- if we are asked to express value of
6877 the original iv by itself, just exit; there is no need to
6878 introduce a new computation (that might also need casting the
6879 variable to unsigned and back). */
6880 if (cand
->pos
== IP_ORIGINAL
6881 && cand
->incremented_at
== use
->stmt
)
6883 tree op
= NULL_TREE
;
6884 enum tree_code stmt_code
;
6886 gcc_assert (is_gimple_assign (use
->stmt
));
6887 gcc_assert (gimple_assign_lhs (use
->stmt
) == cand
->var_after
);
6889 /* Check whether we may leave the computation unchanged.
6890 This is the case only if it does not rely on other
6891 computations in the loop -- otherwise, the computation
6892 we rely upon may be removed in remove_unused_ivs,
6893 thus leading to ICE. */
6894 stmt_code
= gimple_assign_rhs_code (use
->stmt
);
6895 if (stmt_code
== PLUS_EXPR
6896 || stmt_code
== MINUS_EXPR
6897 || stmt_code
== POINTER_PLUS_EXPR
)
6899 if (gimple_assign_rhs1 (use
->stmt
) == cand
->var_before
)
6900 op
= gimple_assign_rhs2 (use
->stmt
);
6901 else if (gimple_assign_rhs2 (use
->stmt
) == cand
->var_before
)
6902 op
= gimple_assign_rhs1 (use
->stmt
);
6905 if (op
!= NULL_TREE
)
6907 if (expr_invariant_in_loop_p (data
->current_loop
, op
))
6909 if (TREE_CODE (op
) == SSA_NAME
)
6911 struct iv
*iv
= get_iv (data
, op
);
6912 if (iv
!= NULL
&& integer_zerop (iv
->step
))
6918 switch (gimple_code (use
->stmt
))
6921 tgt
= PHI_RESULT (use
->stmt
);
6923 /* If we should keep the biv, do not replace it. */
6924 if (name_info (data
, tgt
)->preserve_biv
)
6927 bsi
= gsi_after_labels (gimple_bb (use
->stmt
));
6931 tgt
= gimple_assign_lhs (use
->stmt
);
6932 bsi
= gsi_for_stmt (use
->stmt
);
6939 aff_tree aff_inv
, aff_var
;
6940 if (!get_computation_aff_1 (data
->current_loop
, use
->stmt
,
6941 use
, cand
, &aff_inv
, &aff_var
))
6944 unshare_aff_combination (&aff_inv
);
6945 unshare_aff_combination (&aff_var
);
6946 /* Prefer CSE opportunity than loop invariant by adding offset at last
6947 so that iv_uses have different offsets can be CSEed. */
6948 poly_widest_int offset
= aff_inv
.offset
;
6951 gimple_seq stmt_list
= NULL
, seq
= NULL
;
6952 tree comp_op1
= aff_combination_to_tree (&aff_inv
);
6953 tree comp_op2
= aff_combination_to_tree (&aff_var
);
6954 gcc_assert (comp_op1
&& comp_op2
);
6956 comp_op1
= force_gimple_operand (comp_op1
, &seq
, true, NULL
);
6957 gimple_seq_add_seq (&stmt_list
, seq
);
6958 comp_op2
= force_gimple_operand (comp_op2
, &seq
, true, NULL
);
6959 gimple_seq_add_seq (&stmt_list
, seq
);
6961 if (POINTER_TYPE_P (TREE_TYPE (comp_op2
)))
6962 std::swap (comp_op1
, comp_op2
);
6964 if (POINTER_TYPE_P (TREE_TYPE (comp_op1
)))
6966 comp
= fold_build_pointer_plus (comp_op1
,
6967 fold_convert (sizetype
, comp_op2
));
6968 comp
= fold_build_pointer_plus (comp
,
6969 wide_int_to_tree (sizetype
, offset
));
6973 comp
= fold_build2 (PLUS_EXPR
, TREE_TYPE (comp_op1
), comp_op1
,
6974 fold_convert (TREE_TYPE (comp_op1
), comp_op2
));
6975 comp
= fold_build2 (PLUS_EXPR
, TREE_TYPE (comp_op1
), comp
,
6976 wide_int_to_tree (TREE_TYPE (comp_op1
), offset
));
6979 comp
= fold_convert (type
, comp
);
6980 if (!valid_gimple_rhs_p (comp
)
6981 || (gimple_code (use
->stmt
) != GIMPLE_PHI
6982 /* We can't allow re-allocating the stmt as it might be pointed
6984 && (get_gimple_rhs_num_ops (TREE_CODE (comp
))
6985 >= gimple_num_ops (gsi_stmt (bsi
)))))
6987 comp
= force_gimple_operand (comp
, &seq
, true, NULL
);
6988 gimple_seq_add_seq (&stmt_list
, seq
);
6989 if (POINTER_TYPE_P (TREE_TYPE (tgt
)))
6991 duplicate_ssa_name_ptr_info (comp
, SSA_NAME_PTR_INFO (tgt
));
6992 /* As this isn't a plain copy we have to reset alignment
6994 if (SSA_NAME_PTR_INFO (comp
))
6995 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp
));
6999 gsi_insert_seq_before (&bsi
, stmt_list
, GSI_SAME_STMT
);
7000 if (gimple_code (use
->stmt
) == GIMPLE_PHI
)
7002 ass
= gimple_build_assign (tgt
, comp
);
7003 gsi_insert_before (&bsi
, ass
, GSI_SAME_STMT
);
7005 bsi
= gsi_for_stmt (use
->stmt
);
7006 remove_phi_node (&bsi
, false);
7010 gimple_assign_set_rhs_from_tree (&bsi
, comp
);
7011 use
->stmt
= gsi_stmt (bsi
);
7015 /* Performs a peephole optimization to reorder the iv update statement with
7016 a mem ref to enable instruction combining in later phases. The mem ref uses
7017 the iv value before the update, so the reordering transformation requires
7018 adjustment of the offset. CAND is the selected IV_CAND.
7022 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
7030 directly propagating t over to (1) will introduce overlapping live range
7031 thus increase register pressure. This peephole transform it into:
7035 t = MEM_REF (base, iv2, 8, 8);
7042 adjust_iv_update_pos (struct iv_cand
*cand
, struct iv_use
*use
)
7045 gimple
*iv_update
, *stmt
;
7047 gimple_stmt_iterator gsi
, gsi_iv
;
7049 if (cand
->pos
!= IP_NORMAL
)
7052 var_after
= cand
->var_after
;
7053 iv_update
= SSA_NAME_DEF_STMT (var_after
);
7055 bb
= gimple_bb (iv_update
);
7056 gsi
= gsi_last_nondebug_bb (bb
);
7057 stmt
= gsi_stmt (gsi
);
7059 /* Only handle conditional statement for now. */
7060 if (gimple_code (stmt
) != GIMPLE_COND
)
7063 gsi_prev_nondebug (&gsi
);
7064 stmt
= gsi_stmt (gsi
);
7065 if (stmt
!= iv_update
)
7068 gsi_prev_nondebug (&gsi
);
7069 if (gsi_end_p (gsi
))
7072 stmt
= gsi_stmt (gsi
);
7073 if (gimple_code (stmt
) != GIMPLE_ASSIGN
)
7076 if (stmt
!= use
->stmt
)
7079 if (TREE_CODE (gimple_assign_lhs (stmt
)) != SSA_NAME
)
7082 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7084 fprintf (dump_file
, "Reordering \n");
7085 print_gimple_stmt (dump_file
, iv_update
, 0);
7086 print_gimple_stmt (dump_file
, use
->stmt
, 0);
7087 fprintf (dump_file
, "\n");
7090 gsi
= gsi_for_stmt (use
->stmt
);
7091 gsi_iv
= gsi_for_stmt (iv_update
);
7092 gsi_move_before (&gsi_iv
, &gsi
);
7094 cand
->pos
= IP_BEFORE_USE
;
7095 cand
->incremented_at
= use
->stmt
;
7098 /* Return the alias pointer type that should be used for a MEM_REF
7099 associated with USE, which has type USE_PTR_ADDRESS. */
7102 get_alias_ptr_type_for_ptr_address (iv_use
*use
)
7104 gcall
*call
= as_a
<gcall
*> (use
->stmt
);
7105 switch (gimple_call_internal_fn (call
))
7108 case IFN_MASK_STORE
:
7109 /* The second argument contains the correct alias type. */
7110 gcc_assert (use
->op_p
= gimple_call_arg_ptr (call
, 0));
7111 return TREE_TYPE (gimple_call_arg (call
, 1));
7119 /* Rewrites USE (address that is an iv) using candidate CAND. */
7122 rewrite_use_address (struct ivopts_data
*data
,
7123 struct iv_use
*use
, struct iv_cand
*cand
)
7128 adjust_iv_update_pos (cand
, use
);
7129 ok
= get_computation_aff (data
->current_loop
, use
->stmt
, use
, cand
, &aff
);
7131 unshare_aff_combination (&aff
);
7133 /* To avoid undefined overflow problems, all IV candidates use unsigned
7134 integer types. The drawback is that this makes it impossible for
7135 create_mem_ref to distinguish an IV that is based on a memory object
7136 from one that represents simply an offset.
7138 To work around this problem, we pass a hint to create_mem_ref that
7139 indicates which variable (if any) in aff is an IV based on a memory
7140 object. Note that we only consider the candidate. If this is not
7141 based on an object, the base of the reference is in some subexpression
7142 of the use -- but these will use pointer types, so they are recognized
7143 by the create_mem_ref heuristics anyway. */
7144 tree iv
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7145 tree base_hint
= (cand
->iv
->base_object
) ? iv
: NULL_TREE
;
7146 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7147 tree type
= use
->mem_type
;
7148 tree alias_ptr_type
;
7149 if (use
->type
== USE_PTR_ADDRESS
)
7150 alias_ptr_type
= get_alias_ptr_type_for_ptr_address (use
);
7153 gcc_assert (type
== TREE_TYPE (*use
->op_p
));
7154 unsigned int align
= get_object_alignment (*use
->op_p
);
7155 if (align
!= TYPE_ALIGN (type
))
7156 type
= build_aligned_type (type
, align
);
7157 alias_ptr_type
= reference_alias_ptr_type (*use
->op_p
);
7159 tree ref
= create_mem_ref (&bsi
, type
, &aff
, alias_ptr_type
,
7160 iv
, base_hint
, data
->speed
);
7162 if (use
->type
== USE_PTR_ADDRESS
)
7164 ref
= fold_build1 (ADDR_EXPR
, build_pointer_type (use
->mem_type
), ref
);
7165 ref
= fold_convert (get_use_type (use
), ref
);
7166 ref
= force_gimple_operand_gsi (&bsi
, ref
, true, NULL_TREE
,
7167 true, GSI_SAME_STMT
);
7170 copy_ref_info (ref
, *use
->op_p
);
7175 /* Rewrites USE (the condition such that one of the arguments is an iv) using
7179 rewrite_use_compare (struct ivopts_data
*data
,
7180 struct iv_use
*use
, struct iv_cand
*cand
)
7182 tree comp
, op
, bound
;
7183 gimple_stmt_iterator bsi
= gsi_for_stmt (use
->stmt
);
7184 enum tree_code compare
;
7185 struct iv_group
*group
= data
->vgroups
[use
->group_id
];
7186 struct cost_pair
*cp
= get_group_iv_cost (data
, group
, cand
);
7191 tree var
= var_at_stmt (data
->current_loop
, cand
, use
->stmt
);
7192 tree var_type
= TREE_TYPE (var
);
7195 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7197 fprintf (dump_file
, "Replacing exit test: ");
7198 print_gimple_stmt (dump_file
, use
->stmt
, 0, TDF_SLIM
);
7201 bound
= unshare_expr (fold_convert (var_type
, bound
));
7202 op
= force_gimple_operand (bound
, &stmts
, true, NULL_TREE
);
7204 gsi_insert_seq_on_edge_immediate (
7205 loop_preheader_edge (data
->current_loop
),
7208 gcond
*cond_stmt
= as_a
<gcond
*> (use
->stmt
);
7209 gimple_cond_set_lhs (cond_stmt
, var
);
7210 gimple_cond_set_code (cond_stmt
, compare
);
7211 gimple_cond_set_rhs (cond_stmt
, op
);
7215 /* The induction variable elimination failed; just express the original
7217 comp
= get_computation_at (data
->current_loop
, use
->stmt
, use
, cand
);
7218 gcc_assert (comp
!= NULL_TREE
);
7219 gcc_assert (use
->op_p
!= NULL
);
7220 *use
->op_p
= force_gimple_operand_gsi (&bsi
, comp
, true,
7221 SSA_NAME_VAR (*use
->op_p
),
7222 true, GSI_SAME_STMT
);
7225 /* Rewrite the groups using the selected induction variables. */
7228 rewrite_groups (struct ivopts_data
*data
)
7232 for (i
= 0; i
< data
->vgroups
.length (); i
++)
7234 struct iv_group
*group
= data
->vgroups
[i
];
7235 struct iv_cand
*cand
= group
->selected
;
7239 if (group
->type
== USE_NONLINEAR_EXPR
)
7241 for (j
= 0; j
< group
->vuses
.length (); j
++)
7243 rewrite_use_nonlinear_expr (data
, group
->vuses
[j
], cand
);
7244 update_stmt (group
->vuses
[j
]->stmt
);
7247 else if (address_p (group
->type
))
7249 for (j
= 0; j
< group
->vuses
.length (); j
++)
7251 rewrite_use_address (data
, group
->vuses
[j
], cand
);
7252 update_stmt (group
->vuses
[j
]->stmt
);
7257 gcc_assert (group
->type
== USE_COMPARE
);
7259 for (j
= 0; j
< group
->vuses
.length (); j
++)
7261 rewrite_use_compare (data
, group
->vuses
[j
], cand
);
7262 update_stmt (group
->vuses
[j
]->stmt
);
7268 /* Removes the ivs that are not used after rewriting. */
7271 remove_unused_ivs (struct ivopts_data
*data
, bitmap toremove
)
7276 /* Figure out an order in which to release SSA DEFs so that we don't
7277 release something that we'd have to propagate into a debug stmt
7279 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, j
, bi
)
7281 struct version_info
*info
;
7283 info
= ver_info (data
, j
);
7285 && !integer_zerop (info
->iv
->step
)
7287 && !info
->iv
->nonlin_use
7288 && !info
->preserve_biv
)
7290 bitmap_set_bit (toremove
, SSA_NAME_VERSION (info
->iv
->ssa_name
));
7292 tree def
= info
->iv
->ssa_name
;
7294 if (MAY_HAVE_DEBUG_BIND_STMTS
&& SSA_NAME_DEF_STMT (def
))
7296 imm_use_iterator imm_iter
;
7297 use_operand_p use_p
;
7301 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7303 if (!gimple_debug_bind_p (stmt
))
7306 /* We just want to determine whether to do nothing
7307 (count == 0), to substitute the computed
7308 expression into a single use of the SSA DEF by
7309 itself (count == 1), or to use a debug temp
7310 because the SSA DEF is used multiple times or as
7311 part of a larger expression (count > 1). */
7313 if (gimple_debug_bind_get_value (stmt
) != def
)
7317 BREAK_FROM_IMM_USE_STMT (imm_iter
);
7323 struct iv_use dummy_use
;
7324 struct iv_cand
*best_cand
= NULL
, *cand
;
7325 unsigned i
, best_pref
= 0, cand_pref
;
7327 memset (&dummy_use
, 0, sizeof (dummy_use
));
7328 dummy_use
.iv
= info
->iv
;
7329 for (i
= 0; i
< data
->vgroups
.length () && i
< 64; i
++)
7331 cand
= data
->vgroups
[i
]->selected
;
7332 if (cand
== best_cand
)
7334 cand_pref
= operand_equal_p (cand
->iv
->step
,
7338 += TYPE_MODE (TREE_TYPE (cand
->iv
->base
))
7339 == TYPE_MODE (TREE_TYPE (info
->iv
->base
))
7342 += TREE_CODE (cand
->iv
->base
) == INTEGER_CST
7344 if (best_cand
== NULL
|| best_pref
< cand_pref
)
7347 best_pref
= cand_pref
;
7354 tree comp
= get_computation_at (data
->current_loop
,
7355 SSA_NAME_DEF_STMT (def
),
7356 &dummy_use
, best_cand
);
7362 tree vexpr
= make_node (DEBUG_EXPR_DECL
);
7363 DECL_ARTIFICIAL (vexpr
) = 1;
7364 TREE_TYPE (vexpr
) = TREE_TYPE (comp
);
7365 if (SSA_NAME_VAR (def
))
7366 SET_DECL_MODE (vexpr
, DECL_MODE (SSA_NAME_VAR (def
)));
7368 SET_DECL_MODE (vexpr
, TYPE_MODE (TREE_TYPE (vexpr
)));
7370 = gimple_build_debug_bind (vexpr
, comp
, NULL
);
7371 gimple_stmt_iterator gsi
;
7373 if (gimple_code (SSA_NAME_DEF_STMT (def
)) == GIMPLE_PHI
)
7374 gsi
= gsi_after_labels (gimple_bb
7375 (SSA_NAME_DEF_STMT (def
)));
7377 gsi
= gsi_for_stmt (SSA_NAME_DEF_STMT (def
));
7379 gsi_insert_before (&gsi
, def_temp
, GSI_SAME_STMT
);
7383 FOR_EACH_IMM_USE_STMT (stmt
, imm_iter
, def
)
7385 if (!gimple_debug_bind_p (stmt
))
7388 FOR_EACH_IMM_USE_ON_STMT (use_p
, imm_iter
)
7389 SET_USE (use_p
, comp
);
7398 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7399 for hash_map::traverse. */
/* NOTE(review): only the signature survives in this extraction; the function
   body (original lines 7403-7406) is missing.  Presumably it releases VALUE
   and returns true so that hash_map::traverse keeps iterating -- confirm
   against the full file.  The unnamed edge key and void* closure argument
   are ignored by the callback.  */
7402 free_tree_niter_desc (edge
const &, tree_niter_desc
*const &value
, void *)
7408 /* Frees data allocated by the optimization of a single loop. */
/* NOTE(review): this chunk is a mangled extraction -- statements are split
   across lines and the jumps in the embedded original line numbers (7411,
   7419, 7424, ...) show that some lines (local declarations, braces, guards
   and a few field resets) were dropped.  The comments below describe only
   the statements that are visible here.  */
7411 free_loop_data (struct ivopts_data
*data
)
/* Release the cached number-of-iterations descriptors: free each stored
   tree_niter_desc via the free_tree_niter_desc traversal callback, then
   delete the hash_map itself and clear the pointer.  */
7419 data
->niters
->traverse
<void *, free_tree_niter_desc
> (NULL
);
7420 delete data
->niters
;
7421 data
->niters
= NULL
;
/* Reset the per-SSA-version bookkeeping of every version that was marked
   relevant for this loop.  */
7424 EXECUTE_IF_SET_IN_BITMAP (data
->relevant
, 0, i
, bi
)
7426 struct version_info
*info
;
7428 info
= ver_info (data
, i
);
7430 info
->has_nonlin_use
= false;
7431 info
->preserve_biv
= false;
7434 bitmap_clear (data
->relevant
);
7435 bitmap_clear (data
->important_candidates
);
/* Free every use group: the recorded uses, the related-candidates bitmap,
   and the per-candidate cost map including each entry's inv_vars/inv_exprs
   bitmaps.  */
7437 for (i
= 0; i
< data
->vgroups
.length (); i
++)
7439 struct iv_group
*group
= data
->vgroups
[i
];
7441 for (j
= 0; j
< group
->vuses
.length (); j
++)
7442 free (group
->vuses
[j
]);
7443 group
->vuses
.release ();
7445 BITMAP_FREE (group
->related_cands
);
7446 for (j
= 0; j
< group
->n_map_members
; j
++)
7448 if (group
->cost_map
[j
].inv_vars
)
7449 BITMAP_FREE (group
->cost_map
[j
].inv_vars
);
7450 if (group
->cost_map
[j
].inv_exprs
)
7451 BITMAP_FREE (group
->cost_map
[j
].inv_exprs
);
7454 free (group
->cost_map
);
7457 data
->vgroups
.truncate (0);
/* Free the bitmaps owned by each induction-variable candidate.  */
7459 for (i
= 0; i
< data
->vcands
.length (); i
++)
7461 struct iv_cand
*cand
= data
->vcands
[i
];
7464 BITMAP_FREE (cand
->inv_vars
);
7465 if (cand
->inv_exprs
)
7466 BITMAP_FREE (cand
->inv_exprs
);
7469 data
->vcands
.truncate (0);
/* Grow (and zero) the version_info array if new SSA names were created
   while processing this loop; doubling keeps reallocations amortized.  */
7471 if (data
->version_info_size
< num_ssa_names
)
7473 data
->version_info_size
= 2 * num_ssa_names
;
7474 free (data
->version_info
);
7475 data
->version_info
= XCNEWVEC (struct version_info
, data
->version_info_size
);
7478 data
->max_inv_var_id
= 0;
7479 data
->max_inv_expr_id
= 0;
/* Clear the DECL_RTLs that were temporarily set during cost computation.  */
7481 FOR_EACH_VEC_ELT (decl_rtl_to_reset
, i
, obj
)
7482 SET_DECL_RTL (obj
, NULL_RTX
);
7484 decl_rtl_to_reset
.truncate (0);
/* Empty (but keep allocated) the invariant-expression hash table and the
   common-candidate table/vector for reuse by the next loop.  */
7486 data
->inv_expr_tab
->empty ();
7488 data
->iv_common_cand_tab
->empty ();
7489 data
->iv_common_cands
.truncate (0);
7492 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7496 tree_ssa_iv_optimize_finalize (struct ivopts_data
*data
)
7498 free_loop_data (data
);
7499 free (data
->version_info
);
7500 BITMAP_FREE (data
->relevant
);
7501 BITMAP_FREE (data
->important_candidates
);
7503 decl_rtl_to_reset
.release ();
7504 data
->vgroups
.release ();
7505 data
->vcands
.release ();
7506 delete data
->inv_expr_tab
;
7507 data
->inv_expr_tab
= NULL
;
7508 free_affine_expand_cache (&data
->name_expansion_cache
);
7509 delete data
->iv_common_cand_tab
;
7510 data
->iv_common_cand_tab
= NULL
;
7511 data
->iv_common_cands
.release ();
7512 obstack_free (&data
->iv_obstack
, NULL
);
7515 /* Returns true if the loop body BODY includes any function calls. */
7518 loop_body_includes_call (basic_block
*body
, unsigned num_nodes
)
7520 gimple_stmt_iterator gsi
;
7523 for (i
= 0; i
< num_nodes
; i
++)
7524 for (gsi
= gsi_start_bb (body
[i
]); !gsi_end_p (gsi
); gsi_next (&gsi
))
7526 gimple
*stmt
= gsi_stmt (gsi
);
7527 if (is_gimple_call (stmt
)
7528 && !gimple_call_internal_p (stmt
)
7529 && !is_inexpensive_builtin (gimple_call_fndecl (stmt
)))
7535 /* Determine cost scaling factor for basic blocks in loop. */
7536 #define COST_SCALING_FACTOR_BOUND (20)
/* NOTE(review): mangled extraction -- the embedded original line numbers
   jump (7542 -> 7545, 7557 -> 7560, 7560 -> 7567, ...), so the early
   return, several braces, the divisor assignments and the else branch of
   the bound check are missing here.  Comments cover only what is visible;
   confirm details against the full file.  */
7539 determine_scaling_factor (struct ivopts_data
*data
, basic_block
*body
)
/* Execution frequency of the loop header; block costs are scaled relative
   to it.  */
7541 int lfreq
= data
->current_loop
->header
->count
.to_frequency (cfun
);
/* Skip scaling when the loop is not optimized for speed or the header
   frequency is unusable; the guarded statement on the missing original
   lines 7543-7544 presumably just returns.  */
7542 if (!data
->speed
|| lfreq
<= 0)
/* Walk the loop body: give every block a neutral scale of 1 in its aux
   field and record the maximal block frequency seen.  */
7545 int max_freq
= lfreq
;
7546 for (unsigned i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
7548 body
[i
]->aux
= (void *)(intptr_t) 1;
7549 if (max_freq
< body
[i
]->count
.to_frequency (cfun
))
7550 max_freq
= body
[i
]->count
.to_frequency (cfun
);
/* Some block runs more often than the header: derive FACTOR and DIVISOR
   used to scale per-block costs below.  */
7552 if (max_freq
> lfreq
)
7554 int divisor
, factor
;
7555 /* Check if scaling factor itself needs to be scaled by the bound. This
7556 is to avoid overflow when scaling cost according to profile info. */
/* Cap FACTOR at COST_SCALING_FACTOR_BOUND when the frequency ratio
   exceeds the bound; the matching DIVISOR assignment and the else branch
   are on the missing original lines 7558-7566.  */
7557 if (max_freq
/ lfreq
> COST_SCALING_FACTOR_BOUND
)
7560 factor
= COST_SCALING_FACTOR_BOUND
;
/* Store each block's scale, factor * bfreq / divisor, in its aux field
   (a skip for low-frequency blocks on original lines 7570-7572 appears to
   have been dropped by the extraction).  */
7567 for (unsigned i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
7569 int bfreq
= body
[i
]->count
.to_frequency (cfun
);
7573 body
[i
]->aux
= (void*)(intptr_t) (factor
* bfreq
/ divisor
);
7578 /* Optimizes the LOOP. Returns true if anything changed. */
/* NOTE(review): mangled extraction -- the original line numbers jump
   (7586 -> 7589, 7621 -> 7624, 7644 -> 7649, ...), so the remaining
   parameters of the signature, the early-exit bodies after
   find_induction_variables / the group-count check, and the final
   return of CHANGED are missing here.  Comments describe only the
   visible statements.  */
7581 tree_ssa_iv_optimize_loop (struct ivopts_data
*data
, struct loop
*loop
,
7584 bool changed
= false;
7585 struct iv_ca
*iv_ca
;
7586 edge exit
= single_dom_exit (loop
);
/* The niters cache must have been cleared by the previous iteration's
   free_loop_data; set up the per-loop fields of DATA.  */
7589 gcc_assert (!data
->niters
);
7590 data
->current_loop
= loop
;
7591 data
->loop_loc
= find_loop_location (loop
).get_location_t ();
7592 data
->speed
= optimize_loop_for_speed_p (loop
);
/* Dump which loop is being processed, its source location and, when the
   loop has a single dominating exit, the exit condition.  */
7594 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7596 fprintf (dump_file
, "Processing loop %d", loop
->num
);
7597 if (data
->loop_loc
!= UNKNOWN_LOCATION
)
7598 fprintf (dump_file
, " at %s:%d", LOCATION_FILE (data
->loop_loc
),
7599 LOCATION_LINE (data
->loop_loc
));
7600 fprintf (dump_file
, "\n");
7604 fprintf (dump_file
, " single exit %d -> %d, exit condition ",
7605 exit
->src
->index
, exit
->dest
->index
);
7606 print_gimple_stmt (dump_file
, last_stmt (exit
->src
), 0, TDF_SLIM
);
7607 fprintf (dump_file
, "\n");
7610 fprintf (dump_file
, "\n");
/* Cache whether the body contains a (costly) call and renumber stmt uids
   so uid comparisons within the loop are meaningful.  */
7613 body
= get_loop_body (loop
);
7614 data
->body_includes_call
= loop_body_includes_call (body
, loop
->num_nodes
);
7615 renumber_gimple_stmt_uids_in_blocks (body
, loop
->num_nodes
);
7617 data
->loop_single_exit_p
= exit
!= NULL
&& loop_only_exit_p (loop
, exit
);
7619 /* For each ssa name determines whether it behaves as an induction variable
7621 if (!find_induction_variables (data
))
7624 /* Finds interesting uses (item 1). */
7625 find_interesting_uses (data
);
/* Give up on loops with too many use groups; the bailout body on the
   missing original lines 7627-7628 is not visible here.  */
7626 if (data
->vgroups
.length () > MAX_CONSIDERED_GROUPS
)
7629 /* Determine cost scaling factor for basic blocks in loop. */
7630 determine_scaling_factor (data
, body
);
7632 /* Finds candidates for the induction variables (item 2). */
7633 find_iv_candidates (data
);
7635 /* Calculates the costs (item 3, part 1). */
7636 determine_iv_costs (data
);
7637 determine_group_iv_costs (data
);
7638 determine_set_costs (data
);
7640 /* Find the optimal set of induction variables (item 3, part 2). */
7641 iv_ca
= find_optimal_iv_set (data
);
7642 /* Cleanup basic block aux field. */
7643 for (unsigned i
= 0; i
< data
->current_loop
->num_nodes
; i
++)
7644 body
[i
]->aux
= NULL
;
7649 /* Create the new induction variables (item 4, part 1). */
7650 create_new_ivs (data
, iv_ca
);
7651 iv_ca_free (&iv_ca
);
7653 /* Rewrite the uses (item 4, part 2). */
7654 rewrite_groups (data
);
7656 /* Remove the ivs that are unused after rewriting. */
7657 remove_unused_ivs (data
, toremove
);
/* Drop all per-loop state before the next loop is processed.  */
7661 free_loop_data (data
);
7666 /* Main entry point. Optimizes induction variables in loops. */
/* NOTE(review): mangled extraction -- the original line numbers jump
   (7669 -> 7672, 7683 -> 7686, 7690 -> 7693), so the loop-variable
   declaration, braces, the scev-database reset mentioned by the comment
   at 7689-7690, and the closing brace are missing here.  Comments cover
   only the visible statements.  */
7669 tree_ssa_iv_optimize (void)
/* Pass-wide state plus a bitmap collecting SSA defs of IVs that become
   dead and must be released after all loops are done.  */
7672 struct ivopts_data data
;
7673 auto_bitmap toremove
;
7675 tree_ssa_iv_optimize_init (&data
);
7677 /* Optimize the loops starting with the innermost ones. */
7678 FOR_EACH_LOOP (loop
, LI_FROM_INNERMOST
)
7680 if (dump_file
&& (dump_flags
& TDF_DETAILS
))
7681 flow_loop_dump (loop
, dump_file
, NULL
, 1);
7683 tree_ssa_iv_optimize_loop (&data
, loop
, toremove
);
7686 /* Remove eliminated IV defs. */
7687 release_defs_bitset (toremove
);
7689 /* We have changed the structure of induction variables; it might happen
7690 that definitions in the scev database refer to some of them that were
7693 /* Likewise niter and control-IV information. */
7694 free_numbers_of_iterations_estimates (cfun
);
7696 tree_ssa_iv_optimize_finalize (&data
);
7699 #include "gt-tree-ssa-loop-ivopts.h"