]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
This patch implements the unification of the *bitmap interfaces as discussed.
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
dec41e98 1/* Induction variable optimizations.
e0e5e0b3 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
0ac758f7 3 Free Software Foundation, Inc.
48e1416a 4
dec41e98 5This file is part of GCC.
48e1416a 6
dec41e98 7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
8c4c00c1 9Free Software Foundation; either version 3, or (at your option) any
dec41e98 10later version.
48e1416a 11
dec41e98 12GCC is distributed in the hope that it will be useful, but WITHOUT
13ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
48e1416a 16
dec41e98 17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
dec41e98 20
21/* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
48e1416a 59
dec41e98 60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
dec41e98 70#include "tm_p.h"
dec41e98 71#include "basic-block.h"
ce084dfc 72#include "gimple-pretty-print.h"
dec41e98 73#include "tree-flow.h"
dec41e98 74#include "cfgloop.h"
dec41e98 75#include "tree-pass.h"
76#include "ggc.h"
77#include "insn-config.h"
b30a8715 78#include "pointer-set.h"
dec41e98 79#include "hashtab.h"
80#include "tree-chrec.h"
81#include "tree-scalar-evolution.h"
82#include "cfgloop.h"
83#include "params.h"
3fd8a7ab 84#include "langhooks.h"
d3610bea 85#include "tree-affine.h"
7e995af7 86#include "target.h"
66368bf4 87#include "tree-inline.h"
f544b9a4 88#include "tree-ssa-propagate.h"
e0ae2fe2 89#include "expmed.h"
8ac8f5ae 90
8e3cb73b 91/* FIXME: Expressions are expanded to RTL in this pass to determine the
92 cost of different addressing modes. This should be moved to a TBD
93 interface between the GIMPLE and RTL worlds. */
94#include "expr.h"
aedb7bf8 95#include "recog.h"
8e3cb73b 96
/* The infinite cost.  */
#define INFTY 10000000

/* Fallback guess for the number of iterations of LOOP, used by
   avg_loop_niter when no profile-based estimate is available.  */
#define AVG_LOOP_NITER(LOOP) 5
101
66368bf4 102/* Returns the expected number of loop iterations for LOOP.
103 The average trip count is computed from profile data if it
104 exists. */
105
106static inline HOST_WIDE_INT
107avg_loop_niter (struct loop *loop)
108{
fee017b3 109 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
66368bf4 110 if (niter == -1)
111 return AVG_LOOP_NITER (loop);
112
113 return niter;
114}
dec41e98 115
/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to that the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  unsigned use_id;	/* The identifier in the use if it is the case.  */
};
127
/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};
138
/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
146
/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

/* Identity (zero) and saturating (infinite) cost values.  */
static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
159
/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};
173
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in that it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs wrto the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */
};
192
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};
202
/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};
225
/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;		/* The invariant expression.  */
  int id;		/* Its unique id (see ivopts_data::inv_expr_id).  */
  hashval_t hash;	/* Cached hash value of EXPR.  */
};
233
/* The data used by the induction variable optimizations.  */

/* Heap-allocated vectors of pointers to uses and candidates.  */
typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);
243
/* Global state of the pass, reinitialized for each optimized loop.  */
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
296
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expressions created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};
341
/* Difference of two iv candidate assignments, kept as a linked list of
   per-use changes so an attempted change can be rolled back cheaply.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
358
/* Bound on number of candidates below that all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for that the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Forward declaration; the definition appears later in this file.  */
static comp_cost force_expr_to_var_cost (tree, bool);
382
dec41e98 383/* Number of uses recorded in DATA. */
384
385static inline unsigned
386n_iv_uses (struct ivopts_data *data)
387{
7d18ea07 388 return VEC_length (iv_use_p, data->iv_uses);
dec41e98 389}
390
391/* Ith use recorded in DATA. */
392
393static inline struct iv_use *
394iv_use (struct ivopts_data *data, unsigned i)
395{
7d18ea07 396 return VEC_index (iv_use_p, data->iv_uses, i);
dec41e98 397}
398
399/* Number of candidates recorded in DATA. */
400
401static inline unsigned
402n_iv_cands (struct ivopts_data *data)
403{
7d18ea07 404 return VEC_length (iv_cand_p, data->iv_candidates);
dec41e98 405}
406
407/* Ith candidate recorded in DATA. */
408
409static inline struct iv_cand *
410iv_cand (struct ivopts_data *data, unsigned i)
411{
7d18ea07 412 return VEC_index (iv_cand_p, data->iv_candidates, i);
dec41e98 413}
414
dec41e98 415/* The single loop exit if it dominates the latch, NULL otherwise. */
416
8feba661 417edge
dec41e98 418single_dom_exit (struct loop *loop)
419{
d9e7e1a2 420 edge exit = single_exit (loop);
dec41e98 421
422 if (!exit)
423 return NULL;
424
425 if (!just_once_each_iteration_p (loop, exit->src))
426 return NULL;
427
428 return exit;
429}
430
/* Dumps information about the induction variable IV to FILE.  */

/* Exported (non-static) so it can be called from the debugger.  */
extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  /* A null step marks a loop invariant rather than a real iv.  */
  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}
475
/* Dumps information about the USE to FILE.  */

/* Exported (non-static) so it can be called from the debugger.  */
extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}
519
520/* Dumps information about the uses to FILE. */
521
522extern void dump_uses (FILE *, struct ivopts_data *);
523void
524dump_uses (FILE *file, struct ivopts_data *data)
525{
526 unsigned i;
527 struct iv_use *use;
528
529 for (i = 0; i < n_iv_uses (data); i++)
530 {
531 use = iv_use (data, i);
532
533 dump_use (file, use);
534 fprintf (file, "\n");
535 }
536}
537
/* Dumps information about induction variable candidate CAND to FILE.  */

/* Exported (non-static) so it can be called from the debugger.  */
extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  /* A null iv marks the pseudocandidate for final value replacement.  */
  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}
599
600/* Returns the info for ssa version VER. */
601
602static inline struct version_info *
603ver_info (struct ivopts_data *data, unsigned ver)
604{
605 return data->version_info + ver;
606}
607
608/* Returns the info for ssa name NAME. */
609
610static inline struct version_info *
611name_info (struct ivopts_data *data, tree name)
612{
613 return ver_info (data, SSA_NAME_VERSION (name));
614}
615
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  /* The latch is executed after the increment position.  */
  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  /* Within BB itself, only the final (exit-test) statement follows the
     increment position.  */
  return stmt == last_stmt (bb);
}
634
/* Returns true if STMT if after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Same block: compare intra-block positions via the statement uids.  */
  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}
656
/* Returns true if STMT if after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      /* The latch increment happens after every statement.  */
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      /* The statement at the increment position itself already sees the
	 incremented value.  */
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
682
dcd24d3c 683/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
684
685static bool
686abnormal_ssa_name_p (tree exp)
687{
688 if (!exp)
689 return false;
690
691 if (TREE_CODE (exp) != SSA_NAME)
692 return false;
693
694 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
695}
696
697/* Returns false if BASE or INDEX contains a ssa name that occurs in an
698 abnormal phi node. Callback for for_each_index. */
699
700static bool
701idx_contains_abnormal_ssa_name_p (tree base, tree *index,
702 void *data ATTRIBUTE_UNUSED)
703{
5e19919e 704 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dcd24d3c 705 {
706 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
707 return false;
708 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
709 return false;
710 }
711
712 return !abnormal_ssa_name_p (*index);
713}
714
/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  /* Constants and other gimple invariants cannot contain ssa names.  */
  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  /* For addresses, walk all indices of the referenced object.  */
  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
767
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  Results
   are cached in DATA->niters (created lazily), keyed by EXIT; a NULL
   value is cached too, so a failed analysis is not retried.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}
806
08748e8d 807/* Returns the structure describing number of iterations determined from
b091dc59 808 single dominating exit of DATA->current_loop, or NULL if something
809 goes wrong. */
810
08748e8d 811static struct tree_niter_desc *
b091dc59 812niter_for_single_dom_exit (struct ivopts_data *data)
813{
814 edge exit = single_dom_exit (data->current_loop);
815
816 if (!exit)
817 return NULL;
818
08748e8d 819 return niter_for_exit (data, exit);
b091dc59 820}
821
66368bf4 822/* Hash table equality function for expressions. */
823
824static int
825htab_inv_expr_eq (const void *ent1, const void *ent2)
826{
827 const struct iv_inv_expr_ent *expr1 =
828 (const struct iv_inv_expr_ent *)ent1;
829 const struct iv_inv_expr_ent *expr2 =
830 (const struct iv_inv_expr_ent *)ent2;
831
160ecf52 832 return expr1->hash == expr2->hash
833 && operand_equal_p (expr1->expr, expr2->expr, 0);
66368bf4 834}
835
836/* Hash function for loop invariant expressions. */
837
838static hashval_t
839htab_inv_expr_hash (const void *ent)
840{
841 const struct iv_inv_expr_ent *expr =
842 (const struct iv_inv_expr_ent *)ent;
843 return expr->hash;
844}
845
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  /* Leave headroom for ssa names created during the pass.  */
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;	/* Created lazily by niter_for_exit.  */
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}
865
/* Returns a memory object to that EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      /* A constant pointer does not point to a trackable object.  */
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      /* The base object is determined by the pointer operand only.  */
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
914
dec41e98 915/* Allocates an induction variable with given initial value BASE and step STEP
916 for loop LOOP. */
917
918static struct iv *
919alloc_iv (tree base, tree step)
920{
4c36ffe6 921 struct iv *iv = XCNEW (struct iv);
7a973feb 922 gcc_assert (step != NULL_TREE);
dec41e98 923
924 iv->base = base;
065d134e 925 iv->base_object = determine_base_object (base);
dec41e98 926 iv->step = step;
927 iv->biv_p = false;
928 iv->have_use_for = false;
929 iv->use_id = 0;
930 iv->ssa_name = NULL_TREE;
931
932 return iv;
933}
934
935/* Sets STEP and BASE for induction variable IV. */
936
937static void
938set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
939{
940 struct version_info *info = name_info (data, iv);
941
8c0963c4 942 gcc_assert (!info->iv);
dec41e98 943
944 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
945 info->iv = alloc_iv (base, step);
946 info->iv->ssa_name = iv;
947}
948
/* Finds induction variable declaration for VAR.  Returns NULL for names
   of non-arithmetic type.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      /* A name defined outside the loop is a loop invariant; record it
	 as an iv with zero step.  */
      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}
972
/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  /* Virtual operands (memory ssa) never form ivs.  */
  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  /* A zero step means the name is a loop invariant, not a biv.  */
  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
991
dec41e98 992/* Finds basic ivs. */
993
994static bool
995find_bivs (struct ivopts_data *data)
996{
75a70cf9 997 gimple phi;
998 tree step, type, base;
dec41e98 999 bool found = false;
1000 struct loop *loop = data->current_loop;
75a70cf9 1001 gimple_stmt_iterator psi;
dec41e98 1002
75a70cf9 1003 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
dec41e98 1004 {
75a70cf9 1005 phi = gsi_stmt (psi);
1006
dec41e98 1007 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1008 continue;
1009
1010 step = determine_biv_step (phi);
dec41e98 1011 if (!step)
1012 continue;
dec41e98 1013
1014 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
b9845e76 1015 base = expand_simple_operations (base);
651874e1 1016 if (contains_abnormal_ssa_name_p (base)
1017 || contains_abnormal_ssa_name_p (step))
dec41e98 1018 continue;
1019
1020 type = TREE_TYPE (PHI_RESULT (phi));
1021 base = fold_convert (type, base);
651874e1 1022 if (step)
06240723 1023 {
1024 if (POINTER_TYPE_P (type))
a0553bff 1025 step = convert_to_ptrofftype (step);
06240723 1026 else
1027 step = fold_convert (type, step);
1028 }
dec41e98 1029
1030 set_iv (data, PHI_RESULT (phi), base, step);
1031 found = true;
1032 }
1033
1034 return found;
1035}
1036
/* Marks basic ivs: flags each recorded header-phi iv and its increment
   result (the latch argument) as bivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}
1072
1073/* Checks whether STMT defines a linear induction variable and stores its
553b9523 1074 parameters to IV. */
dec41e98 1075
1076static bool
75a70cf9 1077find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
dec41e98 1078{
1079 tree lhs;
1080 struct loop *loop = data->current_loop;
1081
553b9523 1082 iv->base = NULL_TREE;
1083 iv->step = NULL_TREE;
dec41e98 1084
75a70cf9 1085 if (gimple_code (stmt) != GIMPLE_ASSIGN)
dec41e98 1086 return false;
1087
75a70cf9 1088 lhs = gimple_assign_lhs (stmt);
dec41e98 1089 if (TREE_CODE (lhs) != SSA_NAME)
1090 return false;
1091
76610704 1092 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
dec41e98 1093 return false;
553b9523 1094 iv->base = expand_simple_operations (iv->base);
dec41e98 1095
553b9523 1096 if (contains_abnormal_ssa_name_p (iv->base)
1097 || contains_abnormal_ssa_name_p (iv->step))
dec41e98 1098 return false;
1099
6623b33f 1100 /* If STMT could throw, then do not consider STMT as defining a GIV.
1101 While this will suppress optimizations, we can not safely delete this
1102 GIV and associated statements, even if it appears it is not used. */
1103 if (stmt_could_throw_p (stmt))
1104 return false;
1105
dec41e98 1106 return true;
1107}
1108
1109/* Finds general ivs in statement STMT. */
1110
1111static void
75a70cf9 1112find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
dec41e98 1113{
553b9523 1114 affine_iv iv;
dec41e98 1115
553b9523 1116 if (!find_givs_in_stmt_scev (data, stmt, &iv))
dec41e98 1117 return;
1118
75a70cf9 1119 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
dec41e98 1120}
1121
1122/* Finds general ivs in basic block BB. */
1123
1124static void
1125find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1126{
75a70cf9 1127 gimple_stmt_iterator bsi;
dec41e98 1128
75a70cf9 1129 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1130 find_givs_in_stmt (data, gsi_stmt (bsi));
dec41e98 1131}
1132
1133/* Finds general ivs. */
1134
1135static void
1136find_givs (struct ivopts_data *data)
1137{
1138 struct loop *loop = data->current_loop;
1139 basic_block *body = get_loop_body_in_dom_order (loop);
1140 unsigned i;
1141
1142 for (i = 0; i < loop->num_nodes; i++)
1143 find_givs_in_bb (data, body[i]);
1144 free (body);
1145}
1146
dec41e98 1147/* For each ssa name defined in LOOP determines whether it is an induction
1148 variable and if so, its initial value and step. */
1149
1150static bool
1151find_induction_variables (struct ivopts_data *data)
1152{
1153 unsigned i;
0cc4271a 1154 bitmap_iterator bi;
dec41e98 1155
1156 if (!find_bivs (data))
1157 return false;
1158
1159 find_givs (data);
1160 mark_bivs (data);
dec41e98 1161
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 {
08748e8d 1164 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
b091dc59 1165
1166 if (niter)
dec41e98 1167 {
1168 fprintf (dump_file, " number of iterations ");
08748e8d 1169 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1170 if (!integer_zerop (niter->may_be_zero))
1171 {
1172 fprintf (dump_file, "; zero if ");
1173 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1174 }
dcd24d3c 1175 fprintf (dump_file, "\n\n");
dec41e98 1176 };
48e1416a 1177
dec41e98 1178 fprintf (dump_file, "Induction variables:\n\n");
1179
0cc4271a 1180 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
dec41e98 1181 {
1182 if (ver_info (data, i)->iv)
1183 dump_iv (dump_file, ver_info (data, i)->iv);
0cc4271a 1184 }
dec41e98 1185 }
1186
1187 return true;
1188}
1189
1190/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1191
1192static struct iv_use *
1193record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
75a70cf9 1194 gimple stmt, enum use_type use_type)
dec41e98 1195{
4c36ffe6 1196 struct iv_use *use = XCNEW (struct iv_use);
dec41e98 1197
1198 use->id = n_iv_uses (data);
1199 use->type = use_type;
1200 use->iv = iv;
1201 use->stmt = stmt;
1202 use->op_p = use_p;
27335ffd 1203 use->related_cands = BITMAP_ALLOC (NULL);
dec41e98 1204
065d134e 1205 /* To avoid showing ssa name in the dumps, if it was not reset by the
1206 caller. */
1207 iv->ssa_name = NULL_TREE;
1208
dec41e98 1209 if (dump_file && (dump_flags & TDF_DETAILS))
1210 dump_use (dump_file, use);
1211
7d18ea07 1212 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
dec41e98 1213
1214 return use;
1215}
1216
1217/* Checks whether OP is a loop-level invariant and if so, records it.
1218 NONLINEAR_USE is true if the invariant is used in a way we do not
1219 handle specially. */
1220
1221static void
1222record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1223{
1224 basic_block bb;
1225 struct version_info *info;
1226
1227 if (TREE_CODE (op) != SSA_NAME
7c782c9b 1228 || virtual_operand_p (op))
dec41e98 1229 return;
1230
75a70cf9 1231 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
dec41e98 1232 if (bb
1233 && flow_bb_inside_loop_p (data->current_loop, bb))
1234 return;
1235
1236 info = name_info (data, op);
1237 info->name = op;
1238 info->has_nonlin_use |= nonlinear_use;
1239 if (!info->inv_id)
1240 info->inv_id = ++data->max_inv_id;
1241 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1242}
1243
3659c47c 1244/* Checks whether the use OP is interesting and if so, records it. */
dec41e98 1245
1246static struct iv_use *
3659c47c 1247find_interesting_uses_op (struct ivopts_data *data, tree op)
dec41e98 1248{
1249 struct iv *iv;
1250 struct iv *civ;
75a70cf9 1251 gimple stmt;
dec41e98 1252 struct iv_use *use;
1253
1254 if (TREE_CODE (op) != SSA_NAME)
1255 return NULL;
1256
1257 iv = get_iv (data, op);
1258 if (!iv)
1259 return NULL;
48e1416a 1260
dec41e98 1261 if (iv->have_use_for)
1262 {
1263 use = iv_use (data, iv->use_id);
1264
3659c47c 1265 gcc_assert (use->type == USE_NONLINEAR_EXPR);
dec41e98 1266 return use;
1267 }
1268
7a973feb 1269 if (integer_zerop (iv->step))
dec41e98 1270 {
1271 record_invariant (data, op, true);
1272 return NULL;
1273 }
1274 iv->have_use_for = true;
1275
4c36ffe6 1276 civ = XNEW (struct iv);
dec41e98 1277 *civ = *iv;
1278
1279 stmt = SSA_NAME_DEF_STMT (op);
75a70cf9 1280 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1281 || is_gimple_assign (stmt));
dec41e98 1282
3659c47c 1283 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
dec41e98 1284 iv->use_id = use->id;
1285
1286 return use;
1287}
1288
75a70cf9 1289/* Given a condition in statement STMT, checks whether it is a compare
1290 of an induction variable and an invariant. If this is the case,
1291 CONTROL_VAR is set to location of the iv, BOUND to the location of
1292 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1293 induction variable descriptions, and true is returned. If this is not
1294 the case, CONTROL_VAR and BOUND are set to the arguments of the
1295 condition and false is returned. */
dec41e98 1296
02f8cd87 1297static bool
75a70cf9 1298extract_cond_operands (struct ivopts_data *data, gimple stmt,
02f8cd87 1299 tree **control_var, tree **bound,
1300 struct iv **iv_var, struct iv **iv_bound)
1301{
75a70cf9 1302 /* The objects returned when COND has constant operands. */
02f8cd87 1303 static struct iv const_iv;
1304 static tree zero;
02f8cd87 1305 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1306 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1307 bool ret = false;
1308
75a70cf9 1309 if (gimple_code (stmt) == GIMPLE_COND)
dec41e98 1310 {
75a70cf9 1311 op0 = gimple_cond_lhs_ptr (stmt);
1312 op1 = gimple_cond_rhs_ptr (stmt);
dec41e98 1313 }
75a70cf9 1314 else
dec41e98 1315 {
75a70cf9 1316 op0 = gimple_assign_rhs1_ptr (stmt);
1317 op1 = gimple_assign_rhs2_ptr (stmt);
dec41e98 1318 }
1319
75a70cf9 1320 zero = integer_zero_node;
1321 const_iv.step = integer_zero_node;
1322
02f8cd87 1323 if (TREE_CODE (*op0) == SSA_NAME)
1324 iv0 = get_iv (data, *op0);
1325 if (TREE_CODE (*op1) == SSA_NAME)
1326 iv1 = get_iv (data, *op1);
dec41e98 1327
02f8cd87 1328 /* Exactly one of the compared values must be an iv, and the other one must
1329 be an invariant. */
1330 if (!iv0 || !iv1)
1331 goto end;
1332
1333 if (integer_zerop (iv0->step))
1334 {
1335 /* Control variable may be on the other side. */
1336 tmp_op = op0; op0 = op1; op1 = tmp_op;
1337 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
dec41e98 1338 }
02f8cd87 1339 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1340
1341end:
1342 if (control_var)
1343 *control_var = op0;;
1344 if (iv_var)
1345 *iv_var = iv0;;
1346 if (bound)
1347 *bound = op1;
1348 if (iv_bound)
1349 *iv_bound = iv1;
1350
1351 return ret;
1352}
1353
75a70cf9 1354/* Checks whether the condition in STMT is interesting and if so,
1355 records it. */
02f8cd87 1356
1357static void
75a70cf9 1358find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
02f8cd87 1359{
1360 tree *var_p, *bound_p;
1361 struct iv *var_iv, *civ;
dec41e98 1362
75a70cf9 1363 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
dec41e98 1364 {
02f8cd87 1365 find_interesting_uses_op (data, *var_p);
1366 find_interesting_uses_op (data, *bound_p);
dec41e98 1367 return;
1368 }
1369
4c36ffe6 1370 civ = XNEW (struct iv);
02f8cd87 1371 *civ = *var_iv;
75a70cf9 1372 record_use (data, NULL, civ, stmt, USE_COMPARE);
dec41e98 1373}
1374
a59824bb 1375/* Returns true if expression EXPR is obviously invariant in LOOP,
663608f5 1376 i.e. if all its operands are defined outside of the LOOP. LOOP
1377 should not be the function body. */
a59824bb 1378
1b5c3dde 1379bool
a59824bb 1380expr_invariant_in_loop_p (struct loop *loop, tree expr)
1381{
1382 basic_block def_bb;
1383 unsigned i, len;
1384
663608f5 1385 gcc_assert (loop_depth (loop) > 0);
1386
a59824bb 1387 if (is_gimple_min_invariant (expr))
1388 return true;
1389
1390 if (TREE_CODE (expr) == SSA_NAME)
1391 {
75a70cf9 1392 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
a59824bb 1393 if (def_bb
1394 && flow_bb_inside_loop_p (loop, def_bb))
1395 return false;
1396
1397 return true;
1398 }
1399
75a70cf9 1400 if (!EXPR_P (expr))
a59824bb 1401 return false;
1402
c2f47e15 1403 len = TREE_OPERAND_LENGTH (expr);
a59824bb 1404 for (i = 0; i < len; i++)
12faa8e4 1405 if (TREE_OPERAND (expr, i)
1406 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
a59824bb 1407 return false;
1408
1409 return true;
1410}
1411
75a70cf9 1412/* Returns true if statement STMT is obviously invariant in LOOP,
1413 i.e. if all its operands on the RHS are defined outside of the LOOP.
1414 LOOP should not be the function body. */
1415
1416bool
1417stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1418{
1419 unsigned i;
1420 tree lhs;
1421
1422 gcc_assert (loop_depth (loop) > 0);
1423
1424 lhs = gimple_get_lhs (stmt);
1425 for (i = 0; i < gimple_num_ops (stmt); i++)
1426 {
1427 tree op = gimple_op (stmt, i);
1428 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1429 return false;
1430 }
1431
1432 return true;
1433}
1434
/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be determined.
   Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;	/* Global information of the pass.  */
  gimple stmt;				/* Statement containing the reference.  */
  tree step;				/* Accumulated step of all the indices.  */
};
1445
/* Callback for for_each_index (see struct ifs_ivopts_data above): folds
   the step contributed by index *IDX of reference BASE into DATA->step
   and replaces *IDX with the initial value of its iv.  Returns false
   when the contribution cannot be determined safely.  */

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  /* A non-SSA index contributes no step.  */
  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  /* An iv with zero step contributes nothing to the total step.  */
  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetics already is 1 byte.  */
    step = size_one_node;

  /* Convert the iv to sizetype before scaling; this also checks that
     the index evolution does not wrap.  */
  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    false))
    {
      /* The index might wrap.  */
      return false;
    }

  /* Accumulate element_size * iv_step into the total step.  */
  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
1523
1524/* Records use in index IDX. Callback for for_each_index. Ivopts data
1525 object is passed to it in DATA. */
1526
1527static bool
dbc64c75 1528idx_record_use (tree base, tree *idx,
f0d6e81c 1529 void *vdata)
dec41e98 1530{
f0d6e81c 1531 struct ivopts_data *data = (struct ivopts_data *) vdata;
dec41e98 1532 find_interesting_uses_op (data, *idx);
5e19919e 1533 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dbc64c75 1534 {
1535 find_interesting_uses_op (data, array_ref_element_size (base));
1536 find_interesting_uses_op (data, array_ref_low_bound (base));
1537 }
dec41e98 1538 return true;
1539}
1540
/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  /* Identical expressions: TOP = 1 * BOT.  */
  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      /* (X * c) is a multiple of BOT if X is; the ratio is res * c.  */
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* A sum or difference is a multiple if both operands are.  */
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      /* Both constants: divide and require a zero remainder.  */
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
	return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
1603
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
			      &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      /* The base object or the bit position within it may be
	 insufficiently aligned for MODE.  */
      if (base_align < mode_align
	  || (bitpos % mode_align) != 0
	  || (bitpos % BITS_PER_UNIT) != 0)
	return true;

      /* The variable part of the offset may destroy the alignment.  */
      if (toffset
	  && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
	return true;

      /* The step of the iv may destroy the alignment.  */
      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
	return true;
    }

  return false;
}
1651
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      /* A field marked non-addressable, or a non-addressable base.  */
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      /* Addressable iff the underlying object is.  */
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      /* A conversion is never a valid lvalue to take the address of.  */
      return true;

    default:
      break;
    }

  return false;
}
1692
/* Finds addresses in *OP_P inside STMT.  If *OP_P is a usable induction
   address, records a USE_ADDRESS use for it; otherwise falls back to
   recording the individual indices as scalar uses.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      /* For a TARGET_MEM_REF, substitute the bases of the base and
	 index ivs and accumulate their steps into STEP.  */
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      /* The index step is scaled by TMR_STEP, if present.  */
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      /* Walk the indices of the reference, accumulating their steps.  */
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  /* Fallback: record the individual indices as ordinary uses.  */
  for_each_index (op_p, idx_record_use, data);
}
1809
1810/* Finds and records invariants used in STMT. */
1811
1812static void
75a70cf9 1813find_invariants_stmt (struct ivopts_data *data, gimple stmt)
dec41e98 1814{
b66731e8 1815 ssa_op_iter iter;
1816 use_operand_p use_p;
dec41e98 1817 tree op;
1818
b66731e8 1819 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
dec41e98 1820 {
b66731e8 1821 op = USE_FROM_PTR (use_p);
dec41e98 1822 record_invariant (data, op, false);
1823 }
1824}
1825
/* Finds interesting uses of induction variables in the statement STMT.
   Dispatches to the condition/address/operand handlers depending on the
   statement shape; statements defining an iv are not interesting by
   themselves.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  /* Plain copy or load/store: look for address uses on either
	     side, or a plain operand use.  */
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      /* Header PHIs defining an iv are the ivs themselves, not uses.  */
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  /* Fallback: record every SSA use operand that is an iv.  */
  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}
1913
1914/* Finds interesting uses of induction variables outside of loops
1915 on loop exit edge EXIT. */
1916
1917static void
1918find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1919{
75a70cf9 1920 gimple phi;
1921 gimple_stmt_iterator psi;
1922 tree def;
dec41e98 1923
75a70cf9 1924 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
dec41e98 1925 {
75a70cf9 1926 phi = gsi_stmt (psi);
dec41e98 1927 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
7c782c9b 1928 if (!virtual_operand_p (def))
66368bf4 1929 find_interesting_uses_op (data, def);
dec41e98 1930 }
1931}
1932
1933/* Finds uses of the induction variables that are interesting. */
1934
1935static void
1936find_interesting_uses (struct ivopts_data *data)
1937{
1938 basic_block bb;
75a70cf9 1939 gimple_stmt_iterator bsi;
dec41e98 1940 basic_block *body = get_loop_body (data->current_loop);
1941 unsigned i;
1942 struct version_info *info;
1943 edge e;
1944
1945 if (dump_file && (dump_flags & TDF_DETAILS))
1946 fprintf (dump_file, "Uses:\n\n");
1947
1948 for (i = 0; i < data->current_loop->num_nodes; i++)
1949 {
cd665a06 1950 edge_iterator ei;
dec41e98 1951 bb = body[i];
1952
cd665a06 1953 FOR_EACH_EDGE (e, ei, bb->succs)
dec41e98 1954 if (e->dest != EXIT_BLOCK_PTR
1955 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1956 find_interesting_uses_outside (data, e);
1957
75a70cf9 1958 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1959 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1960 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
9845d120 1961 if (!is_gimple_debug (gsi_stmt (bsi)))
1962 find_interesting_uses_stmt (data, gsi_stmt (bsi));
dec41e98 1963 }
1964
1965 if (dump_file && (dump_flags & TDF_DETAILS))
1966 {
0cc4271a 1967 bitmap_iterator bi;
1968
dec41e98 1969 fprintf (dump_file, "\n");
1970
0cc4271a 1971 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
dec41e98 1972 {
1973 info = ver_info (data, i);
1974 if (info->inv_id)
1975 {
1976 fprintf (dump_file, " ");
1977 print_generic_expr (dump_file, info->name, TDF_SLIM);
1978 fprintf (dump_file, " is invariant (%d)%s\n",
1979 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1980 }
0cc4271a 1981 }
dec41e98 1982
1983 fprintf (dump_file, "\n");
1984 }
1985
1986 free (body);
1987}
1988
/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  Returns the stripped
   expression (possibly ORIG_EXPR unchanged when nothing could be removed).  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      /* A whole constant becomes the offset; replace it by zero.  */
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Strip both operands and combine their offsets.  */
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      /* Only a constant multiplier scales the stripped offset.  */
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      /* The offset is index * element_size.  */
      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      if (!inside_addr)
	return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
	  && cst_and_fits_in_hwi (tmp))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset = off0 + int_cst_value (tmp);
	  return op0;
	}
      break;

    case ADDR_EXPR:
      /* Recurse into the addressed object, now inside an address.  */
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand? */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for that we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside address, we might strip the top level component references,
     thus changing type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}
2148
2149/* Strips constant offsets from EXPR and stores them to OFFSET. */
2150
2151static tree
2152strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2153{
2154 return strip_offset_1 (expr, false, false, offset);
390955a6 2155}
2156
d086c307 2157/* Returns variant of TYPE that can be used as base for different uses.
318a3281 2158 We return unsigned type with the same precision, which avoids problems
2159 with overflows. */
d086c307 2160
2161static tree
2162generic_type_for (tree type)
2163{
2164 if (POINTER_TYPE_P (type))
318a3281 2165 return unsigned_type_for (type);
d086c307 2166
2167 if (TYPE_UNSIGNED (type))
2168 return type;
2169
2170 return unsigned_type_for (type);
2171}
2172
/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap to that we should store it.  */

/* Communicates the ivopts_data instance to find_depends, since walk_tree
   gives us only one user data pointer (used for the bitmap).  */
static struct ivopts_data *fd_ivopts_data;

/* walk_tree callback: if *EXPR_P is an SSA name recorded as a loop
   invariant, set its invariant id in the bitmap pointed to by DATA
   (allocating the bitmap lazily).  Always returns NULL_TREE so the walk
   continues over the whole expression.  */
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  /* Only SSA names can be recorded invariants.  */
  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  /* Skip names that are not invariants, or that are also used
     non-linearly (those are handled elsewhere).  */
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  /* Allocate the dependence bitmap on first use.  */
  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}
2196
/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.
   Returns the (possibly preexisting) candidate.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  /* Look for an existing candidate with the same position, increment
     statement, base and step, so that candidates are not duplicated.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      /* A candidate without an iv matches only the pseudocandidate
	 request (both BASE and STEP NULL).  */
      if (!cand->iv)
	{
	  if (!base && !step)
	    break;

	  continue;
	}

      if (!base && !step)
	continue;

      /* Precision must match as well; otherwise the same base/step pair
	 in a narrower type would be conflated with the wider one.  */
      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
          && (TYPE_PRECISION (TREE_TYPE (base))
              == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  /* No match found -- create a fresh candidate with id I.  */
  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
	cand->iv = NULL;
      else
	cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      /* A non-constant step may reference loop invariants; record them
	 so their cost can be accounted for.  */
      if (step
	  && TREE_CODE (step) != INTEGER_CST)
	{
	  fd_ivopts_data = data;
	  walk_tree (&step, find_depends, &cand->depends_on, NULL);
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  /* An existing candidate may be promoted to important.  */
  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Candidate %d is related to use %d\n",
		 cand->id, use->id);
    }

  return cand;
}
2308
5414270a 2309/* Returns true if incrementing the induction variable at the end of the LOOP
2310 is allowed.
2311
2312 The purpose is to avoid splitting latch edge with a biv increment, thus
2313 creating a jump, possibly confusing other optimization passes and leaving
2314 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2315 is not available (so we do not have a better alternative), or if the latch
2316 edge is already nonempty. */
2317
2318static bool
2319allow_ip_end_pos_p (struct loop *loop)
2320{
2321 if (!ip_normal_pos (loop))
2322 return true;
2323
2324 if (!empty_block_p (ip_end_pos (loop)))
2325 return true;
2326
2327 return false;
2328}
2329
/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  Candidates are added only when the
   target supports a pre/post inc/dec addressing mode whose implicit step
   matches the iv step for the accessed memory mode.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  enum machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  /* Pre-increment/decrement: the increment executes before the use, so
     the candidate's base is shifted back by one step (built below).  */
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  /* Pointers have no MINUS_EXPR -- add the negated step instead.  */
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  /* Post-increment/decrement: the increment executes after the use, so
     the base is unchanged.  */
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}
2387
dec41e98 2388/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2389 position to POS. If USE is not NULL, the candidate is set as related to
2390 it. The candidate computation is scheduled on all available positions. */
2391
2392static void
48e1416a 2393add_candidate (struct ivopts_data *data,
dec41e98 2394 tree base, tree step, bool important, struct iv_use *use)
2395{
2396 if (ip_normal_pos (data->current_loop))
75a70cf9 2397 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
5414270a 2398 if (ip_end_pos (data->current_loop)
2399 && allow_ip_end_pos_p (data->current_loop))
75a70cf9 2400 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
3ee48c5c 2401
2402 if (use != NULL && use->type == USE_ADDRESS)
2403 add_autoinc_candidates (data, base, step, important, use);
dec41e98 2404}
2405
/* Adds standard iv candidates: a 0 + 1 * i counter in int, and the same in
   long and long long when those types are wider but still no wider than a
   machine word.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for the long type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for the long long type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}
2427
2428
/* Adds candidates based on the old induction variable IV: the iv itself,
   the same iv started at zero, and (when the defining statement is a PHI)
   the possibility of leaving the original iv untouched.  */

static void
add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
{
  gimple phi;
  tree def;
  struct iv_cand *cand;

  add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    /* Pointer bases get a sizetype zero, as pointer arithmetic is done
       in sizetype.  */
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      cand = add_candidate_1 (data,
			      iv->base, iv->step, true, IP_ORIGINAL, NULL,
			      SSA_NAME_DEF_STMT (def));
      /* The original iv's name before and after the latch increment.  */
      cand->var_before = iv->ssa_name;
      cand->var_after = def;
    }
}
2460
2461/* Adds candidates based on the old induction variables. */
2462
2463static void
2464add_old_ivs_candidates (struct ivopts_data *data)
2465{
2466 unsigned i;
2467 struct iv *iv;
0cc4271a 2468 bitmap_iterator bi;
dec41e98 2469
0cc4271a 2470 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
dec41e98 2471 {
2472 iv = ver_info (data, i)->iv;
7a973feb 2473 if (iv && iv->biv_p && !integer_zerop (iv->step))
dec41e98 2474 add_old_iv_candidates (data, iv);
0cc4271a 2475 }
dec41e98 2476}
2477
/* Adds candidates based on the value of the induction variable IV and USE:
   the iv itself, the iv started at zero, and the iv with its constant
   offset stripped.  */

static void
add_iv_value_candidates (struct ivopts_data *data,
			 struct iv *iv, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;

  add_candidate (data, iv->base, iv->step, false, use);

  /* The same, but with initial value zero.  Make such variable important,
     since it is generic enough so that possibly many uses may be based
     on it.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    /* Pointer arithmetic is performed in sizetype.  */
    basetype = sizetype;
  add_candidate (data, build_int_cst (basetype, 0),
		 iv->step, true, use);

  /* Third, try removing the constant offset.  Make sure to even
     add a candidate for &a[0] vs. (T *)&a.  */
  base = strip_offset (iv->base, &offset);
  if (offset
      || base != iv->base)
    add_candidate (data, base, iv->step, false, use);
}
2506
dec41e98 2507/* Adds candidates based on the uses. */
2508
2509static void
2510add_derived_ivs_candidates (struct ivopts_data *data)
2511{
2512 unsigned i;
2513
2514 for (i = 0; i < n_iv_uses (data); i++)
2515 {
2516 struct iv_use *use = iv_use (data, i);
2517
2518 if (!use)
2519 continue;
2520
2521 switch (use->type)
2522 {
2523 case USE_NONLINEAR_EXPR:
2524 case USE_COMPARE:
651874e1 2525 case USE_ADDRESS:
dec41e98 2526 /* Just add the ivs based on the value of the iv used here. */
2527 add_iv_value_candidates (data, use->iv, use);
2528 break;
2529
dec41e98 2530 default:
8c0963c4 2531 gcc_unreachable ();
dec41e98 2532 }
2533 }
2534}
2535
/* Record important candidates and add them to related_cands bitmaps
   if needed.  Also decides whether all candidates will be considered
   for every use (cheap enough) or only the related ones.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  /* Collect the ids of all important candidates into one bitmap.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  /* With few candidates, considering every (use, candidate) pair is
     affordable.  */
  data->consider_all_candidates = (n_iv_cands (data)
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  if (data->consider_all_candidates)
    {
      /* We will not need "related_cands" bitmaps in this case,
	 so release them to decrease peak memory consumption.  */
      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);
	  BITMAP_FREE (use->related_cands);
	}
    }
  else
    {
      /* Add important candidates to the related_cands bitmaps.  */
      for (i = 0; i < n_iv_uses (data); i++)
	bitmap_ior_into (iv_use (data, i)->related_cands,
			 data->important_candidates);
    }
}
2574
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
   If consider_all_candidates is true, we use a two-dimensional array, otherwise
   we allocate a simple list to every use.  */

static void
alloc_use_cost_map (struct ivopts_data *data)
{
  unsigned i, size, s, j;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);
      bitmap_iterator bi;

      if (data->consider_all_candidates)
	/* Direct-indexed array: one slot per candidate id.  */
	size = n_iv_cands (data);
      else
	{
	  /* Count the related candidates...  */
	  s = 0;
	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      s++;
	    }

	  /* Round up to the power of two, so that moduling by it is fast.
	     set_use_iv_cost/get_use_iv_cost rely on n_map_members being a
	     power of two when probing this table.  */
	  for (size = 1; size < s; size <<= 1)
	    continue;
	}

      use->n_map_members = size;
      /* XCNEWVEC zero-initializes, so empty slots have cand == NULL.  */
      use->cost_map = XCNEWVEC (struct cost_pair, size);
    }
}
2608
3c2818b0 2609/* Returns description of computation cost of expression whose runtime
2610 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2611
2612static comp_cost
2613new_cost (unsigned runtime, unsigned complexity)
2614{
2615 comp_cost cost;
2616
2617 cost.cost = runtime;
2618 cost.complexity = complexity;
2619
2620 return cost;
2621}
2622
2623/* Adds costs COST1 and COST2. */
2624
2625static comp_cost
2626add_costs (comp_cost cost1, comp_cost cost2)
2627{
2628 cost1.cost += cost2.cost;
2629 cost1.complexity += cost2.complexity;
2630
2631 return cost1;
2632}
2633/* Subtracts costs COST1 and COST2. */
2634
2635static comp_cost
2636sub_costs (comp_cost cost1, comp_cost cost2)
2637{
2638 cost1.cost -= cost2.cost;
2639 cost1.complexity -= cost2.complexity;
2640
2641 return cost1;
2642}
2643
2644/* Returns a negative number if COST1 < COST2, a positive number if
2645 COST1 > COST2, and 0 if COST1 = COST2. */
2646
2647static int
2648compare_costs (comp_cost cost1, comp_cost cost2)
2649{
2650 if (cost1.cost == cost2.cost)
2651 return cost1.complexity - cost2.complexity;
2652
2653 return cost1.cost - cost2.cost;
2654}
2655
/* Returns true if COST is infinite.  Infinite cost is represented by the
   sentinel runtime value INFTY; the complexity field is not consulted.  */

static bool
infinite_cost_p (comp_cost cost)
{
  return cost.cost == INFTY;
}
2663
/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
   on invariants DEPENDS_ON and that the value used in expressing it
   is VALUE, and in case of iv elimination the comparison operator is COMP.
   Infinite costs are not recorded at all (absence from the map means
   "infinite").  Takes ownership of DEPENDS_ON.  */

static void
set_use_iv_cost (struct ivopts_data *data,
		 struct iv_use *use, struct iv_cand *cand,
		 comp_cost cost, bitmap depends_on, tree value,
		 enum tree_code comp, int inv_expr_id)
{
  unsigned i, s;

  if (infinite_cost_p (cost))
    {
      /* Nothing to record; release the dependence bitmap we own.  */
      BITMAP_FREE (depends_on);
      return;
    }

  if (data->consider_all_candidates)
    {
      /* Direct-indexed table: the slot is simply the candidate id.  */
      use->cost_map[cand->id].cand = cand;
      use->cost_map[cand->id].cost = cost;
      use->cost_map[cand->id].depends_on = depends_on;
      use->cost_map[cand->id].value = value;
      use->cost_map[cand->id].comp = comp;
      use->cost_map[cand->id].inv_expr_id = inv_expr_id;
      return;
    }

  /* Otherwise the table is a small open-addressed hash keyed on the
     candidate id; probe linearly (with wrap-around) for a free slot.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (!use->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!use->cost_map[i].cand)
      goto found;

  /* The table is sized to hold all related candidates, so a free slot
     must exist.  */
  gcc_unreachable ();

found:
  use->cost_map[i].cand = cand;
  use->cost_map[i].cost = cost;
  use->cost_map[i].depends_on = depends_on;
  use->cost_map[i].value = value;
  use->cost_map[i].comp = comp;
  use->cost_map[i].inv_expr_id = inv_expr_id;
}
2712
/* Gets cost of (USE, CANDIDATE) pair.  Returns NULL if the pair was never
   recorded (i.e. its cost is infinite).  */

static struct cost_pair *
get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
		 struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      /* Direct-indexed table: look up by candidate id.  */
      ret = use->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* Open-addressed hash -- mirror the probing in set_use_iv_cost.  */
  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (use->n_map_members - 1);
  for (i = s; i < use->n_map_members; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  for (i = 0; i < s; i++)
    if (use->cost_map[i].cand == cand)
      return use->cost_map + i;

  return NULL;
}
2746
2747/* Returns estimate on cost of computing SEQ. */
2748
2749static unsigned
f529eb25 2750seq_cost (rtx seq, bool speed)
dec41e98 2751{
2752 unsigned cost = 0;
2753 rtx set;
2754
2755 for (; seq; seq = NEXT_INSN (seq))
2756 {
2757 set = single_set (seq);
2758 if (set)
7013e87c 2759 cost += set_src_cost (SET_SRC (set), speed);
dec41e98 2760 else
2761 cost++;
2762 }
2763
2764 return cost;
2765}
2766
/* Produce DECL_RTL for object obj so it looks like it is stored in memory.
   REGNO supplies (and is advanced past) the next fake register number to
   use for the address of a non-static object.  */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  enum machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      /* Static/external objects are addressed through their symbol.  */
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      /* Let the target annotate the symbol (sections, flags, ...).  */
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      /* Local objects: address held in a fresh fake register.  */
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}
2794
/* Prepares decl_rtl for variables referred in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  Any decl
   that receives a fake DECL_RTL here is pushed onto decl_rtl_to_reset so
   the RTL can be cleared again afterwards.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      /* Strip component references to reach the base object whose
	 address is taken; it must live in memory.  */
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
        x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      /* Do not walk into the name's subtrees.  */
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      /* BLKmode objects cannot live in registers.  */
      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      /* Remember the decl so the fake RTL can be reset later.  */
      VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}
2855
/* Determines cost of the computation of EXPR by expanding it to RTL and
   summing the instruction costs.  SPEED selects speed vs. size costing.
   Temporarily forces a normal node frequency and the requested RTL
   profile; both are restored before returning.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx seq, rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_get_node (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  /* Give every referenced decl a fake DECL_RTL so expansion can proceed.  */
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  /* If the result did not land in a register, account for the extra
     access as well.  */
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, speed);

  return cost;
}
2888
2889/* Returns variable containing the value of candidate CAND at statement AT. */
2890
2891static tree
75a70cf9 2892var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
dec41e98 2893{
2894 if (stmt_after_increment (loop, cand, stmt))
2895 return cand->var_after;
2896 else
2897 return cand->var_before;
2898}
2899
/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  /* A must be a (possibly widening) conversion; remember its operand.  */
  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  /* B must be a conversion from a type of the same precision.  */
  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  /* Both conversions stripped -- expose the wider operands.  */
  *a = suba;
  *b = subb;
  return wider_type;
}
2935
/* Determines the expression by that USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.
   The result is use = ubase - ratio * cbase + ratio * var, where ratio is
   the constant multiple relating the two steps.  */

static bool
get_computation_aff (struct loop *loop,
		     struct iv_use *use, struct iv_cand *cand, gimple at,
		     struct affine_tree_combination *aff)
{
  tree ubase = use->iv->base;
  tree ustep = use->iv->step;
  tree cbase = cand->iv->base;
  tree cstep = cand->iv->step, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree common_type, var;
  tree uutype;
  aff_tree cbase_aff, var_aff;
  double_int rat;

  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    {
      /* We do not have a precision to express the values of use.  */
      return false;
    }

  var = var_at_stmt (loop, cand, at);
  /* Work in the unsigned variant of the use type so overflow is defined.  */
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      cstep = fold_convert (uutype, cstep);
      cbase = fold_convert (uutype, cbase);
      var = fold_convert (uutype, var);
    }

  /* RAT := USTEP / CSTEP; fail if the steps are not constant multiples.  */
  if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff);
  tree_to_aff_combination (cbase, common_type, &cbase_aff);
  tree_to_aff_combination (var, uutype, &var_aff);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&cbase_aff, &cstep_aff);
    }

  aff_combination_scale (&cbase_aff, -rat);
  aff_combination_add (aff, &cbase_aff);
  if (common_type != uutype)
    aff_combination_convert (aff, uutype);

  aff_combination_scale (&var_aff, rat);
  aff_combination_add (aff, &var_aff);

  return true;
}
3011
3012/* Determines the expression by that USE is expressed from induction variable
3013 CAND at statement AT in LOOP. The computation is unshared. */
3014
3015static tree
3016get_computation_at (struct loop *loop,
75a70cf9 3017 struct iv_use *use, struct iv_cand *cand, gimple at)
aed164c3 3018{
d3610bea 3019 aff_tree aff;
aed164c3 3020 tree type = TREE_TYPE (use->iv->base);
3021
3022 if (!get_computation_aff (loop, use, cand, at, &aff))
3023 return NULL_TREE;
3024 unshare_aff_combination (&aff);
3025 return fold_convert (type, aff_combination_to_tree (&aff));
dec41e98 3026}
3027
/* Determines the expression by that USE is expressed from induction variable
   CAND in LOOP.  The computation is unshared.  Convenience wrapper that
   evaluates at the use's own statement.  */

static tree
get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
{
  return get_computation_at (loop, use, cand, use->stmt);
}
3036
fe166695 3037/* Adjust the cost COST for being in loop setup rather than loop body.
3038 If we're optimizing for space, the loop setup overhead is constant;
3039 if we're optimizing for speed, amortize it over the per-iteration cost. */
3040static unsigned
3041adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3042{
3043 if (cost == INFTY)
3044 return cost;
3045 else if (optimize_loop_for_speed_p (data->current_loop))
66368bf4 3046 return cost / avg_loop_niter (data->current_loop);
fe166695 3047 else
3048 return cost;
3049}
3050
/* Returns true if multiplying by RATIO is allowed in an address.  Test the
   validity for a memory reference accessing memory of mode MODE in
   address space AS.  The per-(AS, MODE) answers are computed once and
   cached in a lazily grown vector of bitmaps.  */

DEF_VEC_P (sbitmap);
DEF_VEC_ALLOC_P (sbitmap, heap);

bool
multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
				 addr_space_t as)
{
#define MAX_RATIO 128
  unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
  static VEC (sbitmap, heap) *valid_mult_list;
  sbitmap valid_mult;

  if (data_index >= VEC_length (sbitmap, valid_mult_list))
    VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);

  valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
  if (!valid_mult)
    {
      /* First query for this (AS, MODE): probe the target for every ratio
	 in [-MAX_RATIO, MAX_RATIO] and cache the answers.  */
      enum machine_mode address_mode = targetm.addr_space.address_mode (as);
      rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
      rtx addr;
      HOST_WIDE_INT i;

      valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
      bitmap_clear (valid_mult);
      addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
      for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	{
	  XEXP (addr, 1) = gen_int_mode (i, address_mode);
	  if (memory_address_addr_space_p (mode, addr, as))
	    SET_BIT (valid_mult, i + MAX_RATIO);
	}

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "  allowed multipliers:");
	  for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
	    if (TEST_BIT (valid_mult, i + MAX_RATIO))
	      fprintf (dump_file, " %d", (int) i);
	  fprintf (dump_file, "\n");
	  fprintf (dump_file, "\n");
	}

      VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
    }

  /* Ratios outside the probed range are conservatively rejected.  */
  if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
    return false;

  return TEST_BIT (valid_mult, ratio + MAX_RATIO);
}
3106
dec41e98 3107/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3108 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
e90b35db 3109 variable is omitted. Compute the cost for a memory reference that accesses
bd1a81f7 3110 a memory location of mode MEM_MODE in address space AS.
e90b35db 3111
3ee48c5c 3112 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3113 size of MEM_MODE / RATIO) is available. To make this determination, we
3114 look at the size of the increment to be made, which is given in CSTEP.
3115 CSTEP may be zero if the step is unknown.
3116 STMT_AFTER_INC is true iff the statement we're looking at is after the
3117 increment of the original biv.
3118
dec41e98 3119 TODO -- there must be some better way. This all is quite crude. */
3120
2b15d2ba 3121typedef struct address_cost_data_s
bd1a81f7 3122{
3123 HOST_WIDE_INT min_offset, max_offset;
3124 unsigned costs[2][2][2][2];
3125} *address_cost_data;
3126
3127DEF_VEC_P (address_cost_data);
3128DEF_VEC_ALLOC_P (address_cost_data, heap);
3129
3c2818b0 3130static comp_cost
dec41e98 3131get_address_cost (bool symbol_present, bool var_present,
e7f482a0 3132 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
bd1a81f7 3133 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3134 addr_space_t as, bool speed,
3ee48c5c 3135 bool stmt_after_inc, bool *may_autoinc)
dec41e98 3136{
98155838 3137 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
bd1a81f7 3138 static VEC(address_cost_data, heap) *address_cost_data_list;
3139 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3140 address_cost_data data;
3ee48c5c 3141 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3142 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3c2818b0 3143 unsigned cost, acost, complexity;
3ee48c5c 3144 bool offset_p, ratio_p, autoinc;
3145 HOST_WIDE_INT s_offset, autoinc_offset, msize;
dec41e98 3146 unsigned HOST_WIDE_INT mask;
3147 unsigned bits;
3148
bd1a81f7 3149 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3150 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3151 data_index + 1);
3152
3153 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3154 if (!data)
dec41e98 3155 {
3156 HOST_WIDE_INT i;
471fc991 3157 HOST_WIDE_INT rat, off = 0;
3158 int old_cse_not_expected, width;
9b6564b5 3159 unsigned sym_p, var_p, off_p, rat_p, add_c;
3160 rtx seq, addr, base;
3161 rtx reg0, reg1;
3162
bd1a81f7 3163 data = (address_cost_data) xcalloc (1, sizeof (*data));
dec41e98 3164
98155838 3165 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
dec41e98 3166
471fc991 3167 width = GET_MODE_BITSIZE (address_mode) - 1;
3168 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3169 width = HOST_BITS_PER_WIDE_INT - 1;
98155838 3170 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
471fc991 3171
3172 for (i = width; i >= 0; i--)
dec41e98 3173 {
e0e5e0b3 3174 off = -((unsigned HOST_WIDE_INT) 1 << i);
471fc991 3175 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3176 if (memory_address_addr_space_p (mem_mode, addr, as))
dec41e98 3177 break;
3178 }
471fc991 3179 data->min_offset = (i == -1? 0 : off);
dec41e98 3180
471fc991 3181 for (i = width; i >= 0; i--)
dec41e98 3182 {
e0e5e0b3 3183 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
471fc991 3184 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3185 if (memory_address_addr_space_p (mem_mode, addr, as))
dec41e98 3186 break;
3187 }
471fc991 3188 if (i == -1)
3189 off = 0;
3190 data->max_offset = off;
dec41e98 3191
3192 if (dump_file && (dump_flags & TDF_DETAILS))
3193 {
3194 fprintf (dump_file, "get_address_cost:\n");
471fc991 3195 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
e7f482a0 3196 GET_MODE_NAME (mem_mode),
471fc991 3197 data->min_offset);
3198 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
e7f482a0 3199 GET_MODE_NAME (mem_mode),
471fc991 3200 data->max_offset);
dec41e98 3201 }
3202
bd1a81f7 3203 rat = 1;
aed164c3 3204 for (i = 2; i <= MAX_RATIO; i++)
bd1a81f7 3205 if (multiplier_allowed_in_address_p (i, mem_mode, as))
aed164c3 3206 {
bd1a81f7 3207 rat = i;
aed164c3 3208 break;
3209 }
9b6564b5 3210
3211 /* Compute the cost of various addressing modes. */
3212 acost = 0;
98155838 3213 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3214 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
9b6564b5 3215
aa1572b5 3216 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3217 || USE_STORE_PRE_DECREMENT (mem_mode))
3ee48c5c 3218 {
98155838 3219 addr = gen_rtx_PRE_DEC (address_mode, reg0);
bd1a81f7 3220 has_predec[mem_mode]
3221 = memory_address_addr_space_p (mem_mode, addr, as);
3ee48c5c 3222 }
aa1572b5 3223 if (USE_LOAD_POST_DECREMENT (mem_mode)
3224 || USE_STORE_POST_DECREMENT (mem_mode))
3ee48c5c 3225 {
98155838 3226 addr = gen_rtx_POST_DEC (address_mode, reg0);
bd1a81f7 3227 has_postdec[mem_mode]
3228 = memory_address_addr_space_p (mem_mode, addr, as);
3ee48c5c 3229 }
aa1572b5 3230 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3231 || USE_STORE_PRE_DECREMENT (mem_mode))
3ee48c5c 3232 {
98155838 3233 addr = gen_rtx_PRE_INC (address_mode, reg0);
bd1a81f7 3234 has_preinc[mem_mode]
3235 = memory_address_addr_space_p (mem_mode, addr, as);
3ee48c5c 3236 }
aa1572b5 3237 if (USE_LOAD_POST_INCREMENT (mem_mode)
3238 || USE_STORE_POST_INCREMENT (mem_mode))
3ee48c5c 3239 {
98155838 3240 addr = gen_rtx_POST_INC (address_mode, reg0);
bd1a81f7 3241 has_postinc[mem_mode]
3242 = memory_address_addr_space_p (mem_mode, addr, as);
3ee48c5c 3243 }
9b6564b5 3244 for (i = 0; i < 16; i++)
3245 {
3246 sym_p = i & 1;
3247 var_p = (i >> 1) & 1;
3248 off_p = (i >> 2) & 1;
3249 rat_p = (i >> 3) & 1;
3250
3251 addr = reg0;
3252 if (rat_p)
98155838 3253 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3254 gen_int_mode (rat, address_mode));
9b6564b5 3255
3256 if (var_p)
98155838 3257 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
9b6564b5 3258
3259 if (sym_p)
3260 {
98155838 3261 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
7e995af7 3262 /* ??? We can run into trouble with some backends by presenting
f0b5f617 3263 it with symbols which haven't been properly passed through
7e995af7 3264 targetm.encode_section_info. By setting the local bit, we
3265 enhance the probability of things working. */
3266 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3267
9b6564b5 3268 if (off_p)
98155838 3269 base = gen_rtx_fmt_e (CONST, address_mode,
bd1a81f7 3270 gen_rtx_fmt_ee
98155838 3271 (PLUS, address_mode, base,
3272 gen_int_mode (off, address_mode)));
9b6564b5 3273 }
3274 else if (off_p)
98155838 3275 base = gen_int_mode (off, address_mode);
9b6564b5 3276 else
3277 base = NULL_RTX;
48e1416a 3278
9b6564b5 3279 if (base)
98155838 3280 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3ee48c5c 3281
9b6564b5 3282 start_sequence ();
3283 /* To avoid splitting addressing modes, pretend that no cse will
3284 follow. */
3285 old_cse_not_expected = cse_not_expected;
3286 cse_not_expected = true;
bd1a81f7 3287 addr = memory_address_addr_space (mem_mode, addr, as);
9b6564b5 3288 cse_not_expected = old_cse_not_expected;
3289 seq = get_insns ();
3290 end_sequence ();
3291
f529eb25 3292 acost = seq_cost (seq, speed);
bd1a81f7 3293 acost += address_cost (addr, mem_mode, as, speed);
9b6564b5 3294
3295 if (!acost)
3296 acost = 1;
bd1a81f7 3297 data->costs[sym_p][var_p][off_p][rat_p] = acost;
9b6564b5 3298 }
3299
3300 /* On some targets, it is quite expensive to load symbol to a register,
3301 which makes addresses that contain symbols look much more expensive.
3302 However, the symbol will have to be loaded in any case before the
3303 loop (and quite likely we have it in register already), so it does not
3304 make much sense to penalize them too heavily. So make some final
3305 tweaks for the SYMBOL_PRESENT modes:
3306
3307 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3308 var is cheaper, use this mode with small penalty.
3309 If VAR_PRESENT is true, try whether the mode with
3310 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3311 if this is the case, use it. */
49a71e58 3312 add_c = add_cost (speed, address_mode);
9b6564b5 3313 for (i = 0; i < 8; i++)
3314 {
3315 var_p = i & 1;
3316 off_p = (i >> 1) & 1;
3317 rat_p = (i >> 2) & 1;
3318
bd1a81f7 3319 acost = data->costs[0][1][off_p][rat_p] + 1;
9b6564b5 3320 if (var_p)
3321 acost += add_c;
3322
bd1a81f7 3323 if (acost < data->costs[1][var_p][off_p][rat_p])
3324 data->costs[1][var_p][off_p][rat_p] = acost;
9b6564b5 3325 }
3ee48c5c 3326
9b6564b5 3327 if (dump_file && (dump_flags & TDF_DETAILS))
3328 {
3329 fprintf (dump_file, "Address costs:\n");
48e1416a 3330
9b6564b5 3331 for (i = 0; i < 16; i++)
3332 {
3333 sym_p = i & 1;
3334 var_p = (i >> 1) & 1;
3335 off_p = (i >> 2) & 1;
3336 rat_p = (i >> 3) & 1;
3337
3338 fprintf (dump_file, " ");
3339 if (sym_p)
3340 fprintf (dump_file, "sym + ");
3341 if (var_p)
3342 fprintf (dump_file, "var + ");
3343 if (off_p)
3344 fprintf (dump_file, "cst + ");
3345 if (rat_p)
3346 fprintf (dump_file, "rat * ");
3347
bd1a81f7 3348 acost = data->costs[sym_p][var_p][off_p][rat_p];
9b6564b5 3349 fprintf (dump_file, "index costs %d\n", acost);
3350 }
3ee48c5c 3351 if (has_predec[mem_mode] || has_postdec[mem_mode]
3352 || has_preinc[mem_mode] || has_postinc[mem_mode])
3353 fprintf (dump_file, " May include autoinc/dec\n");
9b6564b5 3354 fprintf (dump_file, "\n");
3355 }
bd1a81f7 3356
3357 VEC_replace (address_cost_data, address_cost_data_list,
3358 data_index, data);
dec41e98 3359 }
3360
98155838 3361 bits = GET_MODE_BITSIZE (address_mode);
dec41e98 3362 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3363 offset &= mask;
3364 if ((offset >> (bits - 1) & 1))
3365 offset |= ~mask;
3366 s_offset = offset;
3367
3ee48c5c 3368 autoinc = false;
3369 msize = GET_MODE_SIZE (mem_mode);
3370 autoinc_offset = offset;
3371 if (stmt_after_inc)
3372 autoinc_offset += ratio * cstep;
3373 if (symbol_present || var_present || ratio != 1)
3374 autoinc = false;
3375 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3376 && msize == cstep)
3377 || (has_postdec[mem_mode] && autoinc_offset == 0
3378 && msize == -cstep)
3379 || (has_preinc[mem_mode] && autoinc_offset == msize
3380 && msize == cstep)
3381 || (has_predec[mem_mode] && autoinc_offset == -msize
3382 && msize == -cstep))
3383 autoinc = true;
3384
dec41e98 3385 cost = 0;
636addb3 3386 offset_p = (s_offset != 0
bd1a81f7 3387 && data->min_offset <= s_offset
3388 && s_offset <= data->max_offset);
dec41e98 3389 ratio_p = (ratio != 1
bd1a81f7 3390 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
dec41e98 3391
3392 if (ratio != 1 && !ratio_p)
72655676 3393 cost += mult_by_coeff_cost (ratio, address_mode, speed);
dec41e98 3394
3395 if (s_offset && !offset_p && !symbol_present)
49a71e58 3396 cost += add_cost (speed, address_mode);
dec41e98 3397
3ee48c5c 3398 if (may_autoinc)
3399 *may_autoinc = autoinc;
bd1a81f7 3400 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3c2818b0 3401 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3402 return new_cost (cost + acost, complexity);
dec41e98 3403}
dec41e98 3404
e0ae2fe2 3405 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3406 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3407 calculating the operands of EXPR. Returns true if successful, and returns
3408 the cost in COST. */
3409
3410static bool
3411get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3412 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3413{
3414 comp_cost res;
3415 tree op1 = TREE_OPERAND (expr, 1);
3416 tree cst = TREE_OPERAND (mult, 1);
3417 tree multop = TREE_OPERAND (mult, 0);
3418 int m = exact_log2 (int_cst_value (cst));
3419 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3420 int sa_cost;
3421
3422 if (!(m >= 0 && m < maxm))
3423 return false;
3424
3425 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
49a71e58 3426 ? shiftadd_cost (speed, mode, m)
e0ae2fe2 3427 : (mult == op1
49a71e58 3428 ? shiftsub1_cost (speed, mode, m)
3429 : shiftsub0_cost (speed, mode, m)));
e0ae2fe2 3430 res = new_cost (sa_cost, 0);
3431 res = add_costs (res, mult == op1 ? cost0 : cost1);
3432
3433 STRIP_NOPS (multop);
3434 if (!is_gimple_val (multop))
3435 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3436
3437 *cost = res;
3438 return true;
3439}
3440
f4d7629d 3441/* Estimates cost of forcing expression EXPR into a variable. */
3442
3c2818b0 3443static comp_cost
f529eb25 3444force_expr_to_var_cost (tree expr, bool speed)
dec41e98 3445{
3446 static bool costs_initialized = false;
f529eb25 3447 static unsigned integer_cost [2];
3448 static unsigned symbol_cost [2];
3449 static unsigned address_cost [2];
eb91f88e 3450 tree op0, op1;
3c2818b0 3451 comp_cost cost0, cost1, cost;
eb91f88e 3452 enum machine_mode mode;
dec41e98 3453
3454 if (!costs_initialized)
3455 {
dec41e98 3456 tree type = build_pointer_type (integer_type_node);
7e995af7 3457 tree var, addr;
3458 rtx x;
f529eb25 3459 int i;
7e995af7 3460
3461 var = create_tmp_var_raw (integer_type_node, "test_var");
3462 TREE_STATIC (var) = 1;
3463 x = produce_memory_decl_rtl (var, NULL);
3464 SET_DECL_RTL (var, x);
dec41e98 3465
dec41e98 3466 addr = build1 (ADDR_EXPR, type, var);
dec41e98 3467
f529eb25 3468
3469 for (i = 0; i < 2; i++)
dec41e98 3470 {
f529eb25 3471 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3472 2000), i);
3473
3474 symbol_cost[i] = computation_cost (addr, i) + 1;
3475
3476 address_cost[i]
2cc66f2a 3477 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
f529eb25 3478 if (dump_file && (dump_flags & TDF_DETAILS))
3479 {
3480 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3481 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3482 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3483 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3484 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3485 fprintf (dump_file, "\n");
3486 }
dec41e98 3487 }
3488
3489 costs_initialized = true;
3490 }
3491
390955a6 3492 STRIP_NOPS (expr);
3493
dec41e98 3494 if (SSA_VAR_P (expr))
8ac8f5ae 3495 return no_cost;
dec41e98 3496
71d9af81 3497 if (is_gimple_min_invariant (expr))
dec41e98 3498 {
3499 if (TREE_CODE (expr) == INTEGER_CST)
f529eb25 3500 return new_cost (integer_cost [speed], 0);
dec41e98 3501
3502 if (TREE_CODE (expr) == ADDR_EXPR)
3503 {
3504 tree obj = TREE_OPERAND (expr, 0);
3505
3506 if (TREE_CODE (obj) == VAR_DECL
3507 || TREE_CODE (obj) == PARM_DECL
3508 || TREE_CODE (obj) == RESULT_DECL)
f529eb25 3509 return new_cost (symbol_cost [speed], 0);
dec41e98 3510 }
3511
f529eb25 3512 return new_cost (address_cost [speed], 0);
dec41e98 3513 }
3514
eb91f88e 3515 switch (TREE_CODE (expr))
3516 {
0de36bdb 3517 case POINTER_PLUS_EXPR:
eb91f88e 3518 case PLUS_EXPR:
3519 case MINUS_EXPR:
3520 case MULT_EXPR:
3521 op0 = TREE_OPERAND (expr, 0);
3522 op1 = TREE_OPERAND (expr, 1);
390955a6 3523 STRIP_NOPS (op0);
3524 STRIP_NOPS (op1);
eb91f88e 3525
3526 if (is_gimple_val (op0))
8ac8f5ae 3527 cost0 = no_cost;
eb91f88e 3528 else
f529eb25 3529 cost0 = force_expr_to_var_cost (op0, speed);
eb91f88e 3530
3531 if (is_gimple_val (op1))
8ac8f5ae 3532 cost1 = no_cost;
eb91f88e 3533 else
f529eb25 3534 cost1 = force_expr_to_var_cost (op1, speed);
eb91f88e 3535
3536 break;
3537
fe4600c9 3538 case NEGATE_EXPR:
3539 op0 = TREE_OPERAND (expr, 0);
3540 STRIP_NOPS (op0);
3541 op1 = NULL_TREE;
3542
3543 if (is_gimple_val (op0))
8ac8f5ae 3544 cost0 = no_cost;
fe4600c9 3545 else
3546 cost0 = force_expr_to_var_cost (op0, speed);
3547
8ac8f5ae 3548 cost1 = no_cost;
fe4600c9 3549 break;
3550
eb91f88e 3551 default:
3552 /* Just an arbitrary value, FIXME. */
f529eb25 3553 return new_cost (target_spill_cost[speed], 0);
eb91f88e 3554 }
3555
3556 mode = TYPE_MODE (TREE_TYPE (expr));
3557 switch (TREE_CODE (expr))
3558 {
0de36bdb 3559 case POINTER_PLUS_EXPR:
eb91f88e 3560 case PLUS_EXPR:
3561 case MINUS_EXPR:
fe4600c9 3562 case NEGATE_EXPR:
49a71e58 3563 cost = new_cost (add_cost (speed, mode), 0);
e0ae2fe2 3564 if (TREE_CODE (expr) != NEGATE_EXPR)
3565 {
3566 tree mult = NULL_TREE;
3567 comp_cost sa_cost;
3568 if (TREE_CODE (op1) == MULT_EXPR)
3569 mult = op1;
3570 else if (TREE_CODE (op0) == MULT_EXPR)
3571 mult = op0;
3572
3573 if (mult != NULL_TREE
5d24f4be 3574 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
49a71e58 3575 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3576 speed, &sa_cost))
e0ae2fe2 3577 return sa_cost;
3578 }
eb91f88e 3579 break;
3580
3581 case MULT_EXPR:
3582 if (cst_and_fits_in_hwi (op0))
72655676 3583 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3584 mode, speed), 0);
48e1416a 3585 else if (cst_and_fits_in_hwi (op1))
72655676 3586 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3587 mode, speed), 0);
eb91f88e 3588 else
f529eb25 3589 return new_cost (target_spill_cost [speed], 0);
eb91f88e 3590 break;
3591
3592 default:
3593 gcc_unreachable ();
3594 }
3595
3c2818b0 3596 cost = add_costs (cost, cost0);
3597 cost = add_costs (cost, cost1);
eb91f88e 3598
3599 /* Bound the cost by target_spill_cost. The parts of complicated
3600 computations often are either loop invariant or at least can
3601 be shared between several iv uses, so letting this grow without
3602 limits would not give reasonable results. */
3ee48c5c 3603 if (cost.cost > (int) target_spill_cost [speed])
f529eb25 3604 cost.cost = target_spill_cost [speed];
3c2818b0 3605
3606 return cost;
dec41e98 3607}
3608
f4d7629d 3609/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3610 invariants the computation depends on. */
3611
3c2818b0 3612static comp_cost
f4d7629d 3613force_var_cost (struct ivopts_data *data,
3614 tree expr, bitmap *depends_on)
3615{
3616 if (depends_on)
3617 {
3618 fd_ivopts_data = data;
3619 walk_tree (&expr, find_depends, depends_on, NULL);
3620 }
3621
f529eb25 3622 return force_expr_to_var_cost (expr, data->speed);
f4d7629d 3623}
3624
dec41e98 3625/* Estimates cost of expressing address ADDR as var + symbol + offset. The
3626 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3627 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3628 invariants the computation depends on. */
3629
3c2818b0 3630static comp_cost
dec41e98 3631split_address_cost (struct ivopts_data *data,
3632 tree addr, bool *symbol_present, bool *var_present,
3633 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3634{
dbc64c75 3635 tree core;
3636 HOST_WIDE_INT bitsize;
3637 HOST_WIDE_INT bitpos;
3638 tree toffset;
3639 enum machine_mode mode;
3640 int unsignedp, volatilep;
48e1416a 3641
dbc64c75 3642 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
e7e9416e 3643 &unsignedp, &volatilep, false);
dec41e98 3644
dbc64c75 3645 if (toffset != 0
3646 || bitpos % BITS_PER_UNIT != 0
3647 || TREE_CODE (core) != VAR_DECL)
dec41e98 3648 {
3649 *symbol_present = false;
3650 *var_present = true;
3651 fd_ivopts_data = data;
3652 walk_tree (&addr, find_depends, depends_on, NULL);
f529eb25 3653 return new_cost (target_spill_cost[data->speed], 0);
dbc64c75 3654 }
3655
3656 *offset += bitpos / BITS_PER_UNIT;
dec41e98 3657 if (TREE_STATIC (core)
3658 || DECL_EXTERNAL (core))
3659 {
3660 *symbol_present = true;
3661 *var_present = false;
8ac8f5ae 3662 return no_cost;
dec41e98 3663 }
48e1416a 3664
dec41e98 3665 *symbol_present = false;
3666 *var_present = true;
8ac8f5ae 3667 return no_cost;
dec41e98 3668}
3669
/* Estimates cost of expressing difference of addresses E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  E1 must be an ADDR_EXPR.  */

static comp_cost
ptr_difference_cost (struct ivopts_data *data,
		     tree e1, tree e2, bool *symbol_present, bool *var_present,
		     unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  HOST_WIDE_INT diff = 0;
  aff_tree aff_e1, aff_e2;
  tree type;

  gcc_assert (TREE_CODE (e1) == ADDR_EXPR);

  /* If the difference folds to a constant, it is pure offset.  */
  if (ptr_difference_const (e1, e2, &diff))
    {
      *offset += diff;
      *symbol_present = false;
      *var_present = false;
      return no_cost;
    }

  /* E1 - 0: just split E1's object into symbol/var/offset parts.  */
  if (integer_zerop (e2))
    return split_address_cost (data, TREE_OPERAND (e1, 0),
			       symbol_present, var_present, offset, depends_on);

  *symbol_present = false;
  *var_present = true;

  /* General case: compute E1 - E2 as an affine combination and price
     forcing the resulting expression into a variable.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
3710
/* Estimates cost of expressing difference E1 - E2 as
   var + symbol + offset.  The value of offset is added to OFFSET,
   SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
   part is missing.  DEPENDS_ON is a set of the invariants the computation
   depends on.  */

static comp_cost
difference_cost (struct ivopts_data *data,
		 tree e1, tree e2, bool *symbol_present, bool *var_present,
		 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
{
  enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
  unsigned HOST_WIDE_INT off1, off2;
  aff_tree aff_e1, aff_e2;
  tree type;

  /* Peel constant offsets off both operands first; only the net
     difference of the offsets matters.  */
  e1 = strip_offset (e1, &off1);
  e2 = strip_offset (e2, &off2);
  *offset += off1 - off2;

  STRIP_NOPS (e1);
  STRIP_NOPS (e2);

  /* Address differences are handled separately.  */
  if (TREE_CODE (e1) == ADDR_EXPR)
    return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
				offset, depends_on);
  *symbol_present = false;

  if (operand_equal_p (e1, e2, 0))
    {
      *var_present = false;
      return no_cost;
    }

  *var_present = true;

  /* E1 - 0 is just E1.  */
  if (integer_zerop (e2))
    return force_var_cost (data, e1, depends_on);

  /* 0 - E2 costs a negation on top of forcing E2 to a variable.  */
  if (integer_zerop (e1))
    {
      comp_cost cost = force_var_cost (data, e2, depends_on);
      cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
      return cost;
    }

  /* General case: build the affine combination E1 - E2 and price
     forcing it into a variable.  */
  type = signed_type_for (TREE_TYPE (e1));
  tree_to_aff_combination (e1, type, &aff_e1);
  tree_to_aff_combination (e2, type, &aff_e2);
  aff_combination_scale (&aff_e2, double_int_minus_one);
  aff_combination_add (&aff_e1, &aff_e2);

  return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
}
3765
66368bf4 3766/* Returns true if AFF1 and AFF2 are identical. */
3767
3768static bool
3769compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3770{
3771 unsigned i;
3772
3773 if (aff1->n != aff2->n)
3774 return false;
3775
3776 for (i = 0; i < aff1->n; i++)
3777 {
cf8f0e63 3778 if (aff1->elts[i].coef != aff2->elts[i].coef)
66368bf4 3779 return false;
3780
3781 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3782 return false;
3783 }
3784 return true;
3785}
3786
055d6bb2 3787/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3788
3789static int
3790get_expr_id (struct ivopts_data *data, tree expr)
3791{
3792 struct iv_inv_expr_ent ent;
3793 struct iv_inv_expr_ent **slot;
3794
3795 ent.expr = expr;
3796 ent.hash = iterative_hash_expr (expr, 0);
3797 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3798 &ent, INSERT);
3799 if (*slot)
3800 return (*slot)->id;
3801
3802 *slot = XNEW (struct iv_inv_expr_ent);
3803 (*slot)->expr = expr;
3804 (*slot)->hash = ent.hash;
3805 (*slot)->id = data->inv_expr_id++;
3806 return (*slot)->id;
3807}
3808
66368bf4 3809/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3810 requires a new compiler generated temporary. Returns -1 otherwise.
3811 ADDRESS_P is a flag indicating if the expression is for address
3812 computation. */
3813
3814static int
3815get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3816 tree cbase, HOST_WIDE_INT ratio,
3817 bool address_p)
3818{
3819 aff_tree ubase_aff, cbase_aff;
3820 tree expr, ub, cb;
66368bf4 3821
3822 STRIP_NOPS (ubase);
3823 STRIP_NOPS (cbase);
3824 ub = ubase;
3825 cb = cbase;
3826
3827 if ((TREE_CODE (ubase) == INTEGER_CST)
3828 && (TREE_CODE (cbase) == INTEGER_CST))
3829 return -1;
3830
3831 /* Strips the constant part. */
3832 if (TREE_CODE (ubase) == PLUS_EXPR
3833 || TREE_CODE (ubase) == MINUS_EXPR
3834 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3835 {
3836 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3837 ubase = TREE_OPERAND (ubase, 0);
3838 }
3839
3840 /* Strips the constant part. */
3841 if (TREE_CODE (cbase) == PLUS_EXPR
3842 || TREE_CODE (cbase) == MINUS_EXPR
3843 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3844 {
3845 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3846 cbase = TREE_OPERAND (cbase, 0);
3847 }
3848
3849 if (address_p)
3850 {
3851 if (((TREE_CODE (ubase) == SSA_NAME)
3852 || (TREE_CODE (ubase) == ADDR_EXPR
3853 && is_gimple_min_invariant (ubase)))
3854 && (TREE_CODE (cbase) == INTEGER_CST))
3855 return -1;
3856
3857 if (((TREE_CODE (cbase) == SSA_NAME)
3858 || (TREE_CODE (cbase) == ADDR_EXPR
3859 && is_gimple_min_invariant (cbase)))
3860 && (TREE_CODE (ubase) == INTEGER_CST))
3861 return -1;
3862 }
3863
3864 if (ratio == 1)
3865 {
3866 if(operand_equal_p (ubase, cbase, 0))
3867 return -1;
3868
3869 if (TREE_CODE (ubase) == ADDR_EXPR
3870 && TREE_CODE (cbase) == ADDR_EXPR)
3871 {
3872 tree usym, csym;
3873
3874 usym = TREE_OPERAND (ubase, 0);
3875 csym = TREE_OPERAND (cbase, 0);
3876 if (TREE_CODE (usym) == ARRAY_REF)
3877 {
3878 tree ind = TREE_OPERAND (usym, 1);
3879 if (TREE_CODE (ind) == INTEGER_CST
3880 && host_integerp (ind, 0)
3881 && TREE_INT_CST_LOW (ind) == 0)
3882 usym = TREE_OPERAND (usym, 0);
3883 }
3884 if (TREE_CODE (csym) == ARRAY_REF)
3885 {
3886 tree ind = TREE_OPERAND (csym, 1);
3887 if (TREE_CODE (ind) == INTEGER_CST
3888 && host_integerp (ind, 0)
3889 && TREE_INT_CST_LOW (ind) == 0)
3890 csym = TREE_OPERAND (csym, 0);
3891 }
3892 if (operand_equal_p (usym, csym, 0))
3893 return -1;
3894 }
3895 /* Now do more complex comparison */
3896 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3897 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3898 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3899 return -1;
3900 }
3901
3902 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3903 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3904
cf8f0e63 3905 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
66368bf4 3906 aff_combination_add (&ubase_aff, &cbase_aff);
3907 expr = aff_combination_to_tree (&ubase_aff);
055d6bb2 3908 return get_expr_id (data, expr);
66368bf4 3909}
3910
3911
3912
dec41e98 3913/* Determines the cost of the computation by that USE is expressed
3914 from induction variable CAND. If ADDRESS_P is true, we just need
3915 to create an address from it, otherwise we want to get it into
3916 register. A set of invariants we depend on is stored in
3ee48c5c 3917 DEPENDS_ON. AT is the statement at that the value is computed.
3918 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3919 addressing is likely. */
dec41e98 3920
3c2818b0 3921static comp_cost
dec41e98 3922get_computation_cost_at (struct ivopts_data *data,
3923 struct iv_use *use, struct iv_cand *cand,
3ee48c5c 3924 bool address_p, bitmap *depends_on, gimple at,
66368bf4 3925 bool *can_autoinc,
3926 int *inv_expr_id)
dec41e98 3927{
3928 tree ubase = use->iv->base, ustep = use->iv->step;
3929 tree cbase, cstep;
3930 tree utype = TREE_TYPE (ubase), ctype;
d3610bea 3931 unsigned HOST_WIDE_INT cstepi, offset = 0;
dec41e98 3932 HOST_WIDE_INT ratio, aratio;
3ee48c5c 3933 bool var_present, symbol_present, stmt_is_after_inc;
3c2818b0 3934 comp_cost cost;
d3610bea 3935 double_int rat;
f529eb25 3936 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
dec41e98 3937
3938 *depends_on = NULL;
3939
3940 /* Only consider real candidates. */
3941 if (!cand->iv)
3c2818b0 3942 return infinite_cost;
dec41e98 3943
3944 cbase = cand->iv->base;
3945 cstep = cand->iv->step;
3946 ctype = TREE_TYPE (cbase);
3947
3948 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3949 {
3950 /* We do not have a precision to express the values of use. */
3c2818b0 3951 return infinite_cost;
dec41e98 3952 }
3953
316830ac 3954 if (address_p
3955 || (use->iv->base_object
3956 && cand->iv->base_object
3957 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3958 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
065d134e 3959 {
3960 /* Do not try to express address of an object with computation based
3961 on address of a different object. This may cause problems in rtl
3962 level alias analysis (that does not expect this to be happening,
3963 as this is illegal in C), and would be unlikely to be useful
3964 anyway. */
3965 if (use->iv->base_object
3966 && cand->iv->base_object
3967 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3c2818b0 3968 return infinite_cost;
065d134e 3969 }
3970
fe4600c9 3971 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
dec41e98 3972 {
3973 /* TODO -- add direct handling of this case. */
3974 goto fallback;
3975 }
3976
651874e1 3977 /* CSTEPI is removed from the offset in case statement is after the
3978 increment. If the step is not constant, we use zero instead.
e446560c 3979 This is a bit imprecise (there is the extra addition), but
651874e1 3980 redundancy elimination is likely to transform the code so that
3981 it uses value of the variable before increment anyway,
3982 so it is not that much unrealistic. */
3983 if (cst_and_fits_in_hwi (cstep))
3984 cstepi = int_cst_value (cstep);
3985 else
3986 cstepi = 0;
3987
d3610bea 3988 if (!constant_multiple_of (ustep, cstep, &rat))
3c2818b0 3989 return infinite_cost;
48e1416a 3990
cf8f0e63 3991 if (rat.fits_shwi ())
3992 ratio = rat.to_shwi ();
d3610bea 3993 else
3c2818b0 3994 return infinite_cost;
dec41e98 3995
fe4600c9 3996 STRIP_NOPS (cbase);
3997 ctype = TREE_TYPE (cbase);
3998
83c71c22 3999 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4000
dec41e98 4001 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4002 or ratio == 1, it is better to handle this like
48e1416a 4003
dec41e98 4004 ubase - ratio * cbase + ratio * var
48e1416a 4005
dec41e98 4006 (also holds in the case ratio == -1, TODO. */
4007
651874e1 4008 if (cst_and_fits_in_hwi (cbase))
dec41e98 4009 {
48e1416a 4010 offset = - ratio * int_cst_value (cbase);
3c2818b0 4011 cost = difference_cost (data,
4012 ubase, build_int_cst (utype, 0),
4013 &symbol_present, &var_present, &offset,
4014 depends_on);
66368bf4 4015 cost.cost /= avg_loop_niter (data->current_loop);
dec41e98 4016 }
4017 else if (ratio == 1)
4018 {
83c71c22 4019 tree real_cbase = cbase;
4020
4021 /* Check to see if any adjustment is needed. */
4022 if (cstepi == 0 && stmt_is_after_inc)
4023 {
4024 aff_tree real_cbase_aff;
4025 aff_tree cstep_aff;
4026
4027 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4028 &real_cbase_aff);
4029 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4030
4031 aff_combination_add (&real_cbase_aff, &cstep_aff);
4032 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4033 }
4034
3c2818b0 4035 cost = difference_cost (data,
83c71c22 4036 ubase, real_cbase,
3c2818b0 4037 &symbol_present, &var_present, &offset,
4038 depends_on);
66368bf4 4039 cost.cost /= avg_loop_niter (data->current_loop);
dec41e98 4040 }
fe4600c9 4041 else if (address_p
4042 && !POINTER_TYPE_P (ctype)
bd1a81f7 4043 && multiplier_allowed_in_address_p
4044 (ratio, TYPE_MODE (TREE_TYPE (utype)),
4045 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
fe4600c9 4046 {
4047 cbase
4048 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4049 cost = difference_cost (data,
4050 ubase, cbase,
4051 &symbol_present, &var_present, &offset,
4052 depends_on);
66368bf4 4053 cost.cost /= avg_loop_niter (data->current_loop);
fe4600c9 4054 }
dec41e98 4055 else
4056 {
3c2818b0 4057 cost = force_var_cost (data, cbase, depends_on);
3c2818b0 4058 cost = add_costs (cost,
4059 difference_cost (data,
4060 ubase, build_int_cst (utype, 0),
4061 &symbol_present, &var_present,
4062 &offset, depends_on));
66368bf4 4063 cost.cost /= avg_loop_niter (data->current_loop);
49a71e58 4064 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
66368bf4 4065 }
4066
4067 if (inv_expr_id)
4068 {
4069 *inv_expr_id =
4070 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4071 /* Clear depends on. */
4072 if (*inv_expr_id != -1 && depends_on && *depends_on)
4073 bitmap_clear (*depends_on);
dec41e98 4074 }
4075
4076 /* If we are after the increment, the value of the candidate is higher by
4077 one iteration. */
3ee48c5c 4078 if (stmt_is_after_inc)
dec41e98 4079 offset -= ratio * cstepi;
4080
4081 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
fe4600c9 4082 (symbol/var1/const parts may be omitted). If we are looking for an
4083 address, find the cost of addressing this. */
dec41e98 4084 if (address_p)
fe4600c9 4085 return add_costs (cost,
4086 get_address_cost (symbol_present, var_present,
3ee48c5c 4087 offset, ratio, cstepi,
4088 TYPE_MODE (TREE_TYPE (utype)),
bd1a81f7 4089 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3ee48c5c 4090 speed, stmt_is_after_inc,
4091 can_autoinc));
dec41e98 4092
4093 /* Otherwise estimate the costs for computing the expression. */
dec41e98 4094 if (!symbol_present && !var_present && !offset)
4095 {
4096 if (ratio != 1)
72655676 4097 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
dec41e98 4098 return cost;
4099 }
4100
fe4600c9 4101 /* Symbol + offset should be compile-time computable so consider that they
4102 are added once to the variable, if present. */
4103 if (var_present && (symbol_present || offset))
fe166695 4104 cost.cost += adjust_setup_cost (data,
49a71e58 4105 add_cost (speed, TYPE_MODE (ctype)));
dec41e98 4106
3c2818b0 4107 /* Having offset does not affect runtime cost in case it is added to
4108 symbol, but it increases complexity. */
4109 if (offset)
4110 cost.complexity++;
4111
49a71e58 4112 cost.cost += add_cost (speed, TYPE_MODE (ctype));
fe4600c9 4113
4114 aratio = ratio > 0 ? ratio : -ratio;
4115 if (aratio != 1)
72655676 4116 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
abc7c8e9 4117 return cost;
dec41e98 4118
4119fallback:
3ee48c5c 4120 if (can_autoinc)
4121 *can_autoinc = false;
4122
dec41e98 4123 {
4124 /* Just get the expression, expand it and measure the cost. */
4125 tree comp = get_computation_at (data->current_loop, use, cand, at);
4126
4127 if (!comp)
3c2818b0 4128 return infinite_cost;
dec41e98 4129
4130 if (address_p)
182cf5a9 4131 comp = build_simple_mem_ref (comp);
dec41e98 4132
f529eb25 4133 return new_cost (computation_cost (comp, speed), 0);
dec41e98 4134 }
4135}
4136
/* Determines the cost of the computation by that USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into
   register.  A set of invariants we depend on is stored in
   DEPENDS_ON.  If CAN_AUTOINC is nonnull, use it to record whether
   autoinc addressing is likely.  If INV_EXPR_ID is nonnull, the id of
   the loop-invariant expression the cost is based on is stored there
   (or -1 when there is none).

   Convenience wrapper around get_computation_cost_at that evaluates
   the cost at the position of USE's own statement.  */

static comp_cost
get_computation_cost (struct ivopts_data *data,
		      struct iv_use *use, struct iv_cand *cand,
		      bool address_p, bitmap *depends_on,
		      bool *can_autoinc, int *inv_expr_id)
{
  return get_computation_cost_at (data,
				  use, cand, address_p, depends_on, use->stmt,
				  can_autoinc, inv_expr_id);
}
4154
4155/* Determines cost of basing replacement of USE on CAND in a generic
4156 expression. */
4157
00991688 4158static bool
dec41e98 4159determine_use_iv_cost_generic (struct ivopts_data *data,
4160 struct iv_use *use, struct iv_cand *cand)
4161{
4162 bitmap depends_on;
3c2818b0 4163 comp_cost cost;
66368bf4 4164 int inv_expr_id = -1;
d4caf099 4165
4166 /* The simple case first -- if we need to express value of the preserved
4167 original biv, the cost is 0. This also prevents us from counting the
4168 cost of increment twice -- once at this use and once in the cost of
4169 the candidate. */
4170 if (cand->pos == IP_ORIGINAL
4171 && cand->incremented_at == use->stmt)
4172 {
8ac8f5ae 4173 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
08748e8d 4174 ERROR_MARK, -1);
d4caf099 4175 return true;
4176 }
dec41e98 4177
66368bf4 4178 cost = get_computation_cost (data, use, cand, false, &depends_on,
4179 NULL, &inv_expr_id);
4180
08748e8d 4181 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
66368bf4 4182 inv_expr_id);
00991688 4183
3c2818b0 4184 return !infinite_cost_p (cost);
dec41e98 4185}
4186
4187/* Determines cost of basing replacement of USE on CAND in an address. */
4188
00991688 4189static bool
dec41e98 4190determine_use_iv_cost_address (struct ivopts_data *data,
4191 struct iv_use *use, struct iv_cand *cand)
4192{
4193 bitmap depends_on;
3ee48c5c 4194 bool can_autoinc;
66368bf4 4195 int inv_expr_id = -1;
3ee48c5c 4196 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
66368bf4 4197 &can_autoinc, &inv_expr_id);
dec41e98 4198
3ee48c5c 4199 if (cand->ainc_use == use)
4200 {
4201 if (can_autoinc)
4202 cost.cost -= cand->cost_step;
4203 /* If we generated the candidate solely for exploiting autoincrement
4204 opportunities, and it turns out it can't be used, set the cost to
4205 infinity to make sure we ignore it. */
4206 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4207 cost = infinite_cost;
4208 }
08748e8d 4209 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
66368bf4 4210 inv_expr_id);
00991688 4211
3c2818b0 4212 return !infinite_cost_p (cost);
dec41e98 4213}
4214
cf661063 4215/* Computes value of candidate CAND at position AT in iteration NITER, and
4216 stores it to VAL. */
dec41e98 4217
cf661063 4218static void
75a70cf9 4219cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
cf661063 4220 aff_tree *val)
dec41e98 4221{
cf661063 4222 aff_tree step, delta, nit;
4223 struct iv *iv = cand->iv;
dec41e98 4224 tree type = TREE_TYPE (iv->base);
06240723 4225 tree steptype = type;
4226 if (POINTER_TYPE_P (type))
4227 steptype = sizetype;
dec41e98 4228
06240723 4229 tree_to_aff_combination (iv->step, steptype, &step);
cf661063 4230 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
06240723 4231 aff_combination_convert (&nit, steptype);
cf661063 4232 aff_combination_mult (&nit, &step, &delta);
dec41e98 4233 if (stmt_after_increment (loop, cand, at))
cf661063 4234 aff_combination_add (&delta, &step);
dec41e98 4235
cf661063 4236 tree_to_aff_combination (iv->base, type, val);
4237 aff_combination_add (val, &delta);
dec41e98 4238}
4239
b091dc59 4240/* Returns period of induction variable iv. */
4241
4242static tree
4243iv_period (struct iv *iv)
4244{
4245 tree step = iv->step, period, type;
4246 tree pow2div;
4247
4248 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4249
b091dc59 4250 type = unsigned_type_for (TREE_TYPE (step));
eaa67821 4251 /* Period of the iv is lcm (step, type_range)/step -1,
4252 i.e., N*type_range/step - 1. Since type range is power
4253 of two, N == (step >> num_of_ending_zeros_binary (step),
4254 so the final result is
4255
4256 (type_range >> num_of_ending_zeros_binary (step)) - 1
4257
4258 */
4259 pow2div = num_ending_zeros (step);
b091dc59 4260
4261 period = build_low_bits_mask (type,
eaa67821 4262 (TYPE_PRECISION (type)
4263 - tree_low_cst (pow2div, 1)));
b091dc59 4264
4265 return period;
4266}
4267
c4d3b428 4268/* Returns the comparison operator used when eliminating the iv USE. */
4269
4270static enum tree_code
4271iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4272{
4273 struct loop *loop = data->current_loop;
4274 basic_block ex_bb;
4275 edge exit;
4276
75a70cf9 4277 ex_bb = gimple_bb (use->stmt);
c4d3b428 4278 exit = EDGE_SUCC (ex_bb, 0);
4279 if (flow_bb_inside_loop_p (loop, exit->dest))
4280 exit = EDGE_SUCC (ex_bb, 1);
4281
4282 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4283}
4284
/* Strip the outermost useless type conversions of EXP, but only while
   doing so does not change whether the type is known not to wrap on
   overflow -- so the overflow semantics of the expression are
   preserved.  */

static tree
strip_wrap_conserving_type_conversions (tree exp)
{
  while (tree_ssa_useless_type_conversion (exp)
	 && (nowrap_type_p (TREE_TYPE (exp))
	     == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
    exp = TREE_OPERAND (exp, 0);
  return exp;
}
4294
/* Walk the SSA form and check whether E == WHAT.  Fairly simplistic, we
   check for an exact match.  */

static bool
expr_equal_p (tree e, tree what)
{
  gimple stmt;
  enum tree_code code;

  /* Ignore conversions that do not affect wrapping semantics.  */
  e = strip_wrap_conserving_type_conversions (e);
  what = strip_wrap_conserving_type_conversions (what);

  code = TREE_CODE (what);
  if (TREE_TYPE (e) != TREE_TYPE (what))
    return false;

  if (operand_equal_p (e, what, 0))
    return true;

  /* E may still match if it is an SSA name whose defining statement
     computes the same expression as WHAT; look through the definition
     and compare operand-wise.  */
  if (TREE_CODE (e) != SSA_NAME)
    return false;

  stmt = SSA_NAME_DEF_STMT (e);
  if (gimple_code (stmt) != GIMPLE_ASSIGN
      || gimple_assign_rhs_code (stmt) != code)
    return false;

  switch (get_gimple_rhs_class (code))
    {
    case GIMPLE_BINARY_RHS:
      if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
	return false;
      /* Fallthru.  */

    case GIMPLE_UNARY_RHS:
    case GIMPLE_SINGLE_RHS:
      return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
    default:
      return false;
    }
}
4336
/* Returns true if we can prove that BASE - OFFSET does not overflow.  For now,
   we only detect the situation that BASE = SOMETHING + OFFSET, where the
   calculation is performed in non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.

   Also, we only look for the first addition in the computation of BASE.
   More complex analysis would be better, but introducing it just for
   this optimization seems like an overkill.  */

static bool
difference_cannot_overflow_p (tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;

  /* In a wrapping type nothing can be concluded.  */
  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  /* Extract the code and the two operands of the addition that defines
     BASE -- either from the defining assignment of an SSA name, or from
     the expression itself.  */
  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* TODO: deeper inspection may be necessary to prove the equality.  */
  switch (code)
    {
    case PLUS_EXPR:
      return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
    case POINTER_PLUS_EXPR:
      /* For POINTER_PLUS_EXPR the offset is always the second operand.  */
      return expr_equal_p (e2, offset);

    default:
      return false;
    }
}
4395
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loop.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetics does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
      overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
      overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
                           struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct affine_tree_combination nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  The check is done
     by computing -NITER - A + B in affine arithmetic and verifying that
     the result is the constant 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, double_int_minus_one);
  aff_combination_scale (&tmpa, double_int_minus_one);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != double_int_one)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
4527
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
		  struct iv_use *use, struct iv_cand *cand, tree *bound,
		  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  /* Find the successor edge that leaves the loop; if both successors stay
     inside, the condition is not a loop exit.  */
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
	{
	  if (!tree_int_cst_lt (desc->niter, period))
	    return false;
	}
      else
	{
	  if (tree_int_cst_lt (period, desc->niter))
	    return false;
	}
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      double_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
	max_niter += double_int_one;
      period_value = tree_to_double_int (period);
      if (max_niter.ugt (period_value))
	{
	  /* See if we can take advantage of inferred loop bound information.  */
	  if (data->loop_single_exit_p)
	    {
	      if (!max_loop_iterations (loop, &max_niter))
		return false;
	      /* The loop bound is already adjusted by adding 1.  */
	      if (max_niter.ugt (period_value))
		return false;
	    }
	  else
	    return false;
	}
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = aff_combination_to_tree (&bnd);
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes, it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
	   base the exit condition on it.  However, that is often too
	   expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
4635
055d6bb2 4636 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4637 be copied, if is is used in the loop body and DATA->body_includes_call. */
4638
4639static int
4640parm_decl_cost (struct ivopts_data *data, tree bound)
4641{
4642 tree sbound = bound;
4643 STRIP_NOPS (sbound);
4644
4645 if (TREE_CODE (sbound) == SSA_NAME
2f4ec87c 4646 && SSA_NAME_IS_DEFAULT_DEF (sbound)
055d6bb2 4647 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
055d6bb2 4648 && data->body_includes_call)
4649 return COSTS_N_INSNS (1);
4650
4651 return 0;
4652}
eaa67821 4653
/* Determines cost of basing replacement of USE on CAND in a condition.
   Two alternatives are compared: eliminating the iv entirely (rewriting
   the exit test in terms of CAND), and merely expressing the compared
   value from CAND.  The cheaper one is recorded.  */

static bool
determine_use_iv_cost_condition (struct ivopts_data *data,
				 struct iv_use *use, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
  comp_cost elim_cost, express_cost, cost, bound_cost;
  bool ok;
  int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;

  /* Only consider real candidates.  */
  if (!cand->iv)
    {
      set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
		       ERROR_MARK, -1);
      return false;
    }

  /* Try iv elimination.  */
  if (may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &depends_on_elim);
      if (elim_cost.cost == 0)
	elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
	elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
	 depends_on_elim will have 'base' and 'n' set, which implies
	 that both 'base' and 'n' will be live during the loop.  More likely,
	 'base + n' will be loop invariant, resulting in only one live value
	 during the loop.  So in that case we clear depends_on_elim and set
	 elim_inv_expr_id instead.  */
      if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
	{
	  elim_inv_expr_id = get_expr_id (data, bound);
	  bitmap_clear (depends_on_elim);
	}
      /* The bound is a loop invariant, so it will be only computed
	 once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }
  else
    elim_cost = infinite_cost;

  /* Try expressing the original giv.  If it is compared with an invariant,
     note that we cannot get rid of it.  */
  ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
			      NULL, &cmp_iv);
  gcc_assert (ok);

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
	  || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost.cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
				       &depends_on_express, NULL,
				       &express_inv_expr_id);
  fd_ivopts_data = data;
  walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost.cost += bound_cost.cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (compare_costs (elim_cost, express_cost) <= 0)
    {
      cost = elim_cost;
      depends_on = depends_on_elim;
      depends_on_elim = NULL;
      inv_expr_id = elim_inv_expr_id;
    }
  else
    {
      cost = express_cost;
      depends_on = depends_on_express;
      depends_on_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr_id = express_inv_expr_id;
    }

  set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);

  /* Free the bitmap of the losing alternative (the winner's ownership was
     transferred to the cost record above).  */
  if (depends_on_elim)
    BITMAP_FREE (depends_on_elim);
  if (depends_on_express)
    BITMAP_FREE (depends_on_express);

  return !infinite_cost_p (cost);
}
4762
00991688 4763/* Determines cost of basing replacement of USE on CAND. Returns false
4764 if USE cannot be based on CAND. */
dec41e98 4765
00991688 4766static bool
dec41e98 4767determine_use_iv_cost (struct ivopts_data *data,
4768 struct iv_use *use, struct iv_cand *cand)
4769{
4770 switch (use->type)
4771 {
4772 case USE_NONLINEAR_EXPR:
00991688 4773 return determine_use_iv_cost_generic (data, use, cand);
dec41e98 4774
dec41e98 4775 case USE_ADDRESS:
00991688 4776 return determine_use_iv_cost_address (data, use, cand);
dec41e98 4777
4778 case USE_COMPARE:
00991688 4779 return determine_use_iv_cost_condition (data, use, cand);
dec41e98 4780
4781 default:
8c0963c4 4782 gcc_unreachable ();
dec41e98 4783 }
4784}
4785
3ee48c5c 4786/* Return true if get_computation_cost indicates that autoincrement is
4787 a possibility for the pair of USE and CAND, false otherwise. */
4788
4789static bool
4790autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4791 struct iv_cand *cand)
4792{
4793 bitmap depends_on;
4794 bool can_autoinc;
4795 comp_cost cost;
4796
4797 if (use->type != USE_ADDRESS)
4798 return false;
4799
4800 cost = get_computation_cost (data, use, cand, true, &depends_on,
66368bf4 4801 &can_autoinc, NULL);
3ee48c5c 4802
4803 BITMAP_FREE (depends_on);
4804
4805 return !infinite_cost_p (cost) && can_autoinc;
4806}
4807
/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);
      struct iv_use *closest = NULL;
      if (cand->pos != IP_ORIGINAL)
	continue;
      /* Among the uses in the same basic block that precede the candidate
	 increment, find the one closest to the increment (the one with the
	 largest statement uid not exceeding the increment's uid).  */
      for (j = 0; j < n_iv_uses (data); j++)
	{
	  struct iv_use *use = iv_use (data, j);
	  unsigned uid = gimple_uid (use->stmt);
	  if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
	      || uid > gimple_uid (cand->incremented_at))
	    continue;
	  if (closest == NULL || uid > gimple_uid (closest->stmt))
	    closest = use;
	}
      /* Record the use only if autoincrement is actually possible for
	 this pair.  */
      if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
	continue;
      cand->ainc_use = closest;
    }
}
4837
/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_old_ivs_candidates (data);

  /* Add induction variables derived from uses.  */
  add_derived_ivs_candidates (data);

  /* Mark original-iv candidates that can exploit autoincrement.  */
  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);
}
4857
/* Determines costs of basing the use of the iv on an iv candidate.  */

static void
determine_use_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_use *use;
  struct iv_cand *cand;
  /* Scratch bitmap collecting candidates whose cost turned out infinite,
     so they can be removed from the related candidates of a use.  */
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      if (data->consider_all_candidates)
	{
	  for (j = 0; j < n_iv_cands (data); j++)
	    {
	      cand = iv_cand (data, j);
	      determine_use_iv_cost (data, use, cand);
	    }
	}
      else
	{
	  bitmap_iterator bi;

	  EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
	    {
	      cand = iv_cand (data, j);
	      if (!determine_use_iv_cost (data, use, cand))
		bitmap_set_bit (to_clear, j);
	    }

	  /* Remove the candidates for that the cost is infinite from
	     the list of related candidates.  */
	  bitmap_and_compl_into (use->related_cands, to_clear);
	  bitmap_clear (to_clear);
	}
    }

  BITMAP_FREE (to_clear);

  /* Dump the computed cost table when detailed dumping is requested.  */
  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Use-candidate costs:\n");

      for (i = 0; i < n_iv_uses (data); i++)
	{
	  use = iv_use (data, i);

	  fprintf (dump_file, "Use %d:\n", i);
	  fprintf (dump_file, "  cand\tcost\tcompl.\tdepends on\n");
	  for (j = 0; j < use->n_map_members; j++)
	    {
	      if (!use->cost_map[j].cand
		  || infinite_cost_p (use->cost_map[j].cost))
		continue;

	      fprintf (dump_file, "  %d\t%d\t%d\t",
		       use->cost_map[j].cand->id,
		       use->cost_map[j].cost.cost,
		       use->cost_map[j].cost.complexity);
	      if (use->cost_map[j].depends_on)
		bitmap_print (dump_file,
			      use->cost_map[j].depends_on, "","");
	      if (use->cost_map[j].inv_expr_id != -1)
		fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
	      fprintf (dump_file, "\n");
	    }

	  fprintf (dump_file, "\n");
	}
      fprintf (dump_file, "\n");
    }
}
4935
/* Determines the cost of the candidate CAND and stores it in CAND->cost
   (the per-iteration increment cost is also kept in CAND->cost_step).  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  /* Pseudo-candidates (without an iv) are free.  */
  if (!cand->iv)
    {
      cand->cost = 0;
      return;
    }

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized with
     the proper value at no cost.  In general, there will at least be a regcopy
     or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  /* The setup cost is scaled down since it is only paid once per loop.  */
  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing it.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
4983
4984/* Determines costs of computation of the candidates. */
4985
4986static void
4987determine_iv_costs (struct ivopts_data *data)
4988{
4989 unsigned i;
4990
4991 if (dump_file && (dump_flags & TDF_DETAILS))
4992 {
4993 fprintf (dump_file, "Candidate costs:\n");
4994 fprintf (dump_file, " cand\tcost\n");
4995 }
4996
4997 for (i = 0; i < n_iv_cands (data); i++)
4998 {
4999 struct iv_cand *cand = iv_cand (data, i);
5000
5001 determine_iv_cost (data, cand);
5002
5003 if (dump_file && (dump_flags & TDF_DETAILS))
5004 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5005 }
3ee48c5c 5006
ee9e7ecf 5007 if (dump_file && (dump_flags & TDF_DETAILS))
5008 fprintf (dump_file, "\n");
dec41e98 5009}
5010
5011/* Calculates cost for having SIZE induction variables. */
5012
5013static unsigned
5014ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5015{
25153338 5016 /* We add size to the cost, so that we prefer eliminating ivs
5017 if possible. */
a6b74a67 5018 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5019 data->body_includes_call);
dec41e98 5020}
5021
/* For each size of the induction variable set determine the penalty.
   Counts the registers that will be live across the loop (phi results
   that are not ivs, plus invariants with nonlinear uses) and stores the
   count in DATA->regs_used.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gimple phi;
  gimple_stmt_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Global costs:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  /* Count loop-header phi results that are not induction variables --
     each of them occupies a register for the whole loop.  */
  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
	continue;

      if (get_iv (data, op))
	continue;

      n++;
    }

  /* Invariants used in nonlinear expressions also need registers.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
	n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
	fprintf (dump_file, "  %d\t%d\n", j,
		 ivopts_global_cost_for_size (data, j));
      fprintf (dump_file, "\n");
    }
}
5080
00991688 5081/* Returns true if A is a cheaper cost pair than B. */
dec41e98 5082
00991688 5083static bool
5084cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
dec41e98 5085{
3c2818b0 5086 int cmp;
5087
00991688 5088 if (!a)
5089 return false;
dec41e98 5090
00991688 5091 if (!b)
5092 return true;
5093
3c2818b0 5094 cmp = compare_costs (a->cost, b->cost);
5095 if (cmp < 0)
00991688 5096 return true;
5097
3c2818b0 5098 if (cmp > 0)
00991688 5099 return false;
5100
5101 /* In case the costs are the same, prefer the cheaper candidate. */
5102 if (a->cand->cost < b->cand->cost)
5103 return true;
5104
5105 return false;
5106}
5107
66368bf4 5108
5109/* Returns candidate by that USE is expressed in IVS. */
5110
5111static struct cost_pair *
5112iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5113{
5114 return ivs->cand_for_use[use->id];
5115}
5116
/* Computes the cost field of IVS structure: the per-use costs, plus the
   candidate costs, plus the register-pressure penalty for the registers
   and invariant expressions the set needs.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost.cost += ivs->cand_cost;

  cost.cost += ivopts_global_cost_for_size (data,
					    ivs->n_regs + ivs->num_used_inv_expr);

  ivs->cost = cost;
}
5131
651874e1 5132/* Remove invariants in set INVS to set IVS. */
5133
5134static void
5135iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5136{
5137 bitmap_iterator bi;
5138 unsigned iid;
5139
5140 if (!invs)
5141 return;
5142
5143 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5144 {
5145 ivs->n_invariant_uses[iid]--;
5146 if (ivs->n_invariant_uses[iid] == 0)
66368bf4 5147 ivs->n_regs--;
651874e1 5148 }
5149}
5150
/* Set USE not to be expressed by any candidate in IVS, updating all the
   bookkeeping (use counts, candidate set, invariants, costs).  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
		 struct iv_use *use)
{
  unsigned uid = use->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_use[uid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_uses++;
  ivs->cand_for_use[uid] = NULL;
  ivs->n_cand_uses[cid]--;

  /* Last use of the candidate gone -- remove it from the set.  */
  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      /* Do not count the pseudocandidates.  */
      if (cp->cand->iv)
	ivs->n_regs--;
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;

      iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
    }

  ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);

  iv_ca_set_remove_invariants (ivs, cp->depends_on);

  /* Release the invariant expression if this was its last use.  */
  if (cp->inv_expr_id != -1)
    {
      ivs->used_inv_expr[cp->inv_expr_id]--;
      if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
	ivs->num_used_inv_expr--;
    }
  iv_ca_recount_cost (data, ivs);
}
5193
5194/* Add invariants in set INVS to set IVS. */
0a501ca6 5195
651874e1 5196static void
5197iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5198{
5199 bitmap_iterator bi;
5200 unsigned iid;
5201
5202 if (!invs)
5203 return;
5204
5205 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
00991688 5206 {
651874e1 5207 ivs->n_invariant_uses[iid]++;
5208 if (ivs->n_invariant_uses[iid] == 1)
66368bf4 5209 ivs->n_regs++;
dec41e98 5210 }
00991688 5211}
5212
/* Set cost pair for USE in set IVS to CP.  Replaces any previous
   assignment for USE; CP may be NULL to leave USE unassigned.  */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  /* Undo the bookkeeping for the old assignment first.  */
  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      /* First use of the candidate -- add it to the set.  */
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates.  */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
      iv_ca_set_add_invariants (ivs, cp->depends_on);

      /* Account for the invariant expression the pair depends on.  */
      if (cp->inv_expr_id != -1)
	{
	  ivs->used_inv_expr[cp->inv_expr_id]++;
	  if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
	    ivs->num_used_inv_expr++;
	}
      iv_ca_recount_cost (data, ivs);
    }
}
5258
/* Extend set IVS by expressing USE by some of the candidates in it
   if possible.  All important candidates will be considered
   if IMPORTANT_CANDIDATES is true; otherwise only candidates already
   in IVS are tried.  */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use, bool important_candidates)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  bitmap cands;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  /* USE is seen for the first time -- account it as unassigned.  */
  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  cands = (important_candidates ? data->important_candidates : ivs->cands);
  EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
    {
      struct iv_cand *cand = iv_cand (data, i);

      cp = get_use_iv_cost (data, use, cand);

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}
5293
/* Get cost for assignment IVS; infinite if some use is still not
   expressed by any candidate.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_uses)
    return infinite_cost;
  else
    return ivs->cost;
}
5306
5307/* Returns true if all dependences of CP are among invariants in IVS. */
5308
5309static bool
5310iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
dec41e98 5311{
5312 unsigned i;
0cc4271a 5313 bitmap_iterator bi;
dec41e98 5314
00991688 5315 if (!cp->depends_on)
5316 return true;
5317
5318 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
dec41e98 5319 {
00991688 5320 if (ivs->n_invariant_uses[i] == 0)
5321 return false;
5322 }
5323
5324 return true;
5325}
5326
5327/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5328 it before NEXT_CHANGE. */
5329
5330static struct iv_ca_delta *
5331iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5332 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5333{
4c36ffe6 5334 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
00991688 5335
5336 change->use = use;
5337 change->old_cp = old_cp;
5338 change->new_cp = new_cp;
5339 change->next_change = next_change;
5340
5341 return change;
5342}
5343
13ea2c1f 5344/* Joins two lists of changes L1 and L2. Destructive -- old lists
f7f07c95 5345 are rewritten. */
13ea2c1f 5346
5347static struct iv_ca_delta *
5348iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5349{
5350 struct iv_ca_delta *last;
5351
5352 if (!l2)
5353 return l1;
5354
5355 if (!l1)
5356 return l2;
5357
5358 for (last = l1; last->next_change; last = last->next_change)
5359 continue;
5360 last->next_change = l2;
5361
5362 return l1;
5363}
5364
13ea2c1f 5365/* Reverse the list of changes DELTA, forming the inverse to it. */
5366
5367static struct iv_ca_delta *
5368iv_ca_delta_reverse (struct iv_ca_delta *delta)
5369{
5370 struct iv_ca_delta *act, *next, *prev = NULL;
5371 struct cost_pair *tmp;
5372
5373 for (act = delta; act; act = next)
5374 {
5375 next = act->next_change;
5376 act->next_change = prev;
5377 prev = act;
5378
5379 tmp = act->old_cp;
5380 act->old_cp = act->new_cp;
5381 act->new_cp = tmp;
5382 }
5383
5384 return prev;
5385}
5386
/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead (DELTA itself is restored before returning).  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  /* To revert, walk the inverted list; undo the inversion afterwards.  */
  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}
dec41e98 5411
/* Returns true if CAND is used by at least one use in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}
dec41e98 5419
13ea2c1f 5420/* Returns number of induction variable candidates in the set IVS. */
5421
5422static unsigned
5423iv_ca_n_cands (struct iv_ca *ivs)
5424{
5425 return ivs->n_cands;
5426}
5427
00991688 5428/* Free the list of changes DELTA. */
5429
5430static void
5431iv_ca_delta_free (struct iv_ca_delta **delta)
5432{
5433 struct iv_ca_delta *act, *next;
5434
5435 for (act = *delta; act; act = next)
0cc4271a 5436 {
00991688 5437 next = act->next_change;
5438 free (act);
0cc4271a 5439 }
dec41e98 5440
00991688 5441 *delta = NULL;
5442}
5443
/* Allocates new, empty iv candidates assignment with all costs zeroed.  */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = no_cost;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = no_cost;
  nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
  nw->num_used_inv_expr = 0;

  return nw;
}
5467
5468/* Free memory occupied by the set IVS. */
5469
5470static void
5471iv_ca_free (struct iv_ca **ivs)
5472{
5473 free ((*ivs)->cand_for_use);
5474 free ((*ivs)->n_cand_uses);
27335ffd 5475 BITMAP_FREE ((*ivs)->cands);
00991688 5476 free ((*ivs)->n_invariant_uses);
7d5663e4 5477 free ((*ivs)->used_inv_expr);
00991688 5478 free (*ivs);
5479 *ivs = NULL;
5480}
5481
/* Dumps IVS to FILE: overall cost, the candidate bitmap, the candidate
   chosen for each use, and the invariants the set depends on.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost, cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_use_cost: %d (complexity %d)\n",
	   ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_use *use = iv_use (data, i);
      struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
      if (cp)
        fprintf (file, "   use:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
      else
        fprintf (file, "   use:%d --> ??\n", use->id);
    }

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n\n");
}
5515
/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag.  When it is true
   the function will try to find a solution with minimal iv candidates.
   IVS itself is left unchanged (the delta is committed and reverted).  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      /* When minimizing the number of candidates, switch the use to CAND
	 even if this is not obviously profitable.  */
      if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  /* Measure the cost of the modified set, then restore IVS.  */
  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
5562
d2aa5f30 5563/* Try narrowing set IVS by removing CAND. Return the cost of
00991688 5564 the new set and store the differences in DELTA. */
dec41e98 5565
3c2818b0 5566static comp_cost
00991688 5567iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5568 struct iv_cand *cand, struct iv_ca_delta **delta)
dec41e98 5569{
00991688 5570 unsigned i, ci;
5571 struct iv_use *use;
5572 struct cost_pair *old_cp, *new_cp, *cp;
5573 bitmap_iterator bi;
5574 struct iv_cand *cnd;
3c2818b0 5575 comp_cost cost;
00991688 5576
5577 *delta = NULL;
5578 for (i = 0; i < n_iv_uses (data); i++)
5579 {
5580 use = iv_use (data, i);
5581
5582 old_cp = iv_ca_cand_for_use (ivs, use);
5583 if (old_cp->cand != cand)
5584 continue;
5585
5586 new_cp = NULL;
5587
5588 if (data->consider_all_candidates)
5589 {
5590 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5591 {
5592 if (ci == cand->id)
5593 continue;
5594
5595 cnd = iv_cand (data, ci);
5596
5597 cp = get_use_iv_cost (data, use, cnd);
5598 if (!cp)
5599 continue;
66368bf4 5600
00991688 5601 if (!iv_ca_has_deps (ivs, cp))
66368bf4 5602 continue;
48e1416a 5603
00991688 5604 if (!cheaper_cost_pair (cp, new_cp))
5605 continue;
5606
5607 new_cp = cp;
5608 }
5609 }
5610 else
5611 {
5612 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5613 {
5614 if (ci == cand->id)
5615 continue;
5616
5617 cnd = iv_cand (data, ci);
5618
5619 cp = get_use_iv_cost (data, use, cnd);
5620 if (!cp)
5621 continue;
5622 if (!iv_ca_has_deps (ivs, cp))
5623 continue;
48e1416a 5624
00991688 5625 if (!cheaper_cost_pair (cp, new_cp))
5626 continue;
5627
5628 new_cp = cp;
5629 }
5630 }
5631
5632 if (!new_cp)
5633 {
5634 iv_ca_delta_free (delta);
3c2818b0 5635 return infinite_cost;
00991688 5636 }
5637
5638 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5639 }
5640
5641 iv_ca_delta_commit (data, ivs, *delta, true);
5642 cost = iv_ca_cost (ivs);
5643 iv_ca_delta_commit (data, ivs, *delta, false);
5644
5645 return cost;
dec41e98 5646}
5647
13ea2c1f 5648/* Try optimizing the set of candidates IVS by removing candidates different
5649 from to EXCEPT_CAND from it. Return cost of the new set, and store
5650 differences in DELTA. */
5651
3c2818b0 5652static comp_cost
13ea2c1f 5653iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5654 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5655{
5656 bitmap_iterator bi;
5657 struct iv_ca_delta *act_delta, *best_delta;
3c2818b0 5658 unsigned i;
5659 comp_cost best_cost, acost;
13ea2c1f 5660 struct iv_cand *cand;
5661
5662 best_delta = NULL;
5663 best_cost = iv_ca_cost (ivs);
5664
5665 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5666 {
5667 cand = iv_cand (data, i);
5668
5669 if (cand == except_cand)
5670 continue;
5671
5672 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5673
3c2818b0 5674 if (compare_costs (acost, best_cost) < 0)
13ea2c1f 5675 {
5676 best_cost = acost;
5677 iv_ca_delta_free (&best_delta);
5678 best_delta = act_delta;
5679 }
5680 else
5681 iv_ca_delta_free (&act_delta);
5682 }
5683
5684 if (!best_delta)
5685 {
5686 *delta = NULL;
5687 return best_cost;
5688 }
5689
5690 /* Recurse to possibly remove other unnecessary ivs. */
5691 iv_ca_delta_commit (data, ivs, best_delta, true);
5692 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5693 iv_ca_delta_commit (data, ivs, best_delta, false);
5694 *delta = iv_ca_delta_join (best_delta, *delta);
5695 return best_cost;
5696}
5697
/* Tries to extend the sets IVS in the best possible way in order
   to express the USE.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  Returns false if USE cannot be
   expressed at all (best cost stays infinite).  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  /* First try candidates already in the set.  */
  iv_ca_add_use (data, ivs, use, false);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (!cp)
    {
      /* No candidate in the set works -- retry with the important
	 candidates, undoing the failed bookkeeping first.  */
      ivs->upto--;
      ivs->bad_uses--;
      iv_ca_add_use (data, ivs, use, true);
      best_cost = iv_ca_cost (ivs);
      cp = iv_ca_cand_for_use (ivs, use);
    }
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (originalp && cand->pos !=IP_ORIGINAL)
	continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
	continue;

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      /* Cost the set extended by CAND, then restore it.  */
      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (compare_costs (act_cost, best_cost) < 0)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (infinite_cost_p (best_cost))
    {
      /* Fall back to trying every candidate costed for this use.  */
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this.  */
	  if (cand->important)
	    {
	      if (originalp && cand->pos == IP_ORIGINAL)
		continue;
	      if (!originalp && cand->iv->base_object == NULL_TREE)
		continue;
	    }

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (compare_costs (act_cost, best_cost) < 0)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !infinite_cost_p (best_cost);
}
5821
00991688 5822/* Finds an initial assignment of candidates to uses. */
dec41e98 5823
00991688 5824static struct iv_ca *
fdd153a1 5825get_initial_solution (struct ivopts_data *data, bool originalp)
dec41e98 5826{
00991688 5827 struct iv_ca *ivs = iv_ca_new (data);
dec41e98 5828 unsigned i;
5829
5830 for (i = 0; i < n_iv_uses (data); i++)
fdd153a1 5831 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
00991688 5832 {
5833 iv_ca_free (&ivs);
5834 return NULL;
5835 }
dec41e98 5836
00991688 5837 return ivs;
dec41e98 5838}
5839
/* Tries to improve set of induction variables IVS.  Returns true if a
   cheaper set was found and committed, false if IVS is locally optimal.  */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (compare_costs (acost, best_cost) < 0)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do.  */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
  iv_ca_delta_free (&best_delta);
  return true;
}
5897
/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.
   ORIGINALP selects the strategy used for the initial solution.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  /* Iterate local improvement until a fixed point is reached.  */
  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  return set;
}
5933
/* Finds the best iv set by running the greedy search with both
   strategies and keeping the cheaper result.  Also records the selected
   candidate on each use.  */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set, *origset;
  struct iv_use *use;
  comp_cost cost, origcost;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
	       origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
	       cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (compare_costs (origcost, cost) <= 0)
    {
      if (set)
	iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  /* Record the chosen candidate on each use.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}
5980
/* Creates a new induction variable corresponding to CAND, inserting its
   increment at the position described by CAND->pos.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}
6029
/* Creates new induction variables described in SET, dumping the selected
   set when detailed dumps are enabled.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\nSelected IV set: \n");
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = iv_cand (data, i);
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
dec41e98 6056
/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, tgt;
  gimple ass;
  gimple_stmt_iterator bsi;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR, old_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      /* A negated step is represented as subtraction of its operand.  */
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      old_code = gimple_assign_rhs_code (use->stmt);
      if (old_code == PLUS_EXPR
	  || old_code == MINUS_EXPR
	  || old_code == POINTER_PLUS_EXPR)
	{
	  /* Pick the operand that is not the iv; for MINUS_EXPR only
	     the second operand may play that role.  */
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (old_code != MINUS_EXPR
		   && gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      /* The increment stands on its own if its other operand is a
	 constant or the step itself -- leave the stmt untouched then.  */
      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv.  */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      /* The general case: express the use's value in terms of CAND.  */
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      /* The replacement assignment has to go after the labels of the
	 PHI's block.  */
      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      /* COMP cannot be used directly as a rhs; gimplify it into an
	 SSA name first.  */
      comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
				       true, GSI_SAME_STMT);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      /* A PHI cannot be rewritten in place; emit an ordinary assignment
	 to its result and remove the PHI node.  */
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      /* The stmt may have been re-allocated; keep USE pointing at it.  */
      use->stmt = gsi_stmt (bsi);
    }
}
6184
/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases. The mem ref uses
   the iv value before the update, so the reordering transformation requires
   adjustment of the offset. CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   directly propagating t over to (1) will introduce overlapping live range
   thus increase register pressure. This peephole transform it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;
*/

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple iv_update, stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  /* Only IP_NORMAL candidates have the increment right before the exit
     test, which is the shape this peephole looks for.  */
  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statement for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  /* The iv update must be the statement immediately before the cond.  */
  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  /* And the statement before that must be the memory use itself.  */
  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0, 0);
      print_gimple_stmt (dump_file, use->stmt, 0, 0);
      fprintf (dump_file, "\n");
    }

  /* Move the iv update before the memory use ...  */
  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  /* ... and record the new position so the use is rewritten against the
     updated iv value (with the offset adjusted accordingly).  */
  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
6267
/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree base_hint = NULL_TREE;
  tree ref, iv;
  bool ok;

  /* Possibly move the iv update below the use first (peephole).  */
  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  if (cand->iv->base_object)
    base_hint = var_at_stmt (data->current_loop, cand, use->stmt);

  iv = var_at_stmt (data->current_loop, cand, use->stmt);
  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
			reference_alias_ptr_type (*use->op_p),
			iv, base_hint, data->speed);
  /* Preserve the original reference's auxiliary info, then install the
     new memory reference in place of the old operand.  */
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
6306
/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      /* The cost pair recorded a bound, i.e. iv elimination succeeded:
	 replace the exit test by VAR <compare> BOUND.  */
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      /* The bound is loop-invariant; any statements computing it go to
	 the preheader.  */
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gimple_cond_set_lhs (use->stmt, var);
      gimple_cond_set_code (use->stmt, compare);
      gimple_cond_set_rhs (use->stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  /* Replace the iv operand of the condition in place.  */
  *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
				     true, GSI_SAME_STMT);
}
6357
dec41e98 6358/* Rewrites USE using candidate CAND. */
6359
6360static void
de6ed584 6361rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
dec41e98 6362{
6363 switch (use->type)
6364 {
6365 case USE_NONLINEAR_EXPR:
6366 rewrite_use_nonlinear_expr (data, use, cand);
6367 break;
6368
dec41e98 6369 case USE_ADDRESS:
6370 rewrite_use_address (data, use, cand);
6371 break;
6372
6373 case USE_COMPARE:
6374 rewrite_use_compare (data, use, cand);
6375 break;
6376
6377 default:
8c0963c4 6378 gcc_unreachable ();
dec41e98 6379 }
48e1416a 6380
4c5fd53c 6381 update_stmt (use->stmt);
dec41e98 6382}
6383
6384/* Rewrite the uses using the selected induction variables. */
6385
6386static void
6387rewrite_uses (struct ivopts_data *data)
6388{
6389 unsigned i;
6390 struct iv_cand *cand;
6391 struct iv_use *use;
6392
6393 for (i = 0; i < n_iv_uses (data); i++)
6394 {
6395 use = iv_use (data, i);
6396 cand = use->selected;
8c0963c4 6397 gcc_assert (cand);
dec41e98 6398
6399 rewrite_use (data, use, cand);
6400 }
6401}
6402
6403/* Removes the ivs that are not used after rewriting. */
6404
6405static void
6406remove_unused_ivs (struct ivopts_data *data)
6407{
6408 unsigned j;
0cc4271a 6409 bitmap_iterator bi;
1b223194 6410 bitmap toremove = BITMAP_ALLOC (NULL);
dec41e98 6411
1b223194 6412 /* Figure out an order in which to release SSA DEFs so that we don't
6413 release something that we'd have to propagate into a debug stmt
6414 afterwards. */
0cc4271a 6415 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
dec41e98 6416 {
6417 struct version_info *info;
6418
6419 info = ver_info (data, j);
6420 if (info->iv
7a973feb 6421 && !integer_zerop (info->iv->step)
dec41e98 6422 && !info->inv_id
6423 && !info->iv->have_use_for
6424 && !info->preserve_biv)
1b223194 6425 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
0cc4271a 6426 }
1b223194 6427
6428 release_defs_bitset (toremove);
6429
6430 BITMAP_FREE (toremove);
dec41e98 6431}
6432
eaa67821 6433/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6434 for pointer_map_traverse. */
6435
6436static bool
6437free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6438 void *data ATTRIBUTE_UNUSED)
6439{
6440 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6441
6442 free (niter);
6443 return true;
6444}
6445
/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  /* Dispose of the exit -> number-of-iterations descriptor map.  */
  if (data->niters)
    {
      pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  /* Reset the per-SSA-name version info of every relevant name.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  /* Release the uses together with their cost maps and dependence
     bitmaps.  */
  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  /* Release the candidates.  */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  /* Grow the version info array if new SSA names appeared meanwhile.  */
  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_id = 0;

  /* Forget the DECL_RTLs that were forced while estimating register
     pressure / costs.  */
  FOR_EACH_VEC_ELT (tree, decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);

  /* Empty the invariant expression hash table for the next loop.  */
  htab_empty (data->inv_expr_tab);
  data->inv_expr_id = 0;
}
6518
/* Finalizes data structures used by the iv optimization pass.  LOOPS is the
   loop tree.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  /* Release per-loop data first -- it references the structures freed
     below.  */
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
  htab_delete (data->inv_expr_tab);
}
6535
a6b74a67 6536/* Returns true if the loop body BODY includes any function calls. */
6537
6538static bool
6539loop_body_includes_call (basic_block *body, unsigned num_nodes)
6540{
6541 gimple_stmt_iterator gsi;
6542 unsigned i;
6543
6544 for (i = 0; i < num_nodes; i++)
6545 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6546 {
6547 gimple stmt = gsi_stmt (gsi);
6548 if (is_gimple_call (stmt)
6549 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6550 return true;
6551 }
6552 return false;
6553}
6554
/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  /* Per-loop state must have been released by the previous iteration.  */
  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* Record whether the body contains calls (affects register pressure
     estimates) and renumber stmt uids for dominance queries.  */
  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_use_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}
6635
7194de72 6636/* Main entry point. Optimizes induction variables in loops. */
dec41e98 6637
6638void
7194de72 6639tree_ssa_iv_optimize (void)
dec41e98 6640{
6641 struct loop *loop;
6642 struct ivopts_data data;
17519ba0 6643 loop_iterator li;
dec41e98 6644
fe382241 6645 tree_ssa_iv_optimize_init (&data);
dec41e98 6646
6647 /* Optimize the loops starting with the innermost ones. */
17519ba0 6648 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
dec41e98 6649 {
abf7e53a 6650 if (dump_file && (dump_flags & TDF_DETAILS))
6651 flow_loop_dump (loop, dump_file, NULL, 1);
0e269eea 6652
6653 tree_ssa_iv_optimize_loop (&data, loop);
dec41e98 6654 }
6655
fe382241 6656 tree_ssa_iv_optimize_finalize (&data);
dec41e98 6657}