]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
re PR tree-optimization/41857 (Loop optimizer breaks __ea pointers with -mea64)
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
8b11a64c 1/* Induction variable optimizations.
8f5929e1
JJ
2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
8b11a64c
ZD
4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
9dcd6f09 9Free Software Foundation; either version 3, or (at your option) any
8b11a64c
ZD
10later version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT
13ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
9dcd6f09
NC
18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
8b11a64c
ZD
20
21/* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
70#include "rtl.h"
71#include "tm_p.h"
72#include "hard-reg-set.h"
73#include "basic-block.h"
74#include "output.h"
75#include "diagnostic.h"
76#include "tree-flow.h"
77#include "tree-dump.h"
78#include "timevar.h"
79#include "cfgloop.h"
80#include "varray.h"
81#include "expr.h"
82#include "tree-pass.h"
83#include "ggc.h"
84#include "insn-config.h"
85#include "recog.h"
15814ba0 86#include "pointer-set.h"
8b11a64c
ZD
87#include "hashtab.h"
88#include "tree-chrec.h"
89#include "tree-scalar-evolution.h"
90#include "cfgloop.h"
91#include "params.h"
39b4020c 92#include "langhooks.h"
73f30c63 93#include "tree-affine.h"
8318b0d9 94#include "target.h"
8b11a64c
ZD
95
96/* The infinite cost. */
97#define INFTY 10000000
98
99/* The expected number of loop iterations. TODO -- use profiling instead of
100 this. */
101#define AVG_LOOP_NITER(LOOP) 5
102
8b11a64c
ZD
103
104/* Representation of the induction variable. */
105struct iv
106{
107 tree base; /* Initial value of the iv. */
e6845c23 108 tree base_object; /* A memory object to that the induction variable points. */
8b11a64c
ZD
109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
114};
115
116/* Per-ssa version information (induction variable descriptions, etc.). */
117struct version_info
118{
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
125};
126
8b11a64c
ZD
127/* Types of uses. */
128enum use_type
129{
130 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
8b11a64c
ZD
131 USE_ADDRESS, /* Use in an address. */
132 USE_COMPARE /* Use is a compare. */
133};
134
6e8c65f6
ZD
135/* Cost of a computation. */
136typedef struct
137{
2c08497a 138 int cost; /* The runtime cost. */
6e8c65f6
ZD
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
143} comp_cost;
144
145static const comp_cost zero_cost = {0, 0};
146static const comp_cost infinite_cost = {INFTY, INFTY};
147
8b11a64c
ZD
148/* The candidate - cost pair. */
149struct cost_pair
150{
151 struct iv_cand *cand; /* The candidate. */
6e8c65f6 152 comp_cost cost; /* The cost. */
8b11a64c
ZD
153 bitmap depends_on; /* The list of invariants that have to be
154 preserved. */
f5f12961
ZD
155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
8b11a64c
ZD
158};
159
160/* Use. */
161struct iv_use
162{
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
726a989a 166 gimple stmt; /* Statement in that it occurs. */
8b11a64c 167 tree *op_p; /* The place where it occurs. */
b1b02be2
ZD
168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
169 important ones. */
8b11a64c
ZD
170
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
174
175 struct iv_cand *selected;
176 /* The selected candidate. */
177};
178
179/* The position where the iv is computed. */
180enum iv_position
181{
182 IP_NORMAL, /* At the end, just before the exit condition. */
183 IP_END, /* At the end of the latch block. */
2c08497a
BS
184 IP_BEFORE_USE, /* Immediately before a specific use. */
185 IP_AFTER_USE, /* Immediately after a specific use. */
8b11a64c
ZD
186 IP_ORIGINAL /* The original biv. */
187};
188
189/* The induction variable candidate. */
190struct iv_cand
191{
192 unsigned id; /* The number of the candidate. */
193 bool important; /* Whether this is an "important" candidate, i.e. such
194 that it should be considered by all uses. */
195 enum iv_position pos; /* Where it is computed. */
726a989a 196 gimple incremented_at;/* For original biv, the statement where it is
8b11a64c
ZD
197 incremented. */
198 tree var_before; /* The variable used for it before increment. */
199 tree var_after; /* The variable used for it after increment. */
200 struct iv *iv; /* The value of the candidate. NULL for
201 "pseudocandidate" used to indicate the possibility
202 to replace the final value of an iv by direct
203 computation of the value. */
204 unsigned cost; /* Cost of the candidate. */
2c08497a
BS
205 unsigned cost_step; /* Cost of the candidate's increment operation. */
206 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
207 where it is incremented. */
9be872b7
ZD
208 bitmap depends_on; /* The list of invariants that are used in step of the
209 biv. */
8b11a64c
ZD
210};
211
212/* The data used by the induction variable optimizations. */
213
69ebd99d
KH
214typedef struct iv_use *iv_use_p;
215DEF_VEC_P(iv_use_p);
216DEF_VEC_ALLOC_P(iv_use_p,heap);
217
218typedef struct iv_cand *iv_cand_p;
219DEF_VEC_P(iv_cand_p);
220DEF_VEC_ALLOC_P(iv_cand_p,heap);
221
8b11a64c
ZD
222struct ivopts_data
223{
224 /* The currently optimized loop. */
225 struct loop *current_loop;
226
8f5929e1
JJ
227 /* Numbers of iterations for all exits of the current loop. */
228 struct pointer_map_t *niters;
f40751dd 229
9a2ef6b8
ZD
230 /* Number of registers used in it. */
231 unsigned regs_used;
232
8b11a64c
ZD
233 /* The size of version_info array allocated. */
234 unsigned version_info_size;
235
236 /* The array of information for the ssa names. */
237 struct version_info *version_info;
238
239 /* The bitmap of indices in version_info whose value was changed. */
240 bitmap relevant;
241
8b11a64c 242 /* The uses of induction variables. */
69ebd99d 243 VEC(iv_use_p,heap) *iv_uses;
8b11a64c
ZD
244
245 /* The candidates. */
69ebd99d 246 VEC(iv_cand_p,heap) *iv_candidates;
8b11a64c 247
80cad5fa
ZD
248 /* A bitmap of important candidates. */
249 bitmap important_candidates;
250
8f5929e1
JJ
251 /* The maximum invariant id. */
252 unsigned max_inv_id;
253
8b11a64c
ZD
254 /* Whether to consider just related and important candidates when replacing a
255 use. */
256 bool consider_all_candidates;
8f5929e1
JJ
257
258 /* Are we optimizing for speed? */
259 bool speed;
8b11a64c
ZD
260};
261
b1b02be2
ZD
262/* An assignment of iv candidates to uses. */
263
264struct iv_ca
265{
266 /* The number of uses covered by the assignment. */
267 unsigned upto;
268
269 /* Number of uses that cannot be expressed by the candidates in the set. */
270 unsigned bad_uses;
271
272 /* Candidate assigned to a use, together with the related costs. */
273 struct cost_pair **cand_for_use;
274
275 /* Number of times each candidate is used. */
276 unsigned *n_cand_uses;
277
278 /* The candidates used. */
279 bitmap cands;
280
36f5ada1
ZD
281 /* The number of candidates in the set. */
282 unsigned n_cands;
283
b1b02be2
ZD
284 /* Total number of registers needed. */
285 unsigned n_regs;
286
287 /* Total cost of expressing uses. */
6e8c65f6 288 comp_cost cand_use_cost;
b1b02be2
ZD
289
290 /* Total cost of candidates. */
291 unsigned cand_cost;
292
293 /* Number of times each invariant is used. */
294 unsigned *n_invariant_uses;
295
296 /* Total cost of the assignment. */
6e8c65f6 297 comp_cost cost;
b1b02be2
ZD
298};
299
300/* Difference of two iv candidate assignments. */
301
302struct iv_ca_delta
303{
304 /* Changed use. */
305 struct iv_use *use;
306
307 /* An old assignment (for rollback purposes). */
308 struct cost_pair *old_cp;
309
310 /* A new assignment. */
311 struct cost_pair *new_cp;
312
313 /* Next change in the list. */
314 struct iv_ca_delta *next_change;
315};
316
8b11a64c
ZD
317/* Bound on number of candidates below that all candidates are considered. */
318
319#define CONSIDER_ALL_CANDIDATES_BOUND \
320 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
321
2a7e31df 322/* If there are more iv occurrences, we just give up (it is quite unlikely that
8b11a64c
ZD
323 optimizing such a loop would help, and it would take ages). */
324
325#define MAX_CONSIDERED_USES \
326 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
327
36f5ada1
ZD
328/* If there are at most this number of ivs in the set, try removing unnecessary
329 ivs from the set always. */
330
331#define ALWAYS_PRUNE_CAND_SET_BOUND \
332 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
333
8b11a64c
ZD
334/* The list of trees for that the decl_rtl field must be reset is stored
335 here. */
336
69ebd99d 337static VEC(tree,heap) *decl_rtl_to_reset;
8b11a64c
ZD
338
339/* Number of uses recorded in DATA. */
340
341static inline unsigned
342n_iv_uses (struct ivopts_data *data)
343{
69ebd99d 344 return VEC_length (iv_use_p, data->iv_uses);
8b11a64c
ZD
345}
346
347/* Ith use recorded in DATA. */
348
349static inline struct iv_use *
350iv_use (struct ivopts_data *data, unsigned i)
351{
69ebd99d 352 return VEC_index (iv_use_p, data->iv_uses, i);
8b11a64c
ZD
353}
354
355/* Number of candidates recorded in DATA. */
356
357static inline unsigned
358n_iv_cands (struct ivopts_data *data)
359{
69ebd99d 360 return VEC_length (iv_cand_p, data->iv_candidates);
8b11a64c
ZD
361}
362
363/* Ith candidate recorded in DATA. */
364
365static inline struct iv_cand *
366iv_cand (struct ivopts_data *data, unsigned i)
367{
69ebd99d 368 return VEC_index (iv_cand_p, data->iv_candidates, i);
8b11a64c
ZD
369}
370
8b11a64c
ZD
371/* The single loop exit if it dominates the latch, NULL otherwise. */
372
b7eae7b8 373edge
8b11a64c
ZD
374single_dom_exit (struct loop *loop)
375{
ac8f6c69 376 edge exit = single_exit (loop);
8b11a64c
ZD
377
378 if (!exit)
379 return NULL;
380
381 if (!just_once_each_iteration_p (loop, exit->src))
382 return NULL;
383
384 return exit;
385}
386
387/* Dumps information about the induction variable IV to FILE. */
388
389extern void dump_iv (FILE *, struct iv *);
390void
391dump_iv (FILE *file, struct iv *iv)
392{
e6845c23
ZD
393 if (iv->ssa_name)
394 {
395 fprintf (file, "ssa name ");
396 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
397 fprintf (file, "\n");
398 }
8b11a64c 399
2f4675b4
ZD
400 fprintf (file, " type ");
401 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
402 fprintf (file, "\n");
403
8b11a64c
ZD
404 if (iv->step)
405 {
406 fprintf (file, " base ");
407 print_generic_expr (file, iv->base, TDF_SLIM);
408 fprintf (file, "\n");
409
410 fprintf (file, " step ");
411 print_generic_expr (file, iv->step, TDF_SLIM);
412 fprintf (file, "\n");
413 }
414 else
415 {
416 fprintf (file, " invariant ");
417 print_generic_expr (file, iv->base, TDF_SLIM);
418 fprintf (file, "\n");
419 }
420
e6845c23
ZD
421 if (iv->base_object)
422 {
423 fprintf (file, " base object ");
424 print_generic_expr (file, iv->base_object, TDF_SLIM);
425 fprintf (file, "\n");
426 }
427
8b11a64c
ZD
428 if (iv->biv_p)
429 fprintf (file, " is a biv\n");
430}
431
432/* Dumps information about the USE to FILE. */
433
434extern void dump_use (FILE *, struct iv_use *);
435void
436dump_use (FILE *file, struct iv_use *use)
437{
8b11a64c
ZD
438 fprintf (file, "use %d\n", use->id);
439
440 switch (use->type)
441 {
442 case USE_NONLINEAR_EXPR:
443 fprintf (file, " generic\n");
444 break;
445
8b11a64c
ZD
446 case USE_ADDRESS:
447 fprintf (file, " address\n");
448 break;
449
450 case USE_COMPARE:
451 fprintf (file, " compare\n");
452 break;
453
454 default:
1e128c5f 455 gcc_unreachable ();
8b11a64c
ZD
456 }
457
2f4675b4 458 fprintf (file, " in statement ");
726a989a 459 print_gimple_stmt (file, use->stmt, 0, 0);
2f4675b4
ZD
460 fprintf (file, "\n");
461
462 fprintf (file, " at position ");
463 if (use->op_p)
464 print_generic_expr (file, *use->op_p, TDF_SLIM);
465 fprintf (file, "\n");
466
e6845c23 467 dump_iv (file, use->iv);
2f4675b4 468
eec5fec9
ZD
469 if (use->related_cands)
470 {
471 fprintf (file, " related candidates ");
472 dump_bitmap (file, use->related_cands);
473 }
8b11a64c
ZD
474}
475
476/* Dumps information about the uses to FILE. */
477
478extern void dump_uses (FILE *, struct ivopts_data *);
479void
480dump_uses (FILE *file, struct ivopts_data *data)
481{
482 unsigned i;
483 struct iv_use *use;
484
485 for (i = 0; i < n_iv_uses (data); i++)
486 {
487 use = iv_use (data, i);
488
489 dump_use (file, use);
490 fprintf (file, "\n");
491 }
492}
493
494/* Dumps information about induction variable candidate CAND to FILE. */
495
496extern void dump_cand (FILE *, struct iv_cand *);
497void
498dump_cand (FILE *file, struct iv_cand *cand)
499{
500 struct iv *iv = cand->iv;
501
502 fprintf (file, "candidate %d%s\n",
503 cand->id, cand->important ? " (important)" : "");
504
9be872b7
ZD
505 if (cand->depends_on)
506 {
507 fprintf (file, " depends on ");
508 dump_bitmap (file, cand->depends_on);
509 }
510
8b11a64c
ZD
511 if (!iv)
512 {
513 fprintf (file, " final value replacement\n");
514 return;
515 }
516
517 switch (cand->pos)
518 {
519 case IP_NORMAL:
520 fprintf (file, " incremented before exit test\n");
521 break;
522
2c08497a
BS
523 case IP_BEFORE_USE:
524 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
525 break;
526
527 case IP_AFTER_USE:
528 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
529 break;
530
8b11a64c
ZD
531 case IP_END:
532 fprintf (file, " incremented at end\n");
533 break;
534
535 case IP_ORIGINAL:
536 fprintf (file, " original biv\n");
537 break;
538 }
539
e6845c23 540 dump_iv (file, iv);
8b11a64c
ZD
541}
542
543/* Returns the info for ssa version VER. */
544
545static inline struct version_info *
546ver_info (struct ivopts_data *data, unsigned ver)
547{
548 return data->version_info + ver;
549}
550
551/* Returns the info for ssa name NAME. */
552
553static inline struct version_info *
554name_info (struct ivopts_data *data, tree name)
555{
556 return ver_info (data, SSA_NAME_VERSION (name));
557}
558
8b11a64c
ZD
559/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
560 emitted in LOOP. */
561
562static bool
726a989a 563stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
8b11a64c 564{
726a989a 565 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
8b11a64c 566
1e128c5f 567 gcc_assert (bb);
8b11a64c
ZD
568
569 if (sbb == loop->latch)
570 return true;
571
572 if (sbb != bb)
573 return false;
574
575 return stmt == last_stmt (bb);
576}
577
578/* Returns true if STMT if after the place where the original induction
2c08497a
BS
579 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
580 if the positions are identical. */
8b11a64c
ZD
581
582static bool
2c08497a 583stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
8b11a64c 584{
726a989a
RB
585 basic_block cand_bb = gimple_bb (cand->incremented_at);
586 basic_block stmt_bb = gimple_bb (stmt);
8b11a64c
ZD
587
588 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
589 return false;
590
591 if (stmt_bb != cand_bb)
592 return true;
593
2c08497a
BS
594 if (true_if_equal
595 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
596 return true;
597 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
8b11a64c
ZD
598}
599
600/* Returns true if STMT if after the place where the induction variable
601 CAND is incremented in LOOP. */
602
603static bool
726a989a 604stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
8b11a64c
ZD
605{
606 switch (cand->pos)
607 {
608 case IP_END:
609 return false;
610
611 case IP_NORMAL:
612 return stmt_after_ip_normal_pos (loop, stmt);
613
614 case IP_ORIGINAL:
2c08497a
BS
615 case IP_AFTER_USE:
616 return stmt_after_inc_pos (cand, stmt, false);
617
618 case IP_BEFORE_USE:
619 return stmt_after_inc_pos (cand, stmt, true);
8b11a64c
ZD
620
621 default:
1e128c5f 622 gcc_unreachable ();
8b11a64c
ZD
623 }
624}
625
dcccd88d
ZD
626/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
627
628static bool
629abnormal_ssa_name_p (tree exp)
630{
631 if (!exp)
632 return false;
633
634 if (TREE_CODE (exp) != SSA_NAME)
635 return false;
636
637 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
638}
639
640/* Returns false if BASE or INDEX contains a ssa name that occurs in an
641 abnormal phi node. Callback for for_each_index. */
642
643static bool
644idx_contains_abnormal_ssa_name_p (tree base, tree *index,
645 void *data ATTRIBUTE_UNUSED)
646{
9f7ccf69 647 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dcccd88d
ZD
648 {
649 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
650 return false;
651 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
652 return false;
653 }
654
655 return !abnormal_ssa_name_p (*index);
656}
657
658/* Returns true if EXPR contains a ssa name that occurs in an
659 abnormal phi node. */
660
e5db3515 661bool
dcccd88d
ZD
662contains_abnormal_ssa_name_p (tree expr)
663{
664 enum tree_code code;
c22940cd 665 enum tree_code_class codeclass;
dcccd88d
ZD
666
667 if (!expr)
668 return false;
669
670 code = TREE_CODE (expr);
c22940cd 671 codeclass = TREE_CODE_CLASS (code);
dcccd88d
ZD
672
673 if (code == SSA_NAME)
674 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
675
676 if (code == INTEGER_CST
677 || is_gimple_min_invariant (expr))
678 return false;
679
680 if (code == ADDR_EXPR)
681 return !for_each_index (&TREE_OPERAND (expr, 0),
682 idx_contains_abnormal_ssa_name_p,
683 NULL);
684
c22940cd 685 switch (codeclass)
dcccd88d
ZD
686 {
687 case tcc_binary:
688 case tcc_comparison:
689 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
690 return true;
691
692 /* Fallthru. */
693 case tcc_unary:
694 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
695 return true;
696
697 break;
698
699 default:
700 gcc_unreachable ();
701 }
702
703 return false;
704}
705
dcccd88d 706/* Returns tree describing number of iterations determined from
ca4c3169
ZD
707 EXIT of DATA->current_loop, or NULL if something goes wrong. */
708
dcccd88d 709static tree
ca4c3169
ZD
710niter_for_exit (struct ivopts_data *data, edge exit)
711{
dcccd88d 712 struct tree_niter_desc desc;
15814ba0
PB
713 tree niter;
714 void **slot;
ca4c3169 715
15814ba0 716 if (!data->niters)
ca4c3169 717 {
15814ba0
PB
718 data->niters = pointer_map_create ();
719 slot = NULL;
720 }
721 else
722 slot = pointer_map_contains (data->niters, exit);
dcccd88d 723
15814ba0
PB
724 if (!slot)
725 {
dcccd88d
ZD
726 /* Try to determine number of iterations. We must know it
727 unconditionally (i.e., without possibility of # of iterations
728 being zero). Also, we cannot safely work with ssa names that
729 appear in phi nodes on abnormal edges, so that we do not create
730 overlapping life ranges for them (PR 27283). */
731 if (number_of_iterations_exit (data->current_loop,
732 exit, &desc, true)
6e682d7e 733 && integer_zerop (desc.may_be_zero)
dcccd88d 734 && !contains_abnormal_ssa_name_p (desc.niter))
15814ba0 735 niter = desc.niter;
dcccd88d 736 else
15814ba0
PB
737 niter = NULL_TREE;
738
739 *pointer_map_insert (data->niters, exit) = niter;
ca4c3169
ZD
740 }
741 else
c22940cd 742 niter = (tree) *slot;
ca4c3169 743
15814ba0 744 return niter;
ca4c3169
ZD
745}
746
dcccd88d 747/* Returns tree describing number of iterations determined from
ca4c3169
ZD
748 single dominating exit of DATA->current_loop, or NULL if something
749 goes wrong. */
750
dcccd88d 751static tree
ca4c3169
ZD
752niter_for_single_dom_exit (struct ivopts_data *data)
753{
754 edge exit = single_dom_exit (data->current_loop);
755
756 if (!exit)
757 return NULL;
758
759 return niter_for_exit (data, exit);
760}
761
8b11a64c 762/* Initializes data structures used by the iv optimization pass, stored
9a2ef6b8 763 in DATA. */
8b11a64c
ZD
764
765static void
9a2ef6b8 766tree_ssa_iv_optimize_init (struct ivopts_data *data)
8b11a64c 767{
8b11a64c 768 data->version_info_size = 2 * num_ssa_names;
5ed6ace5 769 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8bdbfff5
NS
770 data->relevant = BITMAP_ALLOC (NULL);
771 data->important_candidates = BITMAP_ALLOC (NULL);
8b11a64c 772 data->max_inv_id = 0;
15814ba0 773 data->niters = NULL;
69ebd99d
KH
774 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
775 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
776 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
8b11a64c
ZD
777}
778
e6845c23
ZD
779/* Returns a memory object to that EXPR points. In case we are able to
780 determine that it does not point to any such object, NULL is returned. */
781
782static tree
783determine_base_object (tree expr)
784{
785 enum tree_code code = TREE_CODE (expr);
5be014d5 786 tree base, obj;
e6845c23 787
975626a7
ZD
788 /* If this is a pointer casted to any type, we need to determine
789 the base object for the pointer; so handle conversions before
790 throwing away non-pointer expressions. */
1043771b 791 if (CONVERT_EXPR_P (expr))
975626a7
ZD
792 return determine_base_object (TREE_OPERAND (expr, 0));
793
e6845c23
ZD
794 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
795 return NULL_TREE;
796
797 switch (code)
798 {
799 case INTEGER_CST:
800 return NULL_TREE;
801
802 case ADDR_EXPR:
803 obj = TREE_OPERAND (expr, 0);
804 base = get_base_address (obj);
805
806 if (!base)
f5e2738c 807 return expr;
e6845c23 808
7299dbfb 809 if (TREE_CODE (base) == INDIRECT_REF)
f5e2738c 810 return determine_base_object (TREE_OPERAND (base, 0));
7299dbfb 811
62b37d91
RG
812 return fold_convert (ptr_type_node,
813 build_fold_addr_expr (base));
e6845c23 814
5be014d5
AP
815 case POINTER_PLUS_EXPR:
816 return determine_base_object (TREE_OPERAND (expr, 0));
817
e6845c23
ZD
818 case PLUS_EXPR:
819 case MINUS_EXPR:
5be014d5
AP
820 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
821 gcc_unreachable ();
e6845c23
ZD
822
823 default:
824 return fold_convert (ptr_type_node, expr);
825 }
826}
827
8b11a64c
ZD
828/* Allocates an induction variable with given initial value BASE and step STEP
829 for loop LOOP. */
830
831static struct iv *
832alloc_iv (tree base, tree step)
833{
5ed6ace5 834 struct iv *iv = XCNEW (struct iv);
6e42ce54 835 gcc_assert (step != NULL_TREE);
8b11a64c
ZD
836
837 iv->base = base;
e6845c23 838 iv->base_object = determine_base_object (base);
8b11a64c
ZD
839 iv->step = step;
840 iv->biv_p = false;
841 iv->have_use_for = false;
842 iv->use_id = 0;
843 iv->ssa_name = NULL_TREE;
844
845 return iv;
846}
847
848/* Sets STEP and BASE for induction variable IV. */
849
850static void
851set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
852{
853 struct version_info *info = name_info (data, iv);
854
1e128c5f 855 gcc_assert (!info->iv);
8b11a64c
ZD
856
857 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
858 info->iv = alloc_iv (base, step);
859 info->iv->ssa_name = iv;
860}
861
862/* Finds induction variable declaration for VAR. */
863
864static struct iv *
865get_iv (struct ivopts_data *data, tree var)
866{
867 basic_block bb;
6e42ce54
ZD
868 tree type = TREE_TYPE (var);
869
870 if (!POINTER_TYPE_P (type)
871 && !INTEGRAL_TYPE_P (type))
872 return NULL;
873
8b11a64c
ZD
874 if (!name_info (data, var)->iv)
875 {
726a989a 876 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
8b11a64c
ZD
877
878 if (!bb
879 || !flow_bb_inside_loop_p (data->current_loop, bb))
6e42ce54 880 set_iv (data, var, var, build_int_cst (type, 0));
8b11a64c
ZD
881 }
882
883 return name_info (data, var)->iv;
884}
885
9be872b7
ZD
886/* Determines the step of a biv defined in PHI. Returns NULL if PHI does
887 not define a simple affine biv with nonzero step. */
8b11a64c
ZD
888
889static tree
726a989a 890determine_biv_step (gimple phi)
8b11a64c 891{
726a989a 892 struct loop *loop = gimple_bb (phi)->loop_father;
a6f778b2
ZD
893 tree name = PHI_RESULT (phi);
894 affine_iv iv;
8b11a64c
ZD
895
896 if (!is_gimple_reg (name))
897 return NULL_TREE;
898
f017bf5e 899 if (!simple_iv (loop, loop, name, &iv, true))
8b11a64c
ZD
900 return NULL_TREE;
901
6e42ce54 902 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
8b11a64c
ZD
903}
904
8b11a64c
ZD
905/* Finds basic ivs. */
906
907static bool
908find_bivs (struct ivopts_data *data)
909{
726a989a
RB
910 gimple phi;
911 tree step, type, base;
8b11a64c
ZD
912 bool found = false;
913 struct loop *loop = data->current_loop;
726a989a 914 gimple_stmt_iterator psi;
8b11a64c 915
726a989a 916 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 917 {
726a989a
RB
918 phi = gsi_stmt (psi);
919
8b11a64c
ZD
920 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
921 continue;
922
923 step = determine_biv_step (phi);
8b11a64c
ZD
924 if (!step)
925 continue;
8b11a64c
ZD
926
927 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
d7bf3bcf 928 base = expand_simple_operations (base);
9be872b7
ZD
929 if (contains_abnormal_ssa_name_p (base)
930 || contains_abnormal_ssa_name_p (step))
8b11a64c
ZD
931 continue;
932
933 type = TREE_TYPE (PHI_RESULT (phi));
934 base = fold_convert (type, base);
9be872b7 935 if (step)
1ffe34d9
AP
936 {
937 if (POINTER_TYPE_P (type))
938 step = fold_convert (sizetype, step);
939 else
940 step = fold_convert (type, step);
941 }
8b11a64c
ZD
942
943 set_iv (data, PHI_RESULT (phi), base, step);
944 found = true;
945 }
946
947 return found;
948}
949
950/* Marks basic ivs. */
951
952static void
953mark_bivs (struct ivopts_data *data)
954{
726a989a
RB
955 gimple phi;
956 tree var;
8b11a64c
ZD
957 struct iv *iv, *incr_iv;
958 struct loop *loop = data->current_loop;
959 basic_block incr_bb;
726a989a 960 gimple_stmt_iterator psi;
8b11a64c 961
726a989a 962 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 963 {
726a989a
RB
964 phi = gsi_stmt (psi);
965
8b11a64c
ZD
966 iv = get_iv (data, PHI_RESULT (phi));
967 if (!iv)
968 continue;
969
970 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
971 incr_iv = get_iv (data, var);
972 if (!incr_iv)
973 continue;
974
975 /* If the increment is in the subloop, ignore it. */
726a989a 976 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
8b11a64c
ZD
977 if (incr_bb->loop_father != data->current_loop
978 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
979 continue;
980
981 iv->biv_p = true;
982 incr_iv->biv_p = true;
983 }
984}
985
986/* Checks whether STMT defines a linear induction variable and stores its
a6f778b2 987 parameters to IV. */
8b11a64c
ZD
988
989static bool
726a989a 990find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
8b11a64c
ZD
991{
992 tree lhs;
993 struct loop *loop = data->current_loop;
994
a6f778b2
ZD
995 iv->base = NULL_TREE;
996 iv->step = NULL_TREE;
8b11a64c 997
726a989a 998 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8b11a64c
ZD
999 return false;
1000
726a989a 1001 lhs = gimple_assign_lhs (stmt);
8b11a64c
ZD
1002 if (TREE_CODE (lhs) != SSA_NAME)
1003 return false;
1004
f017bf5e 1005 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
8b11a64c 1006 return false;
a6f778b2 1007 iv->base = expand_simple_operations (iv->base);
8b11a64c 1008
a6f778b2
ZD
1009 if (contains_abnormal_ssa_name_p (iv->base)
1010 || contains_abnormal_ssa_name_p (iv->step))
8b11a64c
ZD
1011 return false;
1012
1013 return true;
1014}
1015
1016/* Finds general ivs in statement STMT. */
1017
1018static void
726a989a 1019find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c 1020{
a6f778b2 1021 affine_iv iv;
8b11a64c 1022
a6f778b2 1023 if (!find_givs_in_stmt_scev (data, stmt, &iv))
8b11a64c
ZD
1024 return;
1025
726a989a 1026 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
8b11a64c
ZD
1027}
1028
1029/* Finds general ivs in basic block BB. */
1030
1031static void
1032find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1033{
726a989a 1034 gimple_stmt_iterator bsi;
8b11a64c 1035
726a989a
RB
1036 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1037 find_givs_in_stmt (data, gsi_stmt (bsi));
8b11a64c
ZD
1038}
1039
1040/* Finds general ivs. */
1041
1042static void
1043find_givs (struct ivopts_data *data)
1044{
1045 struct loop *loop = data->current_loop;
1046 basic_block *body = get_loop_body_in_dom_order (loop);
1047 unsigned i;
1048
1049 for (i = 0; i < loop->num_nodes; i++)
1050 find_givs_in_bb (data, body[i]);
1051 free (body);
1052}
1053
8b11a64c
ZD
1054/* For each ssa name defined in LOOP determines whether it is an induction
1055 variable and if so, its initial value and step. */
1056
1057static bool
1058find_induction_variables (struct ivopts_data *data)
1059{
1060 unsigned i;
87c476a2 1061 bitmap_iterator bi;
8b11a64c
ZD
1062
1063 if (!find_bivs (data))
1064 return false;
1065
1066 find_givs (data);
1067 mark_bivs (data);
8b11a64c
ZD
1068
1069 if (dump_file && (dump_flags & TDF_DETAILS))
1070 {
dcccd88d 1071 tree niter = niter_for_single_dom_exit (data);
ca4c3169
ZD
1072
1073 if (niter)
8b11a64c
ZD
1074 {
1075 fprintf (dump_file, " number of iterations ");
dcccd88d
ZD
1076 print_generic_expr (dump_file, niter, TDF_SLIM);
1077 fprintf (dump_file, "\n\n");
8b11a64c
ZD
1078 };
1079
1080 fprintf (dump_file, "Induction variables:\n\n");
1081
87c476a2 1082 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1083 {
1084 if (ver_info (data, i)->iv)
1085 dump_iv (dump_file, ver_info (data, i)->iv);
87c476a2 1086 }
8b11a64c
ZD
1087 }
1088
1089 return true;
1090}
1091
1092/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1093
1094static struct iv_use *
1095record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
726a989a 1096 gimple stmt, enum use_type use_type)
8b11a64c 1097{
5ed6ace5 1098 struct iv_use *use = XCNEW (struct iv_use);
8b11a64c
ZD
1099
1100 use->id = n_iv_uses (data);
1101 use->type = use_type;
1102 use->iv = iv;
1103 use->stmt = stmt;
1104 use->op_p = use_p;
8bdbfff5 1105 use->related_cands = BITMAP_ALLOC (NULL);
8b11a64c 1106
e6845c23
ZD
1107 /* To avoid showing ssa name in the dumps, if it was not reset by the
1108 caller. */
1109 iv->ssa_name = NULL_TREE;
1110
8b11a64c
ZD
1111 if (dump_file && (dump_flags & TDF_DETAILS))
1112 dump_use (dump_file, use);
1113
69ebd99d 1114 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
8b11a64c
ZD
1115
1116 return use;
1117}
1118
1119/* Checks whether OP is a loop-level invariant and if so, records it.
1120 NONLINEAR_USE is true if the invariant is used in a way we do not
1121 handle specially. */
1122
1123static void
1124record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1125{
1126 basic_block bb;
1127 struct version_info *info;
1128
1129 if (TREE_CODE (op) != SSA_NAME
1130 || !is_gimple_reg (op))
1131 return;
1132
726a989a 1133 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
8b11a64c
ZD
1134 if (bb
1135 && flow_bb_inside_loop_p (data->current_loop, bb))
1136 return;
1137
1138 info = name_info (data, op);
1139 info->name = op;
1140 info->has_nonlin_use |= nonlinear_use;
1141 if (!info->inv_id)
1142 info->inv_id = ++data->max_inv_id;
1143 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1144}
1145
50cc9802 1146/* Checks whether the use OP is interesting and if so, records it. */
8b11a64c
ZD
1147
1148static struct iv_use *
50cc9802 1149find_interesting_uses_op (struct ivopts_data *data, tree op)
8b11a64c
ZD
1150{
1151 struct iv *iv;
1152 struct iv *civ;
726a989a 1153 gimple stmt;
8b11a64c
ZD
1154 struct iv_use *use;
1155
1156 if (TREE_CODE (op) != SSA_NAME)
1157 return NULL;
1158
1159 iv = get_iv (data, op);
1160 if (!iv)
1161 return NULL;
1162
1163 if (iv->have_use_for)
1164 {
1165 use = iv_use (data, iv->use_id);
1166
50cc9802 1167 gcc_assert (use->type == USE_NONLINEAR_EXPR);
8b11a64c
ZD
1168 return use;
1169 }
1170
6e42ce54 1171 if (integer_zerop (iv->step))
8b11a64c
ZD
1172 {
1173 record_invariant (data, op, true);
1174 return NULL;
1175 }
1176 iv->have_use_for = true;
1177
5ed6ace5 1178 civ = XNEW (struct iv);
8b11a64c
ZD
1179 *civ = *iv;
1180
1181 stmt = SSA_NAME_DEF_STMT (op);
726a989a
RB
1182 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1183 || is_gimple_assign (stmt));
8b11a64c 1184
50cc9802 1185 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
8b11a64c
ZD
1186 iv->use_id = use->id;
1187
1188 return use;
1189}
1190
726a989a
RB
1191/* Given a condition in statement STMT, checks whether it is a compare
1192 of an induction variable and an invariant. If this is the case,
1193 CONTROL_VAR is set to location of the iv, BOUND to the location of
1194 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1195 induction variable descriptions, and true is returned. If this is not
1196 the case, CONTROL_VAR and BOUND are set to the arguments of the
1197 condition and false is returned. */
8b11a64c 1198
b697aed4 1199static bool
726a989a 1200extract_cond_operands (struct ivopts_data *data, gimple stmt,
b697aed4
ZD
1201 tree **control_var, tree **bound,
1202 struct iv **iv_var, struct iv **iv_bound)
1203{
726a989a 1204 /* The objects returned when COND has constant operands. */
b697aed4
ZD
1205 static struct iv const_iv;
1206 static tree zero;
b697aed4
ZD
1207 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1208 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1209 bool ret = false;
1210
726a989a 1211 if (gimple_code (stmt) == GIMPLE_COND)
8b11a64c 1212 {
726a989a
RB
1213 op0 = gimple_cond_lhs_ptr (stmt);
1214 op1 = gimple_cond_rhs_ptr (stmt);
8b11a64c 1215 }
726a989a 1216 else
8b11a64c 1217 {
726a989a
RB
1218 op0 = gimple_assign_rhs1_ptr (stmt);
1219 op1 = gimple_assign_rhs2_ptr (stmt);
8b11a64c
ZD
1220 }
1221
726a989a
RB
1222 zero = integer_zero_node;
1223 const_iv.step = integer_zero_node;
1224
b697aed4
ZD
1225 if (TREE_CODE (*op0) == SSA_NAME)
1226 iv0 = get_iv (data, *op0);
1227 if (TREE_CODE (*op1) == SSA_NAME)
1228 iv1 = get_iv (data, *op1);
8b11a64c 1229
b697aed4
ZD
1230 /* Exactly one of the compared values must be an iv, and the other one must
1231 be an invariant. */
1232 if (!iv0 || !iv1)
1233 goto end;
1234
1235 if (integer_zerop (iv0->step))
1236 {
1237 /* Control variable may be on the other side. */
1238 tmp_op = op0; op0 = op1; op1 = tmp_op;
1239 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
8b11a64c 1240 }
b697aed4
ZD
1241 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1242
1243end:
1244 if (control_var)
1245 *control_var = op0;;
1246 if (iv_var)
1247 *iv_var = iv0;;
1248 if (bound)
1249 *bound = op1;
1250 if (iv_bound)
1251 *iv_bound = iv1;
1252
1253 return ret;
1254}
1255
726a989a
RB
1256/* Checks whether the condition in STMT is interesting and if so,
1257 records it. */
b697aed4
ZD
1258
1259static void
726a989a 1260find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
b697aed4
ZD
1261{
1262 tree *var_p, *bound_p;
1263 struct iv *var_iv, *civ;
8b11a64c 1264
726a989a 1265 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
8b11a64c 1266 {
b697aed4
ZD
1267 find_interesting_uses_op (data, *var_p);
1268 find_interesting_uses_op (data, *bound_p);
8b11a64c
ZD
1269 return;
1270 }
1271
5ed6ace5 1272 civ = XNEW (struct iv);
b697aed4 1273 *civ = *var_iv;
726a989a 1274 record_use (data, NULL, civ, stmt, USE_COMPARE);
8b11a64c
ZD
1275}
1276
be35cf60 1277/* Returns true if expression EXPR is obviously invariant in LOOP,
6a732743
SP
1278 i.e. if all its operands are defined outside of the LOOP. LOOP
1279 should not be the function body. */
be35cf60 1280
feb075f4 1281bool
be35cf60
ZD
1282expr_invariant_in_loop_p (struct loop *loop, tree expr)
1283{
1284 basic_block def_bb;
1285 unsigned i, len;
1286
6a732743
SP
1287 gcc_assert (loop_depth (loop) > 0);
1288
be35cf60
ZD
1289 if (is_gimple_min_invariant (expr))
1290 return true;
1291
1292 if (TREE_CODE (expr) == SSA_NAME)
1293 {
726a989a 1294 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
be35cf60
ZD
1295 if (def_bb
1296 && flow_bb_inside_loop_p (loop, def_bb))
1297 return false;
1298
1299 return true;
1300 }
1301
726a989a 1302 if (!EXPR_P (expr))
be35cf60
ZD
1303 return false;
1304
5039610b 1305 len = TREE_OPERAND_LENGTH (expr);
be35cf60
ZD
1306 for (i = 0; i < len; i++)
1307 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1308 return false;
1309
1310 return true;
1311}
1312
726a989a
RB
1313/* Returns true if statement STMT is obviously invariant in LOOP,
1314 i.e. if all its operands on the RHS are defined outside of the LOOP.
1315 LOOP should not be the function body. */
1316
1317bool
1318stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1319{
1320 unsigned i;
1321 tree lhs;
1322
1323 gcc_assert (loop_depth (loop) > 0);
1324
1325 lhs = gimple_get_lhs (stmt);
1326 for (i = 0; i < gimple_num_ops (stmt); i++)
1327 {
1328 tree op = gimple_op (stmt, i);
1329 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1330 return false;
1331 }
1332
1333 return true;
1334}
1335
8b11a64c
ZD
1336/* Cumulates the steps of indices into DATA and replaces their values with the
1337 initial ones. Returns false when the value of the index cannot be determined.
1338 Callback for for_each_index. */
1339
1340struct ifs_ivopts_data
1341{
1342 struct ivopts_data *ivopts_data;
726a989a 1343 gimple stmt;
6e42ce54 1344 tree step;
8b11a64c
ZD
1345};
1346
1347static bool
1348idx_find_step (tree base, tree *idx, void *data)
1349{
c22940cd 1350 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
8b11a64c 1351 struct iv *iv;
d7f5de76 1352 tree step, iv_base, iv_step, lbound, off;
2f4675b4 1353 struct loop *loop = dta->ivopts_data->current_loop;
be35cf60
ZD
1354
1355 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1356 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1357 return false;
1358
1359 /* If base is a component ref, require that the offset of the reference
3a7c155d 1360 be invariant. */
be35cf60
ZD
1361 if (TREE_CODE (base) == COMPONENT_REF)
1362 {
1363 off = component_ref_field_offset (base);
1364 return expr_invariant_in_loop_p (loop, off);
1365 }
1366
1367 /* If base is array, first check whether we will be able to move the
1368 reference out of the loop (in order to take its address in strength
1369 reduction). In order for this to work we need both lower bound
1370 and step to be loop invariants. */
9f7ccf69 1371 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
be35cf60 1372 {
9f7ccf69
EB
1373 /* Moreover, for a range, the size needs to be invariant as well. */
1374 if (TREE_CODE (base) == ARRAY_RANGE_REF
1375 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1376 return false;
1377
be35cf60
ZD
1378 step = array_ref_element_size (base);
1379 lbound = array_ref_low_bound (base);
1380
1381 if (!expr_invariant_in_loop_p (loop, step)
1382 || !expr_invariant_in_loop_p (loop, lbound))
1383 return false;
1384 }
1385
8b11a64c
ZD
1386 if (TREE_CODE (*idx) != SSA_NAME)
1387 return true;
1388
1389 iv = get_iv (dta->ivopts_data, *idx);
1390 if (!iv)
1391 return false;
1392
ea643120
RG
1393 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1394 *&x[0], which is not folded and does not trigger the
1395 ARRAY_REF path below. */
8b11a64c
ZD
1396 *idx = iv->base;
1397
6e42ce54 1398 if (integer_zerop (iv->step))
8b11a64c
ZD
1399 return true;
1400
9f7ccf69 1401 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
1402 {
1403 step = array_ref_element_size (base);
2f4675b4
ZD
1404
1405 /* We only handle addresses whose step is an integer constant. */
1406 if (TREE_CODE (step) != INTEGER_CST)
1407 return false;
2f4675b4 1408 }
8b11a64c 1409 else
5212068f 1410 /* The step for pointer arithmetics already is 1 byte. */
1c1205fb 1411 step = build_int_cst (sizetype, 1);
8b11a64c 1412
d7f5de76
ZD
1413 iv_base = iv->base;
1414 iv_step = iv->step;
1415 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1416 sizetype, &iv_base, &iv_step, dta->stmt,
1417 false))
8b11a64c
ZD
1418 {
1419 /* The index might wrap. */
1420 return false;
1421 }
1422
1c1205fb 1423 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
6e42ce54 1424 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
8b11a64c
ZD
1425
1426 return true;
1427}
1428
1429/* Records use in index IDX. Callback for for_each_index. Ivopts data
1430 object is passed to it in DATA. */
1431
1432static bool
2f4675b4 1433idx_record_use (tree base, tree *idx,
c22940cd 1434 void *vdata)
8b11a64c 1435{
c22940cd 1436 struct ivopts_data *data = (struct ivopts_data *) vdata;
8b11a64c 1437 find_interesting_uses_op (data, *idx);
9f7ccf69 1438 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
1439 {
1440 find_interesting_uses_op (data, array_ref_element_size (base));
1441 find_interesting_uses_op (data, array_ref_low_bound (base));
1442 }
8b11a64c
ZD
1443 return true;
1444}
1445
32159434
CB
1446/* If we can prove that TOP = cst * BOT for some constant cst,
1447 store cst to MUL and return true. Otherwise return false.
1448 The returned value is always sign-extended, regardless of the
1449 signedness of TOP and BOT. */
1450
1451static bool
1452constant_multiple_of (tree top, tree bot, double_int *mul)
1453{
1454 tree mby;
1455 enum tree_code code;
1456 double_int res, p0, p1;
1457 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1458
1459 STRIP_NOPS (top);
1460 STRIP_NOPS (bot);
1461
1462 if (operand_equal_p (top, bot, 0))
1463 {
1464 *mul = double_int_one;
1465 return true;
1466 }
1467
1468 code = TREE_CODE (top);
1469 switch (code)
1470 {
1471 case MULT_EXPR:
1472 mby = TREE_OPERAND (top, 1);
1473 if (TREE_CODE (mby) != INTEGER_CST)
1474 return false;
1475
1476 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1477 return false;
1478
1479 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1480 precision);
1481 return true;
1482
1483 case PLUS_EXPR:
1484 case MINUS_EXPR:
1485 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1486 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1487 return false;
1488
1489 if (code == MINUS_EXPR)
1490 p1 = double_int_neg (p1);
1491 *mul = double_int_sext (double_int_add (p0, p1), precision);
1492 return true;
1493
1494 case INTEGER_CST:
1495 if (TREE_CODE (bot) != INTEGER_CST)
1496 return false;
1497
1498 p0 = double_int_sext (tree_to_double_int (top), precision);
1499 p1 = double_int_sext (tree_to_double_int (bot), precision);
1500 if (double_int_zero_p (p1))
1501 return false;
1502 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1503 precision);
1504 return double_int_zero_p (res);
1505
1506 default:
1507 return false;
1508 }
1509}
1510
1511/* Returns true if memory reference REF with step STEP may be unaligned. */
0a915e3d
ZD
1512
1513static bool
32159434 1514may_be_unaligned_p (tree ref, tree step)
0a915e3d
ZD
1515{
1516 tree base;
1517 tree base_type;
1518 HOST_WIDE_INT bitsize;
1519 HOST_WIDE_INT bitpos;
1520 tree toffset;
1521 enum machine_mode mode;
1522 int unsignedp, volatilep;
1523 unsigned base_align;
1524
ac182688 1525 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
607fb860 1526 thus they are not misaligned. */
ac182688
ZD
1527 if (TREE_CODE (ref) == TARGET_MEM_REF)
1528 return false;
1529
0a915e3d
ZD
1530 /* The test below is basically copy of what expr.c:normal_inner_ref
1531 does to check whether the object must be loaded by parts when
1532 STRICT_ALIGNMENT is true. */
1533 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1534 &unsignedp, &volatilep, true);
1535 base_type = TREE_TYPE (base);
1536 base_align = TYPE_ALIGN (base_type);
1537
32159434
CB
1538 if (mode != BLKmode)
1539 {
1540 double_int mul;
1541 tree al = build_int_cst (TREE_TYPE (step),
1542 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1543
1544 if (base_align < GET_MODE_ALIGNMENT (mode)
0a915e3d 1545 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
32159434
CB
1546 || bitpos % BITS_PER_UNIT != 0)
1547 return true;
1548
80fa012e 1549 if (!constant_multiple_of (step, al, &mul))
32159434
CB
1550 return true;
1551 }
0a915e3d
ZD
1552
1553 return false;
1554}
1555
75715cf6
EB
1556/* Return true if EXPR may be non-addressable. */
1557
1558static bool
1559may_be_nonaddressable_p (tree expr)
1560{
1561 switch (TREE_CODE (expr))
1562 {
928bc34f
EB
1563 case TARGET_MEM_REF:
1564 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1565 target, thus they are always addressable. */
1566 return false;
1567
75715cf6
EB
1568 case COMPONENT_REF:
1569 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1570 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1571
75715cf6
EB
1572 case VIEW_CONVERT_EXPR:
1573 /* This kind of view-conversions may wrap non-addressable objects
1574 and make them look addressable. After some processing the
1575 non-addressability may be uncovered again, causing ADDR_EXPRs
1576 of inappropriate objects to be built. */
7a4fbffc
EB
1577 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1578 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1579 return true;
1580
1581 /* ... fall through ... */
928bc34f
EB
1582
1583 case ARRAY_REF:
1584 case ARRAY_RANGE_REF:
7a4fbffc 1585 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
928bc34f 1586
1043771b 1587 CASE_CONVERT:
928bc34f 1588 return true;
75715cf6
EB
1589
1590 default:
1591 break;
1592 }
1593
1594 return false;
1595}
1596
8b11a64c
ZD
1597/* Finds addresses in *OP_P inside STMT. */
1598
1599static void
726a989a 1600find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
8b11a64c 1601{
6e42ce54 1602 tree base = *op_p, step = build_int_cst (sizetype, 0);
8b11a64c
ZD
1603 struct iv *civ;
1604 struct ifs_ivopts_data ifs_ivopts_data;
1605
e3cc7254
ZD
1606 /* Do not play with volatile memory references. A bit too conservative,
1607 perhaps, but safe. */
726a989a 1608 if (gimple_has_volatile_ops (stmt))
e3cc7254
ZD
1609 goto fail;
1610
8b11a64c
ZD
1611 /* Ignore bitfields for now. Not really something terribly complicated
1612 to handle. TODO. */
75715cf6
EB
1613 if (TREE_CODE (base) == BIT_FIELD_REF)
1614 goto fail;
1615
ac182688
ZD
1616 base = unshare_expr (base);
1617
1618 if (TREE_CODE (base) == TARGET_MEM_REF)
1619 {
1620 tree type = build_pointer_type (TREE_TYPE (base));
1621 tree astep;
1622
1623 if (TMR_BASE (base)
1624 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1625 {
1626 civ = get_iv (data, TMR_BASE (base));
1627 if (!civ)
1628 goto fail;
1629
1630 TMR_BASE (base) = civ->base;
1631 step = civ->step;
1632 }
1633 if (TMR_INDEX (base)
1634 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1635 {
1636 civ = get_iv (data, TMR_INDEX (base));
1637 if (!civ)
1638 goto fail;
8b11a64c 1639
ac182688
ZD
1640 TMR_INDEX (base) = civ->base;
1641 astep = civ->step;
be35cf60 1642
ac182688
ZD
1643 if (astep)
1644 {
1645 if (TMR_STEP (base))
1646 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1647
6e42ce54 1648 step = fold_build2 (PLUS_EXPR, type, step, astep);
ac182688
ZD
1649 }
1650 }
1651
6e42ce54 1652 if (integer_zerop (step))
ac182688
ZD
1653 goto fail;
1654 base = tree_mem_ref_addr (type, base);
1655 }
1656 else
1657 {
1658 ifs_ivopts_data.ivopts_data = data;
1659 ifs_ivopts_data.stmt = stmt;
6e42ce54 1660 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
ac182688 1661 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
6e42ce54 1662 || integer_zerop (ifs_ivopts_data.step))
ac182688 1663 goto fail;
6e42ce54 1664 step = ifs_ivopts_data.step;
ac182688
ZD
1665
1666 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1667 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1668
928bc34f
EB
1669 /* Check that the base expression is addressable. This needs
1670 to be done after substituting bases of IVs into it. */
1671 if (may_be_nonaddressable_p (base))
1672 goto fail;
1673
1674 /* Moreover, on strict alignment platforms, check that it is
1675 sufficiently aligned. */
32159434 1676 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
928bc34f
EB
1677 goto fail;
1678
ac182688 1679 base = build_fold_addr_expr (base);
ea643120
RG
1680
1681 /* Substituting bases of IVs into the base expression might
1682 have caused folding opportunities. */
1683 if (TREE_CODE (base) == ADDR_EXPR)
1684 {
1685 tree *ref = &TREE_OPERAND (base, 0);
1686 while (handled_component_p (*ref))
1687 ref = &TREE_OPERAND (*ref, 0);
1688 if (TREE_CODE (*ref) == INDIRECT_REF)
1689 *ref = fold_indirect_ref (*ref);
1690 }
ac182688 1691 }
8b11a64c
ZD
1692
1693 civ = alloc_iv (base, step);
1694 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1695 return;
1696
1697fail:
1698 for_each_index (op_p, idx_record_use, data);
1699}
1700
1701/* Finds and records invariants used in STMT. */
1702
1703static void
726a989a 1704find_invariants_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c 1705{
f47c96aa
AM
1706 ssa_op_iter iter;
1707 use_operand_p use_p;
8b11a64c
ZD
1708 tree op;
1709
f47c96aa 1710 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 1711 {
f47c96aa 1712 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
1713 record_invariant (data, op, false);
1714 }
1715}
1716
1717/* Finds interesting uses of induction variables in the statement STMT. */
1718
1719static void
726a989a 1720find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c
ZD
1721{
1722 struct iv *iv;
726a989a 1723 tree op, *lhs, *rhs;
f47c96aa
AM
1724 ssa_op_iter iter;
1725 use_operand_p use_p;
726a989a 1726 enum tree_code code;
8b11a64c
ZD
1727
1728 find_invariants_stmt (data, stmt);
1729
726a989a 1730 if (gimple_code (stmt) == GIMPLE_COND)
8b11a64c 1731 {
726a989a 1732 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
1733 return;
1734 }
1735
726a989a 1736 if (is_gimple_assign (stmt))
8b11a64c 1737 {
726a989a
RB
1738 lhs = gimple_assign_lhs_ptr (stmt);
1739 rhs = gimple_assign_rhs1_ptr (stmt);
8b11a64c 1740
726a989a 1741 if (TREE_CODE (*lhs) == SSA_NAME)
8b11a64c
ZD
1742 {
1743 /* If the statement defines an induction variable, the uses are not
1744 interesting by themselves. */
1745
726a989a 1746 iv = get_iv (data, *lhs);
8b11a64c 1747
6e42ce54 1748 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
1749 return;
1750 }
1751
726a989a
RB
1752 code = gimple_assign_rhs_code (stmt);
1753 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1754 && (REFERENCE_CLASS_P (*rhs)
1755 || is_gimple_val (*rhs)))
8b11a64c 1756 {
726a989a
RB
1757 if (REFERENCE_CLASS_P (*rhs))
1758 find_interesting_uses_address (data, stmt, rhs);
1759 else
1760 find_interesting_uses_op (data, *rhs);
8b11a64c 1761
726a989a
RB
1762 if (REFERENCE_CLASS_P (*lhs))
1763 find_interesting_uses_address (data, stmt, lhs);
8b11a64c 1764 return;
8b11a64c 1765 }
726a989a 1766 else if (TREE_CODE_CLASS (code) == tcc_comparison)
8b11a64c 1767 {
726a989a 1768 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
1769 return;
1770 }
2f4675b4
ZD
1771
1772 /* TODO -- we should also handle address uses of type
1773
1774 memory = call (whatever);
1775
1776 and
1777
1778 call (memory). */
8b11a64c
ZD
1779 }
1780
726a989a
RB
1781 if (gimple_code (stmt) == GIMPLE_PHI
1782 && gimple_bb (stmt) == data->current_loop->header)
8b11a64c 1783 {
726a989a 1784 iv = get_iv (data, PHI_RESULT (stmt));
8b11a64c 1785
6e42ce54 1786 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
1787 return;
1788 }
1789
f47c96aa 1790 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 1791 {
f47c96aa 1792 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
1793
1794 if (TREE_CODE (op) != SSA_NAME)
1795 continue;
1796
1797 iv = get_iv (data, op);
1798 if (!iv)
1799 continue;
1800
1801 find_interesting_uses_op (data, op);
1802 }
1803}
1804
1805/* Finds interesting uses of induction variables outside of loops
1806 on loop exit edge EXIT. */
1807
1808static void
1809find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1810{
726a989a
RB
1811 gimple phi;
1812 gimple_stmt_iterator psi;
1813 tree def;
8b11a64c 1814
726a989a 1815 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 1816 {
726a989a 1817 phi = gsi_stmt (psi);
8b11a64c 1818 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
6e42ce54
ZD
1819 if (is_gimple_reg (def))
1820 find_interesting_uses_op (data, def);
8b11a64c
ZD
1821 }
1822}
1823
1824/* Finds uses of the induction variables that are interesting. */
1825
1826static void
1827find_interesting_uses (struct ivopts_data *data)
1828{
1829 basic_block bb;
726a989a 1830 gimple_stmt_iterator bsi;
8b11a64c
ZD
1831 basic_block *body = get_loop_body (data->current_loop);
1832 unsigned i;
1833 struct version_info *info;
1834 edge e;
1835
1836 if (dump_file && (dump_flags & TDF_DETAILS))
1837 fprintf (dump_file, "Uses:\n\n");
1838
1839 for (i = 0; i < data->current_loop->num_nodes; i++)
1840 {
628f6a4e 1841 edge_iterator ei;
8b11a64c
ZD
1842 bb = body[i];
1843
628f6a4e 1844 FOR_EACH_EDGE (e, ei, bb->succs)
8b11a64c
ZD
1845 if (e->dest != EXIT_BLOCK_PTR
1846 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1847 find_interesting_uses_outside (data, e);
1848
726a989a
RB
1849 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1850 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1851 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
b5b8b0ac
AO
1852 if (!is_gimple_debug (gsi_stmt (bsi)))
1853 find_interesting_uses_stmt (data, gsi_stmt (bsi));
8b11a64c
ZD
1854 }
1855
1856 if (dump_file && (dump_flags & TDF_DETAILS))
1857 {
87c476a2
ZD
1858 bitmap_iterator bi;
1859
8b11a64c
ZD
1860 fprintf (dump_file, "\n");
1861
87c476a2 1862 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1863 {
1864 info = ver_info (data, i);
1865 if (info->inv_id)
1866 {
1867 fprintf (dump_file, " ");
1868 print_generic_expr (dump_file, info->name, TDF_SLIM);
1869 fprintf (dump_file, " is invariant (%d)%s\n",
1870 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1871 }
87c476a2 1872 }
8b11a64c
ZD
1873
1874 fprintf (dump_file, "\n");
1875 }
1876
1877 free (body);
1878}
1879
f5e2738c 1880/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
9be872b7
ZD
1881 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1882 we are at the top-level of the processed address. */
f5e2738c
ZD
1883
1884static tree
9be872b7
ZD
1885strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1886 unsigned HOST_WIDE_INT *offset)
f5e2738c 1887{
9be872b7 1888 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
f5e2738c
ZD
1889 enum tree_code code;
1890 tree type, orig_type = TREE_TYPE (expr);
1891 unsigned HOST_WIDE_INT off0, off1, st;
1892 tree orig_expr = expr;
1893
1894 STRIP_NOPS (expr);
9be872b7 1895
f5e2738c
ZD
1896 type = TREE_TYPE (expr);
1897 code = TREE_CODE (expr);
1898 *offset = 0;
1899
1900 switch (code)
1901 {
1902 case INTEGER_CST:
1903 if (!cst_and_fits_in_hwi (expr)
6e682d7e 1904 || integer_zerop (expr))
f5e2738c
ZD
1905 return orig_expr;
1906
1907 *offset = int_cst_value (expr);
ff5e9a94 1908 return build_int_cst (orig_type, 0);
f5e2738c 1909
2d1a1007 1910 case POINTER_PLUS_EXPR:
f5e2738c
ZD
1911 case PLUS_EXPR:
1912 case MINUS_EXPR:
1913 op0 = TREE_OPERAND (expr, 0);
1914 op1 = TREE_OPERAND (expr, 1);
1915
9be872b7
ZD
1916 op0 = strip_offset_1 (op0, false, false, &off0);
1917 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 1918
2d1a1007 1919 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
f5e2738c
ZD
1920 if (op0 == TREE_OPERAND (expr, 0)
1921 && op1 == TREE_OPERAND (expr, 1))
1922 return orig_expr;
1923
6e682d7e 1924 if (integer_zerop (op1))
f5e2738c 1925 expr = op0;
6e682d7e 1926 else if (integer_zerop (op0))
f5e2738c 1927 {
2d1a1007 1928 if (code == MINUS_EXPR)
9be872b7 1929 expr = fold_build1 (NEGATE_EXPR, type, op1);
2d1a1007
AP
1930 else
1931 expr = op1;
f5e2738c
ZD
1932 }
1933 else
9be872b7 1934 expr = fold_build2 (code, type, op0, op1);
f5e2738c
ZD
1935
1936 return fold_convert (orig_type, expr);
1937
7a2faca1
EB
1938 case MULT_EXPR:
1939 op1 = TREE_OPERAND (expr, 1);
1940 if (!cst_and_fits_in_hwi (op1))
1941 return orig_expr;
1942
1943 op0 = TREE_OPERAND (expr, 0);
1944 op0 = strip_offset_1 (op0, false, false, &off0);
1945 if (op0 == TREE_OPERAND (expr, 0))
1946 return orig_expr;
1947
1948 *offset = off0 * int_cst_value (op1);
1949 if (integer_zerop (op0))
1950 expr = op0;
1951 else
1952 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1953
1954 return fold_convert (orig_type, expr);
1955
f5e2738c 1956 case ARRAY_REF:
9f7ccf69 1957 case ARRAY_RANGE_REF:
f5e2738c
ZD
1958 if (!inside_addr)
1959 return orig_expr;
1960
1961 step = array_ref_element_size (expr);
1962 if (!cst_and_fits_in_hwi (step))
1963 break;
1964
1965 st = int_cst_value (step);
1966 op1 = TREE_OPERAND (expr, 1);
9be872b7 1967 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 1968 *offset = off1 * st;
9be872b7
ZD
1969
1970 if (top_compref
6e682d7e 1971 && integer_zerop (op1))
9be872b7
ZD
1972 {
1973 /* Strip the component reference completely. */
1974 op0 = TREE_OPERAND (expr, 0);
1975 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1976 *offset += off0;
1977 return op0;
1978 }
f5e2738c
ZD
1979 break;
1980
1981 case COMPONENT_REF:
1982 if (!inside_addr)
1983 return orig_expr;
9be872b7
ZD
1984
1985 tmp = component_ref_field_offset (expr);
1986 if (top_compref
1987 && cst_and_fits_in_hwi (tmp))
1988 {
1989 /* Strip the component reference completely. */
1990 op0 = TREE_OPERAND (expr, 0);
1991 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1992 *offset = off0 + int_cst_value (tmp);
1993 return op0;
1994 }
f5e2738c
ZD
1995 break;
1996
1997 case ADDR_EXPR:
9be872b7
ZD
1998 op0 = TREE_OPERAND (expr, 0);
1999 op0 = strip_offset_1 (op0, true, true, &off0);
2000 *offset += off0;
2001
2002 if (op0 == TREE_OPERAND (expr, 0))
2003 return orig_expr;
2004
d5dc1717 2005 expr = build_fold_addr_expr (op0);
9be872b7
ZD
2006 return fold_convert (orig_type, expr);
2007
2008 case INDIRECT_REF:
2009 inside_addr = false;
f5e2738c
ZD
2010 break;
2011
2012 default:
2013 return orig_expr;
2014 }
2015
2016 /* Default handling of expressions for that we want to recurse into
2017 the first operand. */
2018 op0 = TREE_OPERAND (expr, 0);
9be872b7 2019 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
f5e2738c
ZD
2020 *offset += off0;
2021
2022 if (op0 == TREE_OPERAND (expr, 0)
2023 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2024 return orig_expr;
2025
2026 expr = copy_node (expr);
2027 TREE_OPERAND (expr, 0) = op0;
2028 if (op1)
2029 TREE_OPERAND (expr, 1) = op1;
2030
9be872b7 2031 /* Inside address, we might strip the top level component references,
0fa2e4df 2032 thus changing type of the expression. Handling of ADDR_EXPR
9be872b7
ZD
2033 will fix that. */
2034 expr = fold_convert (orig_type, expr);
2035
2036 return expr;
2037}
2038
2039/* Strips constant offsets from EXPR and stores them to OFFSET. */
2040
2041static tree
2042strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2043{
2044 return strip_offset_1 (expr, false, false, offset);
f5e2738c
ZD
2045}
2046
d482f417 2047/* Returns variant of TYPE that can be used as base for different uses.
20527215
ZD
2048 We return unsigned type with the same precision, which avoids problems
2049 with overflows. */
d482f417
ZD
2050
2051static tree
2052generic_type_for (tree type)
2053{
2054 if (POINTER_TYPE_P (type))
20527215 2055 return unsigned_type_for (type);
d482f417
ZD
2056
2057 if (TYPE_UNSIGNED (type))
2058 return type;
2059
2060 return unsigned_type_for (type);
2061}
2062
9be872b7
ZD
2063/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2064 the bitmap to that we should store it. */
2065
2066static struct ivopts_data *fd_ivopts_data;
2067static tree
2068find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2069{
c22940cd 2070 bitmap *depends_on = (bitmap *) data;
9be872b7
ZD
2071 struct version_info *info;
2072
2073 if (TREE_CODE (*expr_p) != SSA_NAME)
2074 return NULL_TREE;
2075 info = name_info (fd_ivopts_data, *expr_p);
2076
2077 if (!info->inv_id || info->has_nonlin_use)
2078 return NULL_TREE;
2079
2080 if (!*depends_on)
2081 *depends_on = BITMAP_ALLOC (NULL);
2082 bitmap_set_bit (*depends_on, info->inv_id);
2083
2084 return NULL_TREE;
2085}
2086
8b11a64c
ZD
2087/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2088 position to POS. If USE is not NULL, the candidate is set as related to
2089 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2090 replacement of the final value of the iv by a direct computation. */
2091
2092static struct iv_cand *
2093add_candidate_1 (struct ivopts_data *data,
2094 tree base, tree step, bool important, enum iv_position pos,
726a989a 2095 struct iv_use *use, gimple incremented_at)
8b11a64c
ZD
2096{
2097 unsigned i;
2098 struct iv_cand *cand = NULL;
d482f417 2099 tree type, orig_type;
8b11a64c
ZD
2100
2101 if (base)
2102 {
d482f417
ZD
2103 orig_type = TREE_TYPE (base);
2104 type = generic_type_for (orig_type);
71adbef3 2105 if (type != orig_type)
8b11a64c 2106 {
8b11a64c 2107 base = fold_convert (type, base);
6e42ce54 2108 step = fold_convert (type, step);
8b11a64c
ZD
2109 }
2110 }
2111
2112 for (i = 0; i < n_iv_cands (data); i++)
2113 {
2114 cand = iv_cand (data, i);
2115
2116 if (cand->pos != pos)
2117 continue;
2118
2c08497a
BS
2119 if (cand->incremented_at != incremented_at
2120 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2121 && cand->ainc_use != use))
8b11a64c
ZD
2122 continue;
2123
2124 if (!cand->iv)
2125 {
2126 if (!base && !step)
2127 break;
2128
2129 continue;
2130 }
2131
2132 if (!base && !step)
2133 continue;
2134
6e42ce54
ZD
2135 if (operand_equal_p (base, cand->iv->base, 0)
2136 && operand_equal_p (step, cand->iv->step, 0))
2137 break;
8b11a64c
ZD
2138 }
2139
2140 if (i == n_iv_cands (data))
2141 {
5ed6ace5 2142 cand = XCNEW (struct iv_cand);
8b11a64c
ZD
2143 cand->id = i;
2144
2145 if (!base && !step)
2146 cand->iv = NULL;
2147 else
2148 cand->iv = alloc_iv (base, step);
2149
2150 cand->pos = pos;
2151 if (pos != IP_ORIGINAL && cand->iv)
2152 {
2153 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2154 cand->var_after = cand->var_before;
2155 }
2156 cand->important = important;
2157 cand->incremented_at = incremented_at;
69ebd99d 2158 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
8b11a64c 2159
9be872b7
ZD
2160 if (step
2161 && TREE_CODE (step) != INTEGER_CST)
2162 {
2163 fd_ivopts_data = data;
2164 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2165 }
2166
2c08497a
BS
2167 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2168 cand->ainc_use = use;
2169 else
2170 cand->ainc_use = NULL;
2171
8b11a64c
ZD
2172 if (dump_file && (dump_flags & TDF_DETAILS))
2173 dump_cand (dump_file, cand);
2174 }
2175
2176 if (important && !cand->important)
2177 {
2178 cand->important = true;
2179 if (dump_file && (dump_flags & TDF_DETAILS))
2180 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2181 }
2182
2183 if (use)
2184 {
2185 bitmap_set_bit (use->related_cands, i);
2186 if (dump_file && (dump_flags & TDF_DETAILS))
2187 fprintf (dump_file, "Candidate %d is related to use %d\n",
2188 cand->id, use->id);
2189 }
2190
2191 return cand;
2192}
2193
4366cf6d
ZD
2194/* Returns true if incrementing the induction variable at the end of the LOOP
2195 is allowed.
2196
2197 The purpose is to avoid splitting latch edge with a biv increment, thus
2198 creating a jump, possibly confusing other optimization passes and leaving
2199 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2200 is not available (so we do not have a better alternative), or if the latch
2201 edge is already nonempty. */
2202
2203static bool
2204allow_ip_end_pos_p (struct loop *loop)
2205{
2206 if (!ip_normal_pos (loop))
2207 return true;
2208
2209 if (!empty_block_p (ip_end_pos (loop)))
2210 return true;
2211
2212 return false;
2213}
2214
2c08497a
BS
2215/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2216 Important field is set to IMPORTANT. */
2217
2218static void
2219add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2220 bool important, struct iv_use *use)
2221{
2222 basic_block use_bb = gimple_bb (use->stmt);
2223 enum machine_mode mem_mode;
2224 unsigned HOST_WIDE_INT cstepi;
2225
2226 /* If we insert the increment in any position other than the standard
2227 ones, we must ensure that it is incremented once per iteration.
2228 It must not be in an inner nested loop, or one side of an if
2229 statement. */
2230 if (use_bb->loop_father != data->current_loop
2231 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2232 || stmt_could_throw_p (use->stmt)
2233 || !cst_and_fits_in_hwi (step))
2234 return;
2235
2236 cstepi = int_cst_value (step);
2237
2238 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2239 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2240 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2241 {
2242 enum tree_code code = MINUS_EXPR;
2243 tree new_base;
2244 tree new_step = step;
2245
2246 if (POINTER_TYPE_P (TREE_TYPE (base)))
2247 {
2248 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2249 code = POINTER_PLUS_EXPR;
2250 }
2251 else
2252 new_step = fold_convert (TREE_TYPE (base), new_step);
2253 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2254 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2255 use->stmt);
2256 }
2257 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2258 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2259 {
2260 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2261 use->stmt);
2262 }
2263}
2264
8b11a64c
ZD
2265/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2266 position to POS. If USE is not NULL, the candidate is set as related to
2267 it. The candidate computation is scheduled on all available positions. */
2268
2269static void
2270add_candidate (struct ivopts_data *data,
2271 tree base, tree step, bool important, struct iv_use *use)
2272{
2273 if (ip_normal_pos (data->current_loop))
726a989a 2274 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
4366cf6d
ZD
2275 if (ip_end_pos (data->current_loop)
2276 && allow_ip_end_pos_p (data->current_loop))
726a989a 2277 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2c08497a
BS
2278
2279 if (use != NULL && use->type == USE_ADDRESS)
2280 add_autoinc_candidates (data, base, step, important, use);
8b11a64c
ZD
2281}
2282
39b4020c
RS
2283/* Add a standard "0 + 1 * iteration" iv candidate for a
2284 type with SIZE bits. */
2285
2286static void
2287add_standard_iv_candidates_for_size (struct ivopts_data *data,
2288 unsigned int size)
2289{
2290 tree type = lang_hooks.types.type_for_size (size, true);
2291 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2292 true, NULL);
2293}
2294
8b11a64c
ZD
2295/* Adds standard iv candidates. */
2296
2297static void
2298add_standard_iv_candidates (struct ivopts_data *data)
2299{
39b4020c 2300 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
8b11a64c 2301
39b4020c
RS
2302 /* The same for a double-integer type if it is still fast enough. */
2303 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2304 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
8b11a64c
ZD
2305}
2306
2307
2308/* Adds candidates bases on the old induction variable IV. */
2309
2310static void
2311add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2312{
726a989a
RB
2313 gimple phi;
2314 tree def;
8b11a64c
ZD
2315 struct iv_cand *cand;
2316
2317 add_candidate (data, iv->base, iv->step, true, NULL);
2318
2319 /* The same, but with initial value zero. */
1a00e5f7
JJ
2320 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2321 add_candidate (data, size_int (0), iv->step, true, NULL);
2322 else
2323 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2324 iv->step, true, NULL);
8b11a64c
ZD
2325
2326 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
726a989a 2327 if (gimple_code (phi) == GIMPLE_PHI)
8b11a64c
ZD
2328 {
2329 /* Additionally record the possibility of leaving the original iv
2330 untouched. */
2331 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2332 cand = add_candidate_1 (data,
2333 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2334 SSA_NAME_DEF_STMT (def));
2335 cand->var_before = iv->ssa_name;
2336 cand->var_after = def;
2337 }
2338}
2339
2340/* Adds candidates based on the old induction variables. */
2341
2342static void
2343add_old_ivs_candidates (struct ivopts_data *data)
2344{
2345 unsigned i;
2346 struct iv *iv;
87c476a2 2347 bitmap_iterator bi;
8b11a64c 2348
87c476a2 2349 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
2350 {
2351 iv = ver_info (data, i)->iv;
6e42ce54 2352 if (iv && iv->biv_p && !integer_zerop (iv->step))
8b11a64c 2353 add_old_iv_candidates (data, iv);
87c476a2 2354 }
8b11a64c
ZD
2355}
2356
2357/* Adds candidates based on the value of the induction variable IV and USE. */
2358
2359static void
2360add_iv_value_candidates (struct ivopts_data *data,
2361 struct iv *iv, struct iv_use *use)
2362{
f5e2738c 2363 unsigned HOST_WIDE_INT offset;
9be872b7 2364 tree base;
1ffe34d9 2365 tree basetype;
8b11a64c 2366
9be872b7 2367 add_candidate (data, iv->base, iv->step, false, use);
be35cf60 2368
9be872b7
ZD
2369 /* The same, but with initial value zero. Make such variable important,
2370 since it is generic enough so that possibly many uses may be based
2371 on it. */
1ffe34d9
AP
2372 basetype = TREE_TYPE (iv->base);
2373 if (POINTER_TYPE_P (basetype))
2374 basetype = sizetype;
2375 add_candidate (data, build_int_cst (basetype, 0),
9be872b7 2376 iv->step, true, use);
8b11a64c 2377
7eeef08e
RG
2378 /* Third, try removing the constant offset. Make sure to even
2379 add a candidate for &a[0] vs. (T *)&a. */
9be872b7 2380 base = strip_offset (iv->base, &offset);
7eeef08e
RG
2381 if (offset
2382 || base != iv->base)
f5e2738c 2383 add_candidate (data, base, iv->step, false, use);
8b11a64c
ZD
2384}
2385
8b11a64c
ZD
2386/* Adds candidates based on the uses. */
2387
2388static void
2389add_derived_ivs_candidates (struct ivopts_data *data)
2390{
2391 unsigned i;
2392
2393 for (i = 0; i < n_iv_uses (data); i++)
2394 {
2395 struct iv_use *use = iv_use (data, i);
2396
2397 if (!use)
2398 continue;
2399
2400 switch (use->type)
2401 {
2402 case USE_NONLINEAR_EXPR:
2403 case USE_COMPARE:
9be872b7 2404 case USE_ADDRESS:
8b11a64c
ZD
2405 /* Just add the ivs based on the value of the iv used here. */
2406 add_iv_value_candidates (data, use->iv, use);
2407 break;
2408
8b11a64c 2409 default:
1e128c5f 2410 gcc_unreachable ();
8b11a64c
ZD
2411 }
2412 }
2413}
2414
b1b02be2
ZD
2415/* Record important candidates and add them to related_cands bitmaps
2416 if needed. */
2417
2418static void
2419record_important_candidates (struct ivopts_data *data)
2420{
2421 unsigned i;
2422 struct iv_use *use;
2423
2424 for (i = 0; i < n_iv_cands (data); i++)
2425 {
2426 struct iv_cand *cand = iv_cand (data, i);
2427
2428 if (cand->important)
2429 bitmap_set_bit (data->important_candidates, i);
2430 }
2431
2432 data->consider_all_candidates = (n_iv_cands (data)
2433 <= CONSIDER_ALL_CANDIDATES_BOUND);
2434
2435 if (data->consider_all_candidates)
2436 {
2437 /* We will not need "related_cands" bitmaps in this case,
2438 so release them to decrease peak memory consumption. */
2439 for (i = 0; i < n_iv_uses (data); i++)
2440 {
2441 use = iv_use (data, i);
8bdbfff5 2442 BITMAP_FREE (use->related_cands);
b1b02be2
ZD
2443 }
2444 }
2445 else
2446 {
2447 /* Add important candidates to the related_cands bitmaps. */
2448 for (i = 0; i < n_iv_uses (data); i++)
2449 bitmap_ior_into (iv_use (data, i)->related_cands,
2450 data->important_candidates);
2451 }
2452}
2453
8b11a64c
ZD
2454/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2455 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2456 we allocate a simple list to every use. */
2457
2458static void
2459alloc_use_cost_map (struct ivopts_data *data)
2460{
b1b02be2 2461 unsigned i, size, s, j;
8b11a64c
ZD
2462
2463 for (i = 0; i < n_iv_uses (data); i++)
2464 {
2465 struct iv_use *use = iv_use (data, i);
87c476a2 2466 bitmap_iterator bi;
8b11a64c
ZD
2467
2468 if (data->consider_all_candidates)
b1b02be2 2469 size = n_iv_cands (data);
8b11a64c
ZD
2470 else
2471 {
b1b02be2 2472 s = 0;
87c476a2
ZD
2473 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2474 {
b1b02be2 2475 s++;
87c476a2 2476 }
b1b02be2
ZD
2477
2478 /* Round up to the power of two, so that moduling by it is fast. */
2479 for (size = 1; size < s; size <<= 1)
2480 continue;
8b11a64c
ZD
2481 }
2482
b1b02be2 2483 use->n_map_members = size;
5ed6ace5 2484 use->cost_map = XCNEWVEC (struct cost_pair, size);
8b11a64c
ZD
2485 }
2486}
2487
6e8c65f6
ZD
2488/* Returns description of computation cost of expression whose runtime
2489 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2490
2491static comp_cost
2492new_cost (unsigned runtime, unsigned complexity)
2493{
2494 comp_cost cost;
2495
2496 cost.cost = runtime;
2497 cost.complexity = complexity;
2498
2499 return cost;
2500}
2501
2502/* Adds costs COST1 and COST2. */
2503
2504static comp_cost
2505add_costs (comp_cost cost1, comp_cost cost2)
2506{
2507 cost1.cost += cost2.cost;
2508 cost1.complexity += cost2.complexity;
2509
2510 return cost1;
2511}
2512/* Subtracts costs COST1 and COST2. */
2513
2514static comp_cost
2515sub_costs (comp_cost cost1, comp_cost cost2)
2516{
2517 cost1.cost -= cost2.cost;
2518 cost1.complexity -= cost2.complexity;
2519
2520 return cost1;
2521}
2522
2523/* Returns a negative number if COST1 < COST2, a positive number if
2524 COST1 > COST2, and 0 if COST1 = COST2. */
2525
2526static int
2527compare_costs (comp_cost cost1, comp_cost cost2)
2528{
2529 if (cost1.cost == cost2.cost)
2530 return cost1.complexity - cost2.complexity;
2531
2532 return cost1.cost - cost2.cost;
2533}
2534
2535/* Returns true if COST is infinite. */
2536
2537static bool
2538infinite_cost_p (comp_cost cost)
2539{
2540 return cost.cost == INFTY;
2541}
2542
8b11a64c 2543/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
f5f12961 2544 on invariants DEPENDS_ON and that the value used in expressing it
2c08497a 2545 is VALUE. */
8b11a64c
ZD
2546
2547static void
2548set_use_iv_cost (struct ivopts_data *data,
6e8c65f6
ZD
2549 struct iv_use *use, struct iv_cand *cand,
2550 comp_cost cost, bitmap depends_on, tree value)
8b11a64c 2551{
b1b02be2
ZD
2552 unsigned i, s;
2553
6e8c65f6 2554 if (infinite_cost_p (cost))
8b11a64c 2555 {
8bdbfff5 2556 BITMAP_FREE (depends_on);
b1b02be2 2557 return;
8b11a64c
ZD
2558 }
2559
2560 if (data->consider_all_candidates)
2561 {
2562 use->cost_map[cand->id].cand = cand;
2563 use->cost_map[cand->id].cost = cost;
2564 use->cost_map[cand->id].depends_on = depends_on;
f5f12961 2565 use->cost_map[cand->id].value = value;
8b11a64c
ZD
2566 return;
2567 }
2568
b1b02be2
ZD
2569 /* n_map_members is a power of two, so this computes modulo. */
2570 s = cand->id & (use->n_map_members - 1);
2571 for (i = s; i < use->n_map_members; i++)
2572 if (!use->cost_map[i].cand)
2573 goto found;
2574 for (i = 0; i < s; i++)
2575 if (!use->cost_map[i].cand)
2576 goto found;
2577
2578 gcc_unreachable ();
8b11a64c 2579
b1b02be2
ZD
2580found:
2581 use->cost_map[i].cand = cand;
2582 use->cost_map[i].cost = cost;
2583 use->cost_map[i].depends_on = depends_on;
f5f12961 2584 use->cost_map[i].value = value;
8b11a64c
ZD
2585}
2586
b1b02be2 2587/* Gets cost of (USE, CANDIDATE) pair. */
8b11a64c 2588
b1b02be2
ZD
2589static struct cost_pair *
2590get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2591 struct iv_cand *cand)
8b11a64c 2592{
b1b02be2
ZD
2593 unsigned i, s;
2594 struct cost_pair *ret;
8b11a64c
ZD
2595
2596 if (!cand)
b1b02be2 2597 return NULL;
8b11a64c
ZD
2598
2599 if (data->consider_all_candidates)
8b11a64c 2600 {
b1b02be2
ZD
2601 ret = use->cost_map + cand->id;
2602 if (!ret->cand)
2603 return NULL;
8b11a64c 2604
b1b02be2 2605 return ret;
8b11a64c 2606 }
b1b02be2
ZD
2607
2608 /* n_map_members is a power of two, so this computes modulo. */
2609 s = cand->id & (use->n_map_members - 1);
2610 for (i = s; i < use->n_map_members; i++)
2611 if (use->cost_map[i].cand == cand)
2612 return use->cost_map + i;
8b11a64c 2613
b1b02be2
ZD
2614 for (i = 0; i < s; i++)
2615 if (use->cost_map[i].cand == cand)
2616 return use->cost_map + i;
2617
2618 return NULL;
8b11a64c
ZD
2619}
2620
2621/* Returns estimate on cost of computing SEQ. */
2622
2623static unsigned
f40751dd 2624seq_cost (rtx seq, bool speed)
8b11a64c
ZD
2625{
2626 unsigned cost = 0;
2627 rtx set;
2628
2629 for (; seq; seq = NEXT_INSN (seq))
2630 {
2631 set = single_set (seq);
2632 if (set)
f40751dd 2633 cost += rtx_cost (set, SET,speed);
8b11a64c
ZD
2634 else
2635 cost++;
2636 }
2637
2638 return cost;
2639}
2640
8679c649
JH
2641/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2642static rtx
2643produce_memory_decl_rtl (tree obj, int *regno)
2644{
09e881c9 2645 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
d4ebfa65 2646 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
8679c649 2647 rtx x;
e16acfcd
NS
2648
2649 gcc_assert (obj);
8679c649
JH
2650 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2651 {
2652 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
d4ebfa65 2653 x = gen_rtx_SYMBOL_REF (address_mode, name);
8318b0d9
RH
2654 SET_SYMBOL_REF_DECL (x, obj);
2655 x = gen_rtx_MEM (DECL_MODE (obj), x);
09e881c9 2656 set_mem_addr_space (x, as);
8318b0d9 2657 targetm.encode_section_info (obj, x, true);
8679c649
JH
2658 }
2659 else
8318b0d9 2660 {
d4ebfa65 2661 x = gen_raw_REG (address_mode, (*regno)++);
8318b0d9 2662 x = gen_rtx_MEM (DECL_MODE (obj), x);
09e881c9 2663 set_mem_addr_space (x, as);
8318b0d9 2664 }
8679c649 2665
8318b0d9 2666 return x;
8679c649
JH
2667}
2668
8b11a64c
ZD
2669/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2670 walk_tree. DATA contains the actual fake register number. */
2671
2672static tree
2673prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2674{
2675 tree obj = NULL_TREE;
2676 rtx x = NULL_RTX;
c22940cd 2677 int *regno = (int *) data;
8b11a64c
ZD
2678
2679 switch (TREE_CODE (*expr_p))
2680 {
8679c649
JH
2681 case ADDR_EXPR:
2682 for (expr_p = &TREE_OPERAND (*expr_p, 0);
afe84921
RH
2683 handled_component_p (*expr_p);
2684 expr_p = &TREE_OPERAND (*expr_p, 0))
2685 continue;
8679c649 2686 obj = *expr_p;
aacd3885 2687 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
8679c649
JH
2688 x = produce_memory_decl_rtl (obj, regno);
2689 break;
2690
8b11a64c
ZD
2691 case SSA_NAME:
2692 *ws = 0;
2693 obj = SSA_NAME_VAR (*expr_p);
2694 if (!DECL_RTL_SET_P (obj))
2695 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2696 break;
2697
2698 case VAR_DECL:
2699 case PARM_DECL:
2700 case RESULT_DECL:
2701 *ws = 0;
2702 obj = *expr_p;
2703
2704 if (DECL_RTL_SET_P (obj))
2705 break;
2706
2707 if (DECL_MODE (obj) == BLKmode)
8679c649 2708 x = produce_memory_decl_rtl (obj, regno);
8b11a64c
ZD
2709 else
2710 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2711
2712 break;
2713
2714 default:
2715 break;
2716 }
2717
2718 if (x)
2719 {
69ebd99d 2720 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
8b11a64c
ZD
2721 SET_DECL_RTL (obj, x);
2722 }
2723
2724 return NULL_TREE;
2725}
2726
2727/* Determines cost of the computation of EXPR. */
2728
2729static unsigned
f40751dd 2730computation_cost (tree expr, bool speed)
8b11a64c
ZD
2731{
2732 rtx seq, rslt;
2733 tree type = TREE_TYPE (expr);
2734 unsigned cost;
1d27fed4
DD
2735 /* Avoid using hard regs in ways which may be unsupported. */
2736 int regno = LAST_VIRTUAL_REGISTER + 1;
f40751dd 2737 enum function_frequency real_frequency = cfun->function_frequency;
8b11a64c 2738
f40751dd
JH
2739 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2740 crtl->maybe_hot_insn_p = speed;
8b11a64c
ZD
2741 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2742 start_sequence ();
2743 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2744 seq = get_insns ();
2745 end_sequence ();
f40751dd
JH
2746 default_rtl_profile ();
2747 cfun->function_frequency = real_frequency;
8b11a64c 2748
f40751dd 2749 cost = seq_cost (seq, speed);
2ca202e7 2750 if (MEM_P (rslt))
09e881c9
BE
2751 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2752 TYPE_ADDR_SPACE (type), speed);
8b11a64c
ZD
2753
2754 return cost;
2755}
2756
2757/* Returns variable containing the value of candidate CAND at statement AT. */
2758
2759static tree
726a989a 2760var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
8b11a64c
ZD
2761{
2762 if (stmt_after_increment (loop, cand, stmt))
2763 return cand->var_after;
2764 else
2765 return cand->var_before;
2766}
2767
9be872b7
ZD
2768/* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2769 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2770
d7bf3bcf 2771int
fa233e34 2772tree_int_cst_sign_bit (const_tree t)
9be872b7
ZD
2773{
2774 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2775 unsigned HOST_WIDE_INT w;
2776
2777 if (bitno < HOST_BITS_PER_WIDE_INT)
2778 w = TREE_INT_CST_LOW (t);
2779 else
2780 {
2781 w = TREE_INT_CST_HIGH (t);
2782 bitno -= HOST_BITS_PER_WIDE_INT;
2783 }
2784
2785 return (w >> bitno) & 1;
2786}
2787
b67102ae
ZD
2788/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2789 same precision that is at least as wide as the precision of TYPE, stores
2790 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2791 type of A and B. */
2792
2793static tree
2794determine_common_wider_type (tree *a, tree *b)
2795{
2796 tree wider_type = NULL;
2797 tree suba, subb;
2798 tree atype = TREE_TYPE (*a);
2799
1043771b 2800 if (CONVERT_EXPR_P (*a))
b67102ae
ZD
2801 {
2802 suba = TREE_OPERAND (*a, 0);
2803 wider_type = TREE_TYPE (suba);
2804 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2805 return atype;
2806 }
2807 else
2808 return atype;
2809
1043771b 2810 if (CONVERT_EXPR_P (*b))
b67102ae
ZD
2811 {
2812 subb = TREE_OPERAND (*b, 0);
2813 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2814 return atype;
2815 }
2816 else
2817 return atype;
2818
2819 *a = suba;
2820 *b = subb;
2821 return wider_type;
2822}
2823
8b11a64c 2824/* Determines the expression by that USE is expressed from induction variable
ac182688
ZD
2825 CAND at statement AT in LOOP. The expression is stored in a decomposed
2826 form into AFF. Returns false if USE cannot be expressed using CAND. */
8b11a64c 2827
ac182688
ZD
2828static bool
2829get_computation_aff (struct loop *loop,
726a989a 2830 struct iv_use *use, struct iv_cand *cand, gimple at,
ac182688 2831 struct affine_tree_combination *aff)
8b11a64c 2832{
2f4675b4
ZD
2833 tree ubase = use->iv->base;
2834 tree ustep = use->iv->step;
2835 tree cbase = cand->iv->base;
73f30c63 2836 tree cstep = cand->iv->step, cstep_common;
8b11a64c 2837 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
73f30c63 2838 tree common_type, var;
8b11a64c 2839 tree uutype;
73f30c63 2840 aff_tree cbase_aff, var_aff;
f414f2f3 2841 double_int rat;
8b11a64c
ZD
2842
2843 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2844 {
2845 /* We do not have a precision to express the values of use. */
ac182688 2846 return false;
8b11a64c
ZD
2847 }
2848
73f30c63
ZD
2849 var = var_at_stmt (loop, cand, at);
2850 uutype = unsigned_type_for (utype);
8b11a64c 2851
73f30c63
ZD
2852 /* If the conversion is not noop, perform it. */
2853 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
8b11a64c 2854 {
8b11a64c 2855 cstep = fold_convert (uutype, cstep);
73f30c63
ZD
2856 cbase = fold_convert (uutype, cbase);
2857 var = fold_convert (uutype, var);
9be872b7 2858 }
9be872b7 2859
73f30c63
ZD
2860 if (!constant_multiple_of (ustep, cstep, &rat))
2861 return false;
8b11a64c 2862
b67102ae
ZD
2863 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2864 type, we achieve better folding by computing their difference in this
2865 wider type, and cast the result to UUTYPE. We do not need to worry about
2866 overflows, as all the arithmetics will in the end be performed in UUTYPE
2867 anyway. */
2868 common_type = determine_common_wider_type (&ubase, &cbase);
2869
73f30c63
ZD
2870 /* use = ubase - ratio * cbase + ratio * var. */
2871 tree_to_aff_combination (ubase, common_type, aff);
2872 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2873 tree_to_aff_combination (var, uutype, &var_aff);
8b11a64c 2874
73f30c63
ZD
2875 /* We need to shift the value if we are after the increment. */
2876 if (stmt_after_increment (loop, cand, at))
8b11a64c 2877 {
73f30c63
ZD
2878 aff_tree cstep_aff;
2879
2880 if (common_type != uutype)
2881 cstep_common = fold_convert (common_type, cstep);
9be872b7 2882 else
73f30c63 2883 cstep_common = cstep;
ac182688 2884
73f30c63
ZD
2885 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2886 aff_combination_add (&cbase_aff, &cstep_aff);
8b11a64c 2887 }
8b11a64c 2888
73f30c63 2889 aff_combination_scale (&cbase_aff, double_int_neg (rat));
ac182688 2890 aff_combination_add (aff, &cbase_aff);
b67102ae 2891 if (common_type != uutype)
73f30c63
ZD
2892 aff_combination_convert (aff, uutype);
2893
2894 aff_combination_scale (&var_aff, rat);
2895 aff_combination_add (aff, &var_aff);
ac182688
ZD
2896
2897 return true;
2898}
2899
2900/* Determines the expression by that USE is expressed from induction variable
2901 CAND at statement AT in LOOP. The computation is unshared. */
2902
2903static tree
2904get_computation_at (struct loop *loop,
726a989a 2905 struct iv_use *use, struct iv_cand *cand, gimple at)
ac182688 2906{
73f30c63 2907 aff_tree aff;
ac182688
ZD
2908 tree type = TREE_TYPE (use->iv->base);
2909
2910 if (!get_computation_aff (loop, use, cand, at, &aff))
2911 return NULL_TREE;
2912 unshare_aff_combination (&aff);
2913 return fold_convert (type, aff_combination_to_tree (&aff));
8b11a64c
ZD
2914}
2915
2916/* Determines the expression by that USE is expressed from induction variable
ac182688 2917 CAND in LOOP. The computation is unshared. */
8b11a64c
ZD
2918
2919static tree
2920get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2921{
2922 return get_computation_at (loop, use, cand, use->stmt);
2923}
2924
8b11a64c
ZD
2925/* Returns cost of addition in MODE. */
2926
2927static unsigned
f40751dd 2928add_cost (enum machine_mode mode, bool speed)
8b11a64c
ZD
2929{
2930 static unsigned costs[NUM_MACHINE_MODES];
2931 rtx seq;
2932 unsigned cost;
2933
2934 if (costs[mode])
2935 return costs[mode];
2936
2937 start_sequence ();
2938 force_operand (gen_rtx_fmt_ee (PLUS, mode,
03daa27a
ZD
2939 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2940 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
8b11a64c
ZD
2941 NULL_RTX);
2942 seq = get_insns ();
2943 end_sequence ();
2944
f40751dd 2945 cost = seq_cost (seq, speed);
8b11a64c
ZD
2946 if (!cost)
2947 cost = 1;
2948
2949 costs[mode] = cost;
2950
2951 if (dump_file && (dump_flags & TDF_DETAILS))
2952 fprintf (dump_file, "Addition in %s costs %d\n",
2953 GET_MODE_NAME (mode), cost);
2954 return cost;
2955}
2956
2957/* Entry in a hashtable of already known costs for multiplication. */
2958struct mbc_entry
2959{
2960 HOST_WIDE_INT cst; /* The constant to multiply by. */
2961 enum machine_mode mode; /* In mode. */
2962 unsigned cost; /* The cost. */
2963};
2964
2965/* Counts hash value for the ENTRY. */
2966
2967static hashval_t
2968mbc_entry_hash (const void *entry)
2969{
c22940cd 2970 const struct mbc_entry *e = (const struct mbc_entry *) entry;
8b11a64c
ZD
2971
2972 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2973}
2974
2975/* Compares the hash table entries ENTRY1 and ENTRY2. */
2976
2977static int
2978mbc_entry_eq (const void *entry1, const void *entry2)
2979{
c22940cd
TN
2980 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2981 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
8b11a64c
ZD
2982
2983 return (e1->mode == e2->mode
2984 && e1->cst == e2->cst);
2985}
2986
2987/* Returns cost of multiplication by constant CST in MODE. */
2988
ac182688 2989unsigned
f40751dd 2990multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
8b11a64c
ZD
2991{
2992 static htab_t costs;
2993 struct mbc_entry **cached, act;
2994 rtx seq;
2995 unsigned cost;
2996
2997 if (!costs)
2998 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2999
3000 act.mode = mode;
3001 act.cst = cst;
3002 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3003 if (*cached)
3004 return (*cached)->cost;
3005
5ed6ace5 3006 *cached = XNEW (struct mbc_entry);
8b11a64c
ZD
3007 (*cached)->mode = mode;
3008 (*cached)->cst = cst;
3009
3010 start_sequence ();
03daa27a
ZD
3011 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3012 gen_int_mode (cst, mode), NULL_RTX, 0);
8b11a64c
ZD
3013 seq = get_insns ();
3014 end_sequence ();
3015
f40751dd 3016 cost = seq_cost (seq, speed);
8b11a64c
ZD
3017
3018 if (dump_file && (dump_flags & TDF_DETAILS))
3019 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3020 (int) cst, GET_MODE_NAME (mode), cost);
3021
3022 (*cached)->cost = cost;
3023
3024 return cost;
3025}
3026
b1dd92fd 3027/* Returns true if multiplying by RATIO is allowed in an address. Test the
09e881c9
BE
3028 validity for a memory reference accessing memory of mode MODE in
3029 address space AS. */
3030
3031DEF_VEC_P (sbitmap);
3032DEF_VEC_ALLOC_P (sbitmap, heap);
ac182688
ZD
3033
3034bool
09e881c9
BE
3035multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3036 addr_space_t as)
ac182688
ZD
3037{
3038#define MAX_RATIO 128
09e881c9
BE
3039 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3040 static VEC (sbitmap, heap) *valid_mult_list;
3041 sbitmap valid_mult;
3042
3043 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3044 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3045
3046 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3047 if (!valid_mult)
ac182688 3048 {
d4ebfa65
BE
3049 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3050 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
ac182688
ZD
3051 rtx addr;
3052 HOST_WIDE_INT i;
3053
09e881c9
BE
3054 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3055 sbitmap_zero (valid_mult);
d4ebfa65 3056 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
ac182688
ZD
3057 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3058 {
d4ebfa65 3059 XEXP (addr, 1) = gen_int_mode (i, address_mode);
09e881c9
BE
3060 if (memory_address_addr_space_p (mode, addr, as))
3061 SET_BIT (valid_mult, i + MAX_RATIO);
ac182688
ZD
3062 }
3063
3064 if (dump_file && (dump_flags & TDF_DETAILS))
3065 {
3066 fprintf (dump_file, " allowed multipliers:");
3067 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
09e881c9 3068 if (TEST_BIT (valid_mult, i + MAX_RATIO))
ac182688
ZD
3069 fprintf (dump_file, " %d", (int) i);
3070 fprintf (dump_file, "\n");
3071 fprintf (dump_file, "\n");
3072 }
09e881c9
BE
3073
3074 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
ac182688
ZD
3075 }
3076
3077 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3078 return false;
3079
09e881c9 3080 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
ac182688
ZD
3081}
3082
8b11a64c
ZD
3083/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3084 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
b1dd92fd 3085 variable is omitted. Compute the cost for a memory reference that accesses
09e881c9 3086 a memory location of mode MEM_MODE in address space AS.
b1dd92fd 3087
2c08497a
BS
3088 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3089 size of MEM_MODE / RATIO) is available. To make this determination, we
3090 look at the size of the increment to be made, which is given in CSTEP.
3091 CSTEP may be zero if the step is unknown.
3092 STMT_AFTER_INC is true iff the statement we're looking at is after the
3093 increment of the original biv.
3094
8b11a64c
ZD
3095 TODO -- there must be some better way. This all is quite crude. */
3096
09e881c9
BE
3097typedef struct
3098{
3099 HOST_WIDE_INT min_offset, max_offset;
3100 unsigned costs[2][2][2][2];
3101} *address_cost_data;
3102
3103DEF_VEC_P (address_cost_data);
3104DEF_VEC_ALLOC_P (address_cost_data, heap);
3105
6e8c65f6 3106static comp_cost
8b11a64c 3107get_address_cost (bool symbol_present, bool var_present,
dd5f63f8 3108 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
09e881c9
BE
3109 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3110 addr_space_t as, bool speed,
2c08497a 3111 bool stmt_after_inc, bool *may_autoinc)
8b11a64c 3112{
d4ebfa65 3113 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
09e881c9
BE
3114 static VEC(address_cost_data, heap) *address_cost_data_list;
3115 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3116 address_cost_data data;
2c08497a
BS
3117 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3118 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
6e8c65f6 3119 unsigned cost, acost, complexity;
2c08497a
BS
3120 bool offset_p, ratio_p, autoinc;
3121 HOST_WIDE_INT s_offset, autoinc_offset, msize;
8b11a64c
ZD
3122 unsigned HOST_WIDE_INT mask;
3123 unsigned bits;
3124
09e881c9
BE
3125 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3126 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3127 data_index + 1);
3128
3129 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3130 if (!data)
8b11a64c
ZD
3131 {
3132 HOST_WIDE_INT i;
b1dd92fd 3133 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
09e881c9 3134 HOST_WIDE_INT rat, off;
975626a7
ZD
3135 int old_cse_not_expected;
3136 unsigned sym_p, var_p, off_p, rat_p, add_c;
3137 rtx seq, addr, base;
3138 rtx reg0, reg1;
3139
09e881c9 3140 data = (address_cost_data) xcalloc (1, sizeof (*data));
8b11a64c 3141
d4ebfa65 3142 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
8b11a64c 3143
d4ebfa65 3144 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
b1dd92fd 3145 for (i = start; i <= 1 << 20; i <<= 1)
8b11a64c 3146 {
d4ebfa65 3147 XEXP (addr, 1) = gen_int_mode (i, address_mode);
09e881c9 3148 if (!memory_address_addr_space_p (mem_mode, addr, as))
8b11a64c
ZD
3149 break;
3150 }
09e881c9
BE
3151 data->max_offset = i == start ? 0 : i >> 1;
3152 off = data->max_offset;
8b11a64c 3153
b1dd92fd 3154 for (i = start; i <= 1 << 20; i <<= 1)
8b11a64c 3155 {
d4ebfa65 3156 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
09e881c9 3157 if (!memory_address_addr_space_p (mem_mode, addr, as))
8b11a64c
ZD
3158 break;
3159 }
09e881c9 3160 data->min_offset = i == start ? 0 : -(i >> 1);
8b11a64c
ZD
3161
3162 if (dump_file && (dump_flags & TDF_DETAILS))
3163 {
3164 fprintf (dump_file, "get_address_cost:\n");
dd5f63f8
BS
3165 fprintf (dump_file, " min offset %s %d\n",
3166 GET_MODE_NAME (mem_mode),
09e881c9 3167 (int) data->min_offset);
dd5f63f8
BS
3168 fprintf (dump_file, " max offset %s %d\n",
3169 GET_MODE_NAME (mem_mode),
09e881c9 3170 (int) data->max_offset);
8b11a64c
ZD
3171 }
3172
09e881c9 3173 rat = 1;
ac182688 3174 for (i = 2; i <= MAX_RATIO; i++)
09e881c9 3175 if (multiplier_allowed_in_address_p (i, mem_mode, as))
ac182688 3176 {
09e881c9 3177 rat = i;
ac182688
ZD
3178 break;
3179 }
975626a7
ZD
3180
3181 /* Compute the cost of various addressing modes. */
3182 acost = 0;
d4ebfa65
BE
3183 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3184 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
975626a7 3185
2c08497a
BS
3186 if (HAVE_PRE_DECREMENT)
3187 {
d4ebfa65 3188 addr = gen_rtx_PRE_DEC (address_mode, reg0);
09e881c9
BE
3189 has_predec[mem_mode]
3190 = memory_address_addr_space_p (mem_mode, addr, as);
2c08497a
BS
3191 }
3192 if (HAVE_POST_DECREMENT)
3193 {
d4ebfa65 3194 addr = gen_rtx_POST_DEC (address_mode, reg0);
09e881c9
BE
3195 has_postdec[mem_mode]
3196 = memory_address_addr_space_p (mem_mode, addr, as);
2c08497a
BS
3197 }
3198 if (HAVE_PRE_INCREMENT)
3199 {
d4ebfa65 3200 addr = gen_rtx_PRE_INC (address_mode, reg0);
09e881c9
BE
3201 has_preinc[mem_mode]
3202 = memory_address_addr_space_p (mem_mode, addr, as);
2c08497a
BS
3203 }
3204 if (HAVE_POST_INCREMENT)
3205 {
d4ebfa65 3206 addr = gen_rtx_POST_INC (address_mode, reg0);
09e881c9
BE
3207 has_postinc[mem_mode]
3208 = memory_address_addr_space_p (mem_mode, addr, as);
2c08497a 3209 }
975626a7
ZD
3210 for (i = 0; i < 16; i++)
3211 {
3212 sym_p = i & 1;
3213 var_p = (i >> 1) & 1;
3214 off_p = (i >> 2) & 1;
3215 rat_p = (i >> 3) & 1;
3216
3217 addr = reg0;
3218 if (rat_p)
d4ebfa65
BE
3219 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3220 gen_int_mode (rat, address_mode));
975626a7
ZD
3221
3222 if (var_p)
d4ebfa65 3223 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
975626a7
ZD
3224
3225 if (sym_p)
3226 {
d4ebfa65 3227 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
8318b0d9 3228 /* ??? We can run into trouble with some backends by presenting
fa10beec 3229 it with symbols which haven't been properly passed through
8318b0d9
RH
3230 targetm.encode_section_info. By setting the local bit, we
3231 enhance the probability of things working. */
3232 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3233
975626a7 3234 if (off_p)
d4ebfa65 3235 base = gen_rtx_fmt_e (CONST, address_mode,
09e881c9 3236 gen_rtx_fmt_ee
d4ebfa65
BE
3237 (PLUS, address_mode, base,
3238 gen_int_mode (off, address_mode)));
975626a7
ZD
3239 }
3240 else if (off_p)
d4ebfa65 3241 base = gen_int_mode (off, address_mode);
975626a7
ZD
3242 else
3243 base = NULL_RTX;
3244
3245 if (base)
d4ebfa65 3246 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
2c08497a 3247
975626a7
ZD
3248 start_sequence ();
3249 /* To avoid splitting addressing modes, pretend that no cse will
3250 follow. */
3251 old_cse_not_expected = cse_not_expected;
3252 cse_not_expected = true;
09e881c9 3253 addr = memory_address_addr_space (mem_mode, addr, as);
975626a7
ZD
3254 cse_not_expected = old_cse_not_expected;
3255 seq = get_insns ();
3256 end_sequence ();
3257
f40751dd 3258 acost = seq_cost (seq, speed);
09e881c9 3259 acost += address_cost (addr, mem_mode, as, speed);
975626a7
ZD
3260
3261 if (!acost)
3262 acost = 1;
09e881c9 3263 data->costs[sym_p][var_p][off_p][rat_p] = acost;
975626a7
ZD
3264 }
3265
3266 /* On some targets, it is quite expensive to load symbol to a register,
3267 which makes addresses that contain symbols look much more expensive.
3268 However, the symbol will have to be loaded in any case before the
3269 loop (and quite likely we have it in register already), so it does not
3270 make much sense to penalize them too heavily. So make some final
3271 tweaks for the SYMBOL_PRESENT modes:
3272
3273 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3274 var is cheaper, use this mode with small penalty.
3275 If VAR_PRESENT is true, try whether the mode with
3276 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3277 if this is the case, use it. */
d4ebfa65 3278 add_c = add_cost (address_mode, speed);
975626a7
ZD
3279 for (i = 0; i < 8; i++)
3280 {
3281 var_p = i & 1;
3282 off_p = (i >> 1) & 1;
3283 rat_p = (i >> 2) & 1;
3284
09e881c9 3285 acost = data->costs[0][1][off_p][rat_p] + 1;
975626a7
ZD
3286 if (var_p)
3287 acost += add_c;
3288
09e881c9
BE
3289 if (acost < data->costs[1][var_p][off_p][rat_p])
3290 data->costs[1][var_p][off_p][rat_p] = acost;
975626a7 3291 }
2c08497a 3292
975626a7
ZD
3293 if (dump_file && (dump_flags & TDF_DETAILS))
3294 {
3295 fprintf (dump_file, "Address costs:\n");
3296
3297 for (i = 0; i < 16; i++)
3298 {
3299 sym_p = i & 1;
3300 var_p = (i >> 1) & 1;
3301 off_p = (i >> 2) & 1;
3302 rat_p = (i >> 3) & 1;
3303
3304 fprintf (dump_file, " ");
3305 if (sym_p)
3306 fprintf (dump_file, "sym + ");
3307 if (var_p)
3308 fprintf (dump_file, "var + ");
3309 if (off_p)
3310 fprintf (dump_file, "cst + ");
3311 if (rat_p)
3312 fprintf (dump_file, "rat * ");
3313
09e881c9 3314 acost = data->costs[sym_p][var_p][off_p][rat_p];
975626a7
ZD
3315 fprintf (dump_file, "index costs %d\n", acost);
3316 }
2c08497a
BS
3317 if (has_predec[mem_mode] || has_postdec[mem_mode]
3318 || has_preinc[mem_mode] || has_postinc[mem_mode])
3319 fprintf (dump_file, " May include autoinc/dec\n");
975626a7
ZD
3320 fprintf (dump_file, "\n");
3321 }
09e881c9
BE
3322
3323 VEC_replace (address_cost_data, address_cost_data_list,
3324 data_index, data);
8b11a64c
ZD
3325 }
3326
d4ebfa65 3327 bits = GET_MODE_BITSIZE (address_mode);
8b11a64c
ZD
3328 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3329 offset &= mask;
3330 if ((offset >> (bits - 1) & 1))
3331 offset |= ~mask;
3332 s_offset = offset;
3333
2c08497a
BS
3334 autoinc = false;
3335 msize = GET_MODE_SIZE (mem_mode);
3336 autoinc_offset = offset;
3337 if (stmt_after_inc)
3338 autoinc_offset += ratio * cstep;
3339 if (symbol_present || var_present || ratio != 1)
3340 autoinc = false;
3341 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3342 && msize == cstep)
3343 || (has_postdec[mem_mode] && autoinc_offset == 0
3344 && msize == -cstep)
3345 || (has_preinc[mem_mode] && autoinc_offset == msize
3346 && msize == cstep)
3347 || (has_predec[mem_mode] && autoinc_offset == -msize
3348 && msize == -cstep))
3349 autoinc = true;
3350
8b11a64c 3351 cost = 0;
a85a9024 3352 offset_p = (s_offset != 0
09e881c9
BE
3353 && data->min_offset <= s_offset
3354 && s_offset <= data->max_offset);
8b11a64c 3355 ratio_p = (ratio != 1
09e881c9 3356 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
8b11a64c
ZD
3357
3358 if (ratio != 1 && !ratio_p)
d4ebfa65 3359 cost += multiply_by_cost (ratio, address_mode, speed);
8b11a64c
ZD
3360
3361 if (s_offset && !offset_p && !symbol_present)
d4ebfa65 3362 cost += add_cost (address_mode, speed);
8b11a64c 3363
2c08497a
BS
3364 if (may_autoinc)
3365 *may_autoinc = autoinc;
09e881c9 3366 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
6e8c65f6
ZD
3367 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3368 return new_cost (cost + acost, complexity);
8b11a64c 3369}
8b11a64c 3370
3ac01fde
ZD
3371/* Estimates cost of forcing expression EXPR into a variable. */
3372
6e8c65f6 3373static comp_cost
f40751dd 3374force_expr_to_var_cost (tree expr, bool speed)
8b11a64c
ZD
3375{
3376 static bool costs_initialized = false;
f40751dd
JH
3377 static unsigned integer_cost [2];
3378 static unsigned symbol_cost [2];
3379 static unsigned address_cost [2];
7299dbfb 3380 tree op0, op1;
6e8c65f6 3381 comp_cost cost0, cost1, cost;
7299dbfb 3382 enum machine_mode mode;
8b11a64c
ZD
3383
3384 if (!costs_initialized)
3385 {
8b11a64c 3386 tree type = build_pointer_type (integer_type_node);
8318b0d9
RH
3387 tree var, addr;
3388 rtx x;
f40751dd 3389 int i;
8318b0d9
RH
3390
3391 var = create_tmp_var_raw (integer_type_node, "test_var");
3392 TREE_STATIC (var) = 1;
3393 x = produce_memory_decl_rtl (var, NULL);
3394 SET_DECL_RTL (var, x);
8b11a64c 3395
8b11a64c 3396 addr = build1 (ADDR_EXPR, type, var);
8b11a64c 3397
f40751dd
JH
3398
3399 for (i = 0; i < 2; i++)
8b11a64c 3400 {
f40751dd
JH
3401 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3402 2000), i);
3403
3404 symbol_cost[i] = computation_cost (addr, i) + 1;
3405
3406 address_cost[i]
3407 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3408 addr,
3409 build_int_cst (sizetype, 2000)), i) + 1;
3410 if (dump_file && (dump_flags & TDF_DETAILS))
3411 {
3412 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3413 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3414 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3415 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3416 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3417 fprintf (dump_file, "\n");
3418 }
8b11a64c
ZD
3419 }
3420
3421 costs_initialized = true;
3422 }
3423
f5e2738c
ZD
3424 STRIP_NOPS (expr);
3425
8b11a64c 3426 if (SSA_VAR_P (expr))
6e8c65f6 3427 return zero_cost;
8b11a64c 3428
ad6003f2 3429 if (is_gimple_min_invariant (expr))
8b11a64c
ZD
3430 {
3431 if (TREE_CODE (expr) == INTEGER_CST)
f40751dd 3432 return new_cost (integer_cost [speed], 0);
8b11a64c
ZD
3433
3434 if (TREE_CODE (expr) == ADDR_EXPR)
3435 {
3436 tree obj = TREE_OPERAND (expr, 0);
3437
3438 if (TREE_CODE (obj) == VAR_DECL
3439 || TREE_CODE (obj) == PARM_DECL
3440 || TREE_CODE (obj) == RESULT_DECL)
f40751dd 3441 return new_cost (symbol_cost [speed], 0);
8b11a64c
ZD
3442 }
3443
f40751dd 3444 return new_cost (address_cost [speed], 0);
8b11a64c
ZD
3445 }
3446
7299dbfb
ZD
3447 switch (TREE_CODE (expr))
3448 {
5be014d5 3449 case POINTER_PLUS_EXPR:
7299dbfb
ZD
3450 case PLUS_EXPR:
3451 case MINUS_EXPR:
3452 case MULT_EXPR:
3453 op0 = TREE_OPERAND (expr, 0);
3454 op1 = TREE_OPERAND (expr, 1);
f5e2738c
ZD
3455 STRIP_NOPS (op0);
3456 STRIP_NOPS (op1);
7299dbfb
ZD
3457
3458 if (is_gimple_val (op0))
6e8c65f6 3459 cost0 = zero_cost;
7299dbfb 3460 else
f40751dd 3461 cost0 = force_expr_to_var_cost (op0, speed);
7299dbfb
ZD
3462
3463 if (is_gimple_val (op1))
6e8c65f6 3464 cost1 = zero_cost;
7299dbfb 3465 else
f40751dd 3466 cost1 = force_expr_to_var_cost (op1, speed);
7299dbfb
ZD
3467
3468 break;
3469
7a2faca1
EB
3470 case NEGATE_EXPR:
3471 op0 = TREE_OPERAND (expr, 0);
3472 STRIP_NOPS (op0);
3473 op1 = NULL_TREE;
3474
3475 if (is_gimple_val (op0))
3476 cost0 = zero_cost;
3477 else
3478 cost0 = force_expr_to_var_cost (op0, speed);
3479
3480 cost1 = zero_cost;
3481 break;
3482
7299dbfb
ZD
3483 default:
3484 /* Just an arbitrary value, FIXME. */
f40751dd 3485 return new_cost (target_spill_cost[speed], 0);
7299dbfb
ZD
3486 }
3487
3488 mode = TYPE_MODE (TREE_TYPE (expr));
3489 switch (TREE_CODE (expr))
3490 {
5be014d5 3491 case POINTER_PLUS_EXPR:
7299dbfb
ZD
3492 case PLUS_EXPR:
3493 case MINUS_EXPR:
7a2faca1 3494 case NEGATE_EXPR:
f40751dd 3495 cost = new_cost (add_cost (mode, speed), 0);
7299dbfb
ZD
3496 break;
3497
3498 case MULT_EXPR:
3499 if (cst_and_fits_in_hwi (op0))
f40751dd
JH
3500 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3501 else if (cst_and_fits_in_hwi (op1))
3502 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
7299dbfb 3503 else
f40751dd 3504 return new_cost (target_spill_cost [speed], 0);
7299dbfb
ZD
3505 break;
3506
3507 default:
3508 gcc_unreachable ();
3509 }
3510
6e8c65f6
ZD
3511 cost = add_costs (cost, cost0);
3512 cost = add_costs (cost, cost1);
7299dbfb
ZD
3513
3514 /* Bound the cost by target_spill_cost. The parts of complicated
3515 computations often are either loop invariant or at least can
3516 be shared between several iv uses, so letting this grow without
3517 limits would not give reasonable results. */
2c08497a 3518 if (cost.cost > (int) target_spill_cost [speed])
f40751dd 3519 cost.cost = target_spill_cost [speed];
6e8c65f6
ZD
3520
3521 return cost;
8b11a64c
ZD
3522}
3523
3ac01fde
ZD
3524/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3525 invariants the computation depends on. */
3526
6e8c65f6 3527static comp_cost
3ac01fde
ZD
3528force_var_cost (struct ivopts_data *data,
3529 tree expr, bitmap *depends_on)
3530{
3531 if (depends_on)
3532 {
3533 fd_ivopts_data = data;
3534 walk_tree (&expr, find_depends, depends_on, NULL);
3535 }
3536
f40751dd 3537 return force_expr_to_var_cost (expr, data->speed);
3ac01fde
ZD
3538}
3539
8b11a64c
ZD
3540/* Estimates cost of expressing address ADDR as var + symbol + offset. The
3541 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3542 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3543 invariants the computation depends on. */
3544
6e8c65f6 3545static comp_cost
8b11a64c
ZD
3546split_address_cost (struct ivopts_data *data,
3547 tree addr, bool *symbol_present, bool *var_present,
3548 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3549{
2f4675b4
ZD
3550 tree core;
3551 HOST_WIDE_INT bitsize;
3552 HOST_WIDE_INT bitpos;
3553 tree toffset;
3554 enum machine_mode mode;
3555 int unsignedp, volatilep;
3556
3557 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2614034e 3558 &unsignedp, &volatilep, false);
8b11a64c 3559
2f4675b4
ZD
3560 if (toffset != 0
3561 || bitpos % BITS_PER_UNIT != 0
3562 || TREE_CODE (core) != VAR_DECL)
8b11a64c
ZD
3563 {
3564 *symbol_present = false;
3565 *var_present = true;
3566 fd_ivopts_data = data;
3567 walk_tree (&addr, find_depends, depends_on, NULL);
f40751dd 3568 return new_cost (target_spill_cost[data->speed], 0);
2f4675b4
ZD
3569 }
3570
3571 *offset += bitpos / BITS_PER_UNIT;
8b11a64c
ZD
3572 if (TREE_STATIC (core)
3573 || DECL_EXTERNAL (core))
3574 {
3575 *symbol_present = true;
3576 *var_present = false;
6e8c65f6 3577 return zero_cost;
8b11a64c
ZD
3578 }
3579
3580 *symbol_present = false;
3581 *var_present = true;
6e8c65f6 3582 return zero_cost;
8b11a64c
ZD
3583}
3584
3585/* Estimates cost of expressing difference of addresses E1 - E2 as
3586 var + symbol + offset. The value of offset is added to OFFSET,
3587 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3588 part is missing. DEPENDS_ON is a set of the invariants the computation
3589 depends on. */
3590
6e8c65f6 3591static comp_cost
8b11a64c
ZD
3592ptr_difference_cost (struct ivopts_data *data,
3593 tree e1, tree e2, bool *symbol_present, bool *var_present,
3594 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3595{
2f4675b4 3596 HOST_WIDE_INT diff = 0;
7a2faca1
EB
3597 aff_tree aff_e1, aff_e2;
3598 tree type;
8b11a64c 3599
1e128c5f 3600 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
8b11a64c 3601
7299dbfb 3602 if (ptr_difference_const (e1, e2, &diff))
8b11a64c
ZD
3603 {
3604 *offset += diff;
3605 *symbol_present = false;
3606 *var_present = false;
6e8c65f6 3607 return zero_cost;
8b11a64c
ZD
3608 }
3609
6e8c65f6 3610 if (integer_zerop (e2))
8b11a64c
ZD
3611 return split_address_cost (data, TREE_OPERAND (e1, 0),
3612 symbol_present, var_present, offset, depends_on);
3613
3614 *symbol_present = false;
3615 *var_present = true;
8b11a64c 3616
7a2faca1
EB
3617 type = signed_type_for (TREE_TYPE (e1));
3618 tree_to_aff_combination (e1, type, &aff_e1);
3619 tree_to_aff_combination (e2, type, &aff_e2);
3620 aff_combination_scale (&aff_e2, double_int_minus_one);
3621 aff_combination_add (&aff_e1, &aff_e2);
3622
3623 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
8b11a64c
ZD
3624}
3625
3626/* Estimates cost of expressing difference E1 - E2 as
3627 var + symbol + offset. The value of offset is added to OFFSET,
3628 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3629 part is missing. DEPENDS_ON is a set of the invariants the computation
3630 depends on. */
3631
6e8c65f6 3632static comp_cost
8b11a64c
ZD
3633difference_cost (struct ivopts_data *data,
3634 tree e1, tree e2, bool *symbol_present, bool *var_present,
3635 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3636{
8b11a64c 3637 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
f5e2738c 3638 unsigned HOST_WIDE_INT off1, off2;
7a2faca1
EB
3639 aff_tree aff_e1, aff_e2;
3640 tree type;
f5e2738c 3641
9be872b7
ZD
3642 e1 = strip_offset (e1, &off1);
3643 e2 = strip_offset (e2, &off2);
f5e2738c 3644 *offset += off1 - off2;
8b11a64c 3645
f5e2738c
ZD
3646 STRIP_NOPS (e1);
3647 STRIP_NOPS (e2);
8b11a64c
ZD
3648
3649 if (TREE_CODE (e1) == ADDR_EXPR)
7a2faca1
EB
3650 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3651 offset, depends_on);
8b11a64c
ZD
3652 *symbol_present = false;
3653
3654 if (operand_equal_p (e1, e2, 0))
3655 {
3656 *var_present = false;
6e8c65f6 3657 return zero_cost;
8b11a64c 3658 }
7a2faca1 3659
8b11a64c 3660 *var_present = true;
7a2faca1 3661
6e682d7e 3662 if (integer_zerop (e2))
8b11a64c
ZD
3663 return force_var_cost (data, e1, depends_on);
3664
6e682d7e 3665 if (integer_zerop (e1))
8b11a64c 3666 {
7a2faca1 3667 comp_cost cost = force_var_cost (data, e2, depends_on);
f40751dd 3668 cost.cost += multiply_by_cost (-1, mode, data->speed);
8b11a64c
ZD
3669 return cost;
3670 }
3671
7a2faca1
EB
3672 type = signed_type_for (TREE_TYPE (e1));
3673 tree_to_aff_combination (e1, type, &aff_e1);
3674 tree_to_aff_combination (e2, type, &aff_e2);
3675 aff_combination_scale (&aff_e2, double_int_minus_one);
3676 aff_combination_add (&aff_e1, &aff_e2);
8b11a64c 3677
7a2faca1 3678 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
8b11a64c
ZD
3679}
3680
3681/* Determines the cost of the computation by that USE is expressed
3682 from induction variable CAND. If ADDRESS_P is true, we just need
3683 to create an address from it, otherwise we want to get it into
3684 register. A set of invariants we depend on is stored in
2c08497a
BS
3685 DEPENDS_ON. AT is the statement at that the value is computed.
3686 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3687 addressing is likely. */
8b11a64c 3688
6e8c65f6 3689static comp_cost
8b11a64c
ZD
3690get_computation_cost_at (struct ivopts_data *data,
3691 struct iv_use *use, struct iv_cand *cand,
2c08497a
BS
3692 bool address_p, bitmap *depends_on, gimple at,
3693 bool *can_autoinc)
8b11a64c
ZD
3694{
3695 tree ubase = use->iv->base, ustep = use->iv->step;
3696 tree cbase, cstep;
3697 tree utype = TREE_TYPE (ubase), ctype;
73f30c63 3698 unsigned HOST_WIDE_INT cstepi, offset = 0;
8b11a64c 3699 HOST_WIDE_INT ratio, aratio;
2c08497a 3700 bool var_present, symbol_present, stmt_is_after_inc;
6e8c65f6 3701 comp_cost cost;
73f30c63 3702 double_int rat;
f40751dd 3703 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
8b11a64c
ZD
3704
3705 *depends_on = NULL;
3706
3707 /* Only consider real candidates. */
3708 if (!cand->iv)
6e8c65f6 3709 return infinite_cost;
8b11a64c
ZD
3710
3711 cbase = cand->iv->base;
3712 cstep = cand->iv->step;
3713 ctype = TREE_TYPE (cbase);
3714
3715 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3716 {
3717 /* We do not have a precision to express the values of use. */
6e8c65f6 3718 return infinite_cost;
8b11a64c
ZD
3719 }
3720
e6845c23
ZD
3721 if (address_p)
3722 {
3723 /* Do not try to express address of an object with computation based
3724 on address of a different object. This may cause problems in rtl
3725 level alias analysis (that does not expect this to be happening,
3726 as this is illegal in C), and would be unlikely to be useful
3727 anyway. */
3728 if (use->iv->base_object
3729 && cand->iv->base_object
3730 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
6e8c65f6 3731 return infinite_cost;
e6845c23
ZD
3732 }
3733
7a2faca1 3734 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
8b11a64c
ZD
3735 {
3736 /* TODO -- add direct handling of this case. */
3737 goto fallback;
3738 }
3739
9be872b7
ZD
3740 /* CSTEPI is removed from the offset in case statement is after the
3741 increment. If the step is not constant, we use zero instead.
1e1def1c 3742 This is a bit imprecise (there is the extra addition), but
9be872b7
ZD
3743 redundancy elimination is likely to transform the code so that
3744 it uses value of the variable before increment anyway,
3745 so it is not that much unrealistic. */
3746 if (cst_and_fits_in_hwi (cstep))
3747 cstepi = int_cst_value (cstep);
3748 else
3749 cstepi = 0;
3750
73f30c63 3751 if (!constant_multiple_of (ustep, cstep, &rat))
6e8c65f6 3752 return infinite_cost;
f414f2f3 3753
73f30c63
ZD
3754 if (double_int_fits_in_shwi_p (rat))
3755 ratio = double_int_to_shwi (rat);
3756 else
6e8c65f6 3757 return infinite_cost;
8b11a64c 3758
7a2faca1
EB
3759 STRIP_NOPS (cbase);
3760 ctype = TREE_TYPE (cbase);
3761
8b11a64c
ZD
3762 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3763 or ratio == 1, it is better to handle this like
3764
3765 ubase - ratio * cbase + ratio * var
3766
3767 (also holds in the case ratio == -1, TODO. */
3768
9be872b7 3769 if (cst_and_fits_in_hwi (cbase))
8b11a64c
ZD
3770 {
3771 offset = - ratio * int_cst_value (cbase);
6e8c65f6
ZD
3772 cost = difference_cost (data,
3773 ubase, build_int_cst (utype, 0),
3774 &symbol_present, &var_present, &offset,
3775 depends_on);
8b11a64c
ZD
3776 }
3777 else if (ratio == 1)
3778 {
6e8c65f6
ZD
3779 cost = difference_cost (data,
3780 ubase, cbase,
3781 &symbol_present, &var_present, &offset,
3782 depends_on);
8b11a64c 3783 }
7a2faca1
EB
3784 else if (address_p
3785 && !POINTER_TYPE_P (ctype)
09e881c9
BE
3786 && multiplier_allowed_in_address_p
3787 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3788 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
7a2faca1
EB
3789 {
3790 cbase
3791 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3792 cost = difference_cost (data,
3793 ubase, cbase,
3794 &symbol_present, &var_present, &offset,
3795 depends_on);
3796 }
8b11a64c
ZD
3797 else
3798 {
6e8c65f6 3799 cost = force_var_cost (data, cbase, depends_on);
f40751dd 3800 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
6e8c65f6
ZD
3801 cost = add_costs (cost,
3802 difference_cost (data,
3803 ubase, build_int_cst (utype, 0),
3804 &symbol_present, &var_present,
3805 &offset, depends_on));
8b11a64c
ZD
3806 }
3807
3808 /* If we are after the increment, the value of the candidate is higher by
3809 one iteration. */
2c08497a
BS
3810 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3811 if (stmt_is_after_inc)
8b11a64c
ZD
3812 offset -= ratio * cstepi;
3813
3814 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
7a2faca1
EB
3815 (symbol/var1/const parts may be omitted). If we are looking for an
3816 address, find the cost of addressing this. */
8b11a64c 3817 if (address_p)
7a2faca1
EB
3818 return add_costs (cost,
3819 get_address_cost (symbol_present, var_present,
2c08497a
BS
3820 offset, ratio, cstepi,
3821 TYPE_MODE (TREE_TYPE (utype)),
09e881c9 3822 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
2c08497a
BS
3823 speed, stmt_is_after_inc,
3824 can_autoinc));
8b11a64c
ZD
3825
3826 /* Otherwise estimate the costs for computing the expression. */
8b11a64c
ZD
3827 if (!symbol_present && !var_present && !offset)
3828 {
3829 if (ratio != 1)
f40751dd 3830 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
8b11a64c
ZD
3831 return cost;
3832 }
3833
7a2faca1
EB
3834 /* Symbol + offset should be compile-time computable so consider that they
3835 are added once to the variable, if present. */
3836 if (var_present && (symbol_present || offset))
3837 cost.cost += add_cost (TYPE_MODE (ctype), speed)
3838 / AVG_LOOP_NITER (data->current_loop);
8b11a64c 3839
6e8c65f6
ZD
3840 /* Having offset does not affect runtime cost in case it is added to
3841 symbol, but it increases complexity. */
3842 if (offset)
3843 cost.complexity++;
3844
7a2faca1
EB
3845 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3846
3847 aratio = ratio > 0 ? ratio : -ratio;
3848 if (aratio != 1)
3849 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
8b11a64c
ZD
3850
3851fallback:
2c08497a
BS
3852 if (can_autoinc)
3853 *can_autoinc = false;
3854
8b11a64c
ZD
3855 {
3856 /* Just get the expression, expand it and measure the cost. */
3857 tree comp = get_computation_at (data->current_loop, use, cand, at);
3858
3859 if (!comp)
6e8c65f6 3860 return infinite_cost;
8b11a64c
ZD
3861
3862 if (address_p)
3863 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3864
f40751dd 3865 return new_cost (computation_cost (comp, speed), 0);
8b11a64c
ZD
3866 }
3867}
3868
3869/* Determines the cost of the computation by that USE is expressed
3870 from induction variable CAND. If ADDRESS_P is true, we just need
3871 to create an address from it, otherwise we want to get it into
3872 register. A set of invariants we depend on is stored in
2c08497a
BS
3873 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3874 autoinc addressing is likely. */
8b11a64c 3875
6e8c65f6 3876static comp_cost
8b11a64c
ZD
3877get_computation_cost (struct ivopts_data *data,
3878 struct iv_use *use, struct iv_cand *cand,
2c08497a 3879 bool address_p, bitmap *depends_on, bool *can_autoinc)
8b11a64c
ZD
3880{
3881 return get_computation_cost_at (data,
2c08497a
BS
3882 use, cand, address_p, depends_on, use->stmt,
3883 can_autoinc);
8b11a64c
ZD
3884}
3885
3886/* Determines cost of basing replacement of USE on CAND in a generic
3887 expression. */
3888
b1b02be2 3889static bool
8b11a64c
ZD
3890determine_use_iv_cost_generic (struct ivopts_data *data,
3891 struct iv_use *use, struct iv_cand *cand)
3892{
3893 bitmap depends_on;
6e8c65f6 3894 comp_cost cost;
eec5fec9
ZD
3895
3896 /* The simple case first -- if we need to express value of the preserved
3897 original biv, the cost is 0. This also prevents us from counting the
3898 cost of increment twice -- once at this use and once in the cost of
3899 the candidate. */
3900 if (cand->pos == IP_ORIGINAL
3901 && cand->incremented_at == use->stmt)
3902 {
6e8c65f6 3903 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
eec5fec9
ZD
3904 return true;
3905 }
8b11a64c 3906
2c08497a 3907 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
f5f12961 3908 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
b1b02be2 3909
6e8c65f6 3910 return !infinite_cost_p (cost);
8b11a64c
ZD
3911}
3912
3913/* Determines cost of basing replacement of USE on CAND in an address. */
3914
b1b02be2 3915static bool
8b11a64c
ZD
3916determine_use_iv_cost_address (struct ivopts_data *data,
3917 struct iv_use *use, struct iv_cand *cand)
3918{
3919 bitmap depends_on;
2c08497a
BS
3920 bool can_autoinc;
3921 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3922 &can_autoinc);
8b11a64c 3923
2c08497a
BS
3924 if (cand->ainc_use == use)
3925 {
3926 if (can_autoinc)
3927 cost.cost -= cand->cost_step;
3928 /* If we generated the candidate solely for exploiting autoincrement
3929 opportunities, and it turns out it can't be used, set the cost to
3930 infinity to make sure we ignore it. */
3931 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3932 cost = infinite_cost;
3933 }
f5f12961 3934 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
b1b02be2 3935
6e8c65f6 3936 return !infinite_cost_p (cost);
8b11a64c
ZD
3937}
3938
7e2ac86c
ZD
3939/* Computes value of candidate CAND at position AT in iteration NITER, and
3940 stores it to VAL. */
8b11a64c 3941
7e2ac86c 3942static void
726a989a 3943cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
7e2ac86c 3944 aff_tree *val)
8b11a64c 3945{
7e2ac86c
ZD
3946 aff_tree step, delta, nit;
3947 struct iv *iv = cand->iv;
8b11a64c 3948 tree type = TREE_TYPE (iv->base);
1ffe34d9
AP
3949 tree steptype = type;
3950 if (POINTER_TYPE_P (type))
3951 steptype = sizetype;
8b11a64c 3952
1ffe34d9 3953 tree_to_aff_combination (iv->step, steptype, &step);
7e2ac86c 3954 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
1ffe34d9 3955 aff_combination_convert (&nit, steptype);
7e2ac86c 3956 aff_combination_mult (&nit, &step, &delta);
8b11a64c 3957 if (stmt_after_increment (loop, cand, at))
7e2ac86c 3958 aff_combination_add (&delta, &step);
8b11a64c 3959
7e2ac86c
ZD
3960 tree_to_aff_combination (iv->base, type, val);
3961 aff_combination_add (val, &delta);
8b11a64c
ZD
3962}
3963
ca4c3169
ZD
3964/* Returns period of induction variable iv. */
3965
3966static tree
3967iv_period (struct iv *iv)
3968{
3969 tree step = iv->step, period, type;
3970 tree pow2div;
3971
3972 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3973
3974 /* Period of the iv is gcd (step, type range). Since type range is power
3975 of two, it suffices to determine the maximum power of two that divides
3976 step. */
3977 pow2div = num_ending_zeros (step);
3978 type = unsigned_type_for (TREE_TYPE (step));
3979
3980 period = build_low_bits_mask (type,
3981 (TYPE_PRECISION (type)
3982 - tree_low_cst (pow2div, 1)));
3983
3984 return period;
3985}
3986
f5f12961
ZD
3987/* Returns the comparison operator used when eliminating the iv USE. */
3988
3989static enum tree_code
3990iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3991{
3992 struct loop *loop = data->current_loop;
3993 basic_block ex_bb;
3994 edge exit;
3995
726a989a 3996 ex_bb = gimple_bb (use->stmt);
f5f12961
ZD
3997 exit = EDGE_SUCC (ex_bb, 0);
3998 if (flow_bb_inside_loop_p (loop, exit->dest))
3999 exit = EDGE_SUCC (ex_bb, 1);
4000
4001 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4002}
4003
8b11a64c 4004/* Check whether it is possible to express the condition in USE by comparison
f5f12961 4005 of candidate CAND. If so, store the value compared with to BOUND. */
8b11a64c
ZD
4006
4007static bool
ca4c3169 4008may_eliminate_iv (struct ivopts_data *data,
f5f12961 4009 struct iv_use *use, struct iv_cand *cand, tree *bound)
8b11a64c 4010{
e6845c23 4011 basic_block ex_bb;
8b11a64c 4012 edge exit;
cbc012d5 4013 tree nit, period;
ca4c3169 4014 struct loop *loop = data->current_loop;
7e2ac86c 4015 aff_tree bnd;
cbc012d5 4016
9be872b7
ZD
4017 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4018 return false;
4019
52778e2a
EB
4020 /* For now works only for exits that dominate the loop latch.
4021 TODO: extend to other conditions inside loop body. */
726a989a 4022 ex_bb = gimple_bb (use->stmt);
e6845c23 4023 if (use->stmt != last_stmt (ex_bb)
726a989a
RB
4024 || gimple_code (use->stmt) != GIMPLE_COND
4025 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
8b11a64c
ZD
4026 return false;
4027
e6845c23
ZD
4028 exit = EDGE_SUCC (ex_bb, 0);
4029 if (flow_bb_inside_loop_p (loop, exit->dest))
4030 exit = EDGE_SUCC (ex_bb, 1);
4031 if (flow_bb_inside_loop_p (loop, exit->dest))
4032 return false;
4033
dcccd88d
ZD
4034 nit = niter_for_exit (data, exit);
4035 if (!nit)
8b11a64c
ZD
4036 return false;
4037
52778e2a
EB
4038 /* Determine whether we can use the variable to test the exit condition.
4039 This is the case iff the period of the induction variable is greater
4040 than the number of iterations for which the exit condition is true. */
ca4c3169 4041 period = iv_period (cand->iv);
8b11a64c 4042
52778e2a
EB
4043 /* If the number of iterations is constant, compare against it directly. */
4044 if (TREE_CODE (nit) == INTEGER_CST)
4045 {
4046 if (!tree_int_cst_lt (nit, period))
4047 return false;
4048 }
4049
4050 /* If not, and if this is the only possible exit of the loop, see whether
4051 we can get a conservative estimate on the number of iterations of the
4052 entire loop and compare against that instead. */
4053 else if (loop_only_exit_p (loop, exit))
4054 {
4055 double_int period_value, max_niter;
4056 if (!estimated_loop_iterations (loop, true, &max_niter))
4057 return false;
4058 period_value = tree_to_double_int (period);
4059 if (double_int_ucmp (max_niter, period_value) >= 0)
4060 return false;
4061 }
4062
4063 /* Otherwise, punt. */
4064 else
8b11a64c
ZD
4065 return false;
4066
7e2ac86c 4067 cand_value_at (loop, cand, use->stmt, nit, &bnd);
771f882e 4068
7e2ac86c 4069 *bound = aff_combination_to_tree (&bnd);
771f882e
ZD
4070 /* It is unlikely that computing the number of iterations using division
4071 would be more profitable than keeping the original induction variable. */
4072 if (expression_expensive_p (*bound))
4073 return false;
8b11a64c
ZD
4074 return true;
4075}
4076
4077/* Determines cost of basing replacement of USE on CAND in a condition. */
4078
b1b02be2 4079static bool
8b11a64c
ZD
4080determine_use_iv_cost_condition (struct ivopts_data *data,
4081 struct iv_use *use, struct iv_cand *cand)
4082{
b697aed4
ZD
4083 tree bound = NULL_TREE;
4084 struct iv *cmp_iv;
4085 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
6e8c65f6 4086 comp_cost elim_cost, express_cost, cost;
b697aed4 4087 bool ok;
8b11a64c
ZD
4088
4089 /* Only consider real candidates. */
4090 if (!cand->iv)
4091 {
6e8c65f6 4092 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
b1b02be2 4093 return false;
8b11a64c
ZD
4094 }
4095
b697aed4 4096 /* Try iv elimination. */
f5f12961 4097 if (may_eliminate_iv (data, use, cand, &bound))
cbc012d5
ZD
4098 {
4099 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4100 /* The bound is a loop invariant, so it will be only computed
4101 once. */
6e8c65f6 4102 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
cbc012d5 4103 }
b697aed4 4104 else
6e8c65f6 4105 elim_cost = infinite_cost;
8b11a64c 4106
b697aed4
ZD
4107 /* Try expressing the original giv. If it is compared with an invariant,
4108 note that we cannot get rid of it. */
726a989a 4109 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
b697aed4 4110 gcc_assert (ok);
8b11a64c 4111
b697aed4 4112 express_cost = get_computation_cost (data, use, cand, false,
2c08497a 4113 &depends_on_express, NULL);
b697aed4
ZD
4114 fd_ivopts_data = data;
4115 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
f5f12961 4116
b9ff6079
AP
4117 /* Choose the better approach, preferring the eliminated IV. */
4118 if (compare_costs (elim_cost, express_cost) <= 0)
8b11a64c 4119 {
b697aed4
ZD
4120 cost = elim_cost;
4121 depends_on = depends_on_elim;
4122 depends_on_elim = NULL;
4123 }
4124 else
4125 {
4126 cost = express_cost;
4127 depends_on = depends_on_express;
4128 depends_on_express = NULL;
4129 bound = NULL_TREE;
8b11a64c
ZD
4130 }
4131
b697aed4
ZD
4132 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4133
4134 if (depends_on_elim)
4135 BITMAP_FREE (depends_on_elim);
4136 if (depends_on_express)
4137 BITMAP_FREE (depends_on_express);
4138
6e8c65f6 4139 return !infinite_cost_p (cost);
8b11a64c
ZD
4140}
4141
b1b02be2
ZD
4142/* Determines cost of basing replacement of USE on CAND. Returns false
4143 if USE cannot be based on CAND. */
8b11a64c 4144
b1b02be2 4145static bool
8b11a64c
ZD
4146determine_use_iv_cost (struct ivopts_data *data,
4147 struct iv_use *use, struct iv_cand *cand)
4148{
4149 switch (use->type)
4150 {
4151 case USE_NONLINEAR_EXPR:
b1b02be2 4152 return determine_use_iv_cost_generic (data, use, cand);
8b11a64c 4153
8b11a64c 4154 case USE_ADDRESS:
b1b02be2 4155 return determine_use_iv_cost_address (data, use, cand);
8b11a64c
ZD
4156
4157 case USE_COMPARE:
b1b02be2 4158 return determine_use_iv_cost_condition (data, use, cand);
8b11a64c
ZD
4159
4160 default:
1e128c5f 4161 gcc_unreachable ();
8b11a64c
ZD
4162 }
4163}
4164
2c08497a
BS
4165/* Return true if get_computation_cost indicates that autoincrement is
4166 a possibility for the pair of USE and CAND, false otherwise. */
4167
4168static bool
4169autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4170 struct iv_cand *cand)
4171{
4172 bitmap depends_on;
4173 bool can_autoinc;
4174 comp_cost cost;
4175
4176 if (use->type != USE_ADDRESS)
4177 return false;
4178
4179 cost = get_computation_cost (data, use, cand, true, &depends_on,
4180 &can_autoinc);
4181
4182 BITMAP_FREE (depends_on);
4183
4184 return !infinite_cost_p (cost) && can_autoinc;
4185}
4186
4187/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4188 use that allows autoincrement, and set their AINC_USE if possible. */
4189
4190static void
4191set_autoinc_for_original_candidates (struct ivopts_data *data)
4192{
4193 unsigned i, j;
4194
4195 for (i = 0; i < n_iv_cands (data); i++)
4196 {
4197 struct iv_cand *cand = iv_cand (data, i);
4198 struct iv_use *closest = NULL;
4199 if (cand->pos != IP_ORIGINAL)
4200 continue;
4201 for (j = 0; j < n_iv_uses (data); j++)
4202 {
4203 struct iv_use *use = iv_use (data, j);
4204 unsigned uid = gimple_uid (use->stmt);
4205 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4206 || uid > gimple_uid (cand->incremented_at))
4207 continue;
4208 if (closest == NULL || uid > gimple_uid (closest->stmt))
4209 closest = use;
4210 }
4211 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4212 continue;
4213 cand->ainc_use = closest;
4214 }
4215}
4216
4217/* Finds the candidates for the induction variables. */
4218
4219static void
4220find_iv_candidates (struct ivopts_data *data)
4221{
4222 /* Add commonly used ivs. */
4223 add_standard_iv_candidates (data);
4224
4225 /* Add old induction variables. */
4226 add_old_ivs_candidates (data);
4227
4228 /* Add induction variables derived from uses. */
4229 add_derived_ivs_candidates (data);
4230
4231 set_autoinc_for_original_candidates (data);
4232
4233 /* Record the important candidates. */
4234 record_important_candidates (data);
4235}
4236
8b11a64c
ZD
4237/* Determines costs of basing the use of the iv on an iv candidate. */
4238
4239static void
4240determine_use_iv_costs (struct ivopts_data *data)
4241{
4242 unsigned i, j;
4243 struct iv_use *use;
4244 struct iv_cand *cand;
8bdbfff5 4245 bitmap to_clear = BITMAP_ALLOC (NULL);
8b11a64c
ZD
4246
4247 alloc_use_cost_map (data);
4248
8b11a64c
ZD
4249 for (i = 0; i < n_iv_uses (data); i++)
4250 {
4251 use = iv_use (data, i);
4252
4253 if (data->consider_all_candidates)
4254 {
4255 for (j = 0; j < n_iv_cands (data); j++)
4256 {
4257 cand = iv_cand (data, j);
4258 determine_use_iv_cost (data, use, cand);
4259 }
4260 }
4261 else
4262 {
87c476a2
ZD
4263 bitmap_iterator bi;
4264
4265 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
8b11a64c
ZD
4266 {
4267 cand = iv_cand (data, j);
b1b02be2
ZD
4268 if (!determine_use_iv_cost (data, use, cand))
4269 bitmap_set_bit (to_clear, j);
87c476a2 4270 }
b1b02be2
ZD
4271
4272 /* Remove the candidates for that the cost is infinite from
4273 the list of related candidates. */
4274 bitmap_and_compl_into (use->related_cands, to_clear);
4275 bitmap_clear (to_clear);
8b11a64c
ZD
4276 }
4277 }
4278
8bdbfff5 4279 BITMAP_FREE (to_clear);
b1b02be2 4280
8b11a64c
ZD
4281 if (dump_file && (dump_flags & TDF_DETAILS))
4282 {
4283 fprintf (dump_file, "Use-candidate costs:\n");
4284
4285 for (i = 0; i < n_iv_uses (data); i++)
4286 {
4287 use = iv_use (data, i);
4288
4289 fprintf (dump_file, "Use %d:\n", i);
6e8c65f6 4290 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
8b11a64c
ZD
4291 for (j = 0; j < use->n_map_members; j++)
4292 {
4293 if (!use->cost_map[j].cand
6e8c65f6 4294 || infinite_cost_p (use->cost_map[j].cost))
8b11a64c
ZD
4295 continue;
4296
6e8c65f6 4297 fprintf (dump_file, " %d\t%d\t%d\t",
8b11a64c 4298 use->cost_map[j].cand->id,
6e8c65f6
ZD
4299 use->cost_map[j].cost.cost,
4300 use->cost_map[j].cost.complexity);
8b11a64c
ZD
4301 if (use->cost_map[j].depends_on)
4302 bitmap_print (dump_file,
4303 use->cost_map[j].depends_on, "","");
4304 fprintf (dump_file, "\n");
4305 }
4306
4307 fprintf (dump_file, "\n");
4308 }
4309 fprintf (dump_file, "\n");
4310 }
4311}
4312
4313/* Determines cost of the candidate CAND. */
4314
4315static void
4316determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4317{
6e8c65f6
ZD
4318 comp_cost cost_base;
4319 unsigned cost, cost_step;
4366cf6d 4320 tree base;
8b11a64c
ZD
4321
4322 if (!cand->iv)
4323 {
4324 cand->cost = 0;
4325 return;
4326 }
4327
4328 /* There are two costs associated with the candidate -- its increment
4329 and its initialization. The second is almost negligible for any loop
4330 that rolls enough, so we take it just very little into account. */
4331
4332 base = cand->iv->base;
4333 cost_base = force_var_cost (data, base, NULL);
f40751dd 4334 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
8b11a64c 4335
6e8c65f6 4336 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
8b11a64c 4337
6e8c65f6 4338 /* Prefer the original ivs unless we may gain something by replacing it.
fa10beec 4339 The reason is to make debugging simpler; so this is not relevant for
6e8c65f6
ZD
4340 artificial ivs created by other optimization passes. */
4341 if (cand->pos != IP_ORIGINAL
4342 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4343 cost++;
8b11a64c
ZD
4344
4345 /* Prefer not to insert statements into latch unless there are some
4346 already (so that we do not create unnecessary jumps). */
4366cf6d
ZD
4347 if (cand->pos == IP_END
4348 && empty_block_p (ip_end_pos (data->current_loop)))
6e8c65f6
ZD
4349 cost++;
4350
4351 cand->cost = cost;
2c08497a 4352 cand->cost_step = cost_step;
8b11a64c
ZD
4353}
4354
4355/* Determines costs of computation of the candidates. */
4356
4357static void
4358determine_iv_costs (struct ivopts_data *data)
4359{
4360 unsigned i;
4361
4362 if (dump_file && (dump_flags & TDF_DETAILS))
4363 {
4364 fprintf (dump_file, "Candidate costs:\n");
4365 fprintf (dump_file, " cand\tcost\n");
4366 }
4367
4368 for (i = 0; i < n_iv_cands (data); i++)
4369 {
4370 struct iv_cand *cand = iv_cand (data, i);
4371
4372 determine_iv_cost (data, cand);
4373
4374 if (dump_file && (dump_flags & TDF_DETAILS))
4375 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4376 }
2c08497a 4377
0f14009a
BS
4378 if (dump_file && (dump_flags & TDF_DETAILS))
4379 fprintf (dump_file, "\n");
8b11a64c
ZD
4380}
4381
4382/* Calculates cost for having SIZE induction variables. */
4383
4384static unsigned
4385ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4386{
a154b43a
ZD
4387 /* We add size to the cost, so that we prefer eliminating ivs
4388 if possible. */
f40751dd 4389 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
8b11a64c
ZD
4390}
4391
4392/* For each size of the induction variable set determine the penalty. */
4393
4394static void
4395determine_set_costs (struct ivopts_data *data)
4396{
4397 unsigned j, n;
726a989a
RB
4398 gimple phi;
4399 gimple_stmt_iterator psi;
4400 tree op;
8b11a64c 4401 struct loop *loop = data->current_loop;
87c476a2 4402 bitmap_iterator bi;
8b11a64c
ZD
4403
4404 /* We use the following model (definitely improvable, especially the
4405 cost function -- TODO):
4406
4407 We estimate the number of registers available (using MD data), name it A.
4408
4409 We estimate the number of registers used by the loop, name it U. This
4410 number is obtained as the number of loop phi nodes (not counting virtual
4411 registers and bivs) + the number of variables from outside of the loop.
4412
4413 We set a reserve R (free regs that are used for temporary computations,
4414 etc.). For now the reserve is a constant 3.
4415
4416 Let I be the number of induction variables.
4417
4418 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4419 make a lot of ivs without a reason).
4420 -- if A - R < U + I <= A, the cost is I * PRES_COST
4421 -- if U + I > A, the cost is I * PRES_COST and
4422 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4423
4424 if (dump_file && (dump_flags & TDF_DETAILS))
4425 {
4426 fprintf (dump_file, "Global costs:\n");
4427 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
f40751dd
JH
4428 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4429 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
8b11a64c
ZD
4430 }
4431
4432 n = 0;
726a989a 4433 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 4434 {
726a989a 4435 phi = gsi_stmt (psi);
8b11a64c
ZD
4436 op = PHI_RESULT (phi);
4437
4438 if (!is_gimple_reg (op))
4439 continue;
4440
4441 if (get_iv (data, op))
4442 continue;
4443
4444 n++;
4445 }
4446
87c476a2 4447 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
4448 {
4449 struct version_info *info = ver_info (data, j);
4450
4451 if (info->inv_id && info->has_nonlin_use)
4452 n++;
87c476a2 4453 }
8b11a64c 4454
9a2ef6b8 4455 data->regs_used = n;
8b11a64c
ZD
4456 if (dump_file && (dump_flags & TDF_DETAILS))
4457 fprintf (dump_file, " regs_used %d\n", n);
4458
4459 if (dump_file && (dump_flags & TDF_DETAILS))
4460 {
4461 fprintf (dump_file, " cost for size:\n");
4462 fprintf (dump_file, " ivs\tcost\n");
4463 for (j = 0; j <= 2 * target_avail_regs; j++)
4464 fprintf (dump_file, " %d\t%d\n", j,
4465 ivopts_global_cost_for_size (data, j));
4466 fprintf (dump_file, "\n");
4467 }
4468}
4469
b1b02be2 4470/* Returns true if A is a cheaper cost pair than B. */
8b11a64c 4471
b1b02be2
ZD
4472static bool
4473cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
8b11a64c 4474{
6e8c65f6
ZD
4475 int cmp;
4476
b1b02be2
ZD
4477 if (!a)
4478 return false;
8b11a64c 4479
b1b02be2
ZD
4480 if (!b)
4481 return true;
4482
6e8c65f6
ZD
4483 cmp = compare_costs (a->cost, b->cost);
4484 if (cmp < 0)
b1b02be2
ZD
4485 return true;
4486
6e8c65f6 4487 if (cmp > 0)
b1b02be2
ZD
4488 return false;
4489
4490 /* In case the costs are the same, prefer the cheaper candidate. */
4491 if (a->cand->cost < b->cand->cost)
4492 return true;
4493
4494 return false;
4495}
4496
4497/* Computes the cost field of IVS structure. */
4498
4499static void
4500iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4501{
6e8c65f6
ZD
4502 comp_cost cost = ivs->cand_use_cost;
4503 cost.cost += ivs->cand_cost;
4504 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
b1b02be2
ZD
4505
4506 ivs->cost = cost;
4507}
4508
9be872b7
ZD
4509/* Remove invariants in set INVS to set IVS. */
4510
4511static void
4512iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4513{
4514 bitmap_iterator bi;
4515 unsigned iid;
4516
4517 if (!invs)
4518 return;
4519
4520 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4521 {
4522 ivs->n_invariant_uses[iid]--;
4523 if (ivs->n_invariant_uses[iid] == 0)
4524 ivs->n_regs--;
4525 }
4526}
4527
b1b02be2
ZD
4528/* Set USE not to be expressed by any candidate in IVS. */
4529
4530static void
4531iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4532 struct iv_use *use)
4533{
9be872b7 4534 unsigned uid = use->id, cid;
b1b02be2 4535 struct cost_pair *cp;
b1b02be2
ZD
4536
4537 cp = ivs->cand_for_use[uid];
4538 if (!cp)
4539 return;
4540 cid = cp->cand->id;
4541
4542 ivs->bad_uses++;
4543 ivs->cand_for_use[uid] = NULL;
4544 ivs->n_cand_uses[cid]--;
4545
4546 if (ivs->n_cand_uses[cid] == 0)
8b11a64c 4547 {
b1b02be2
ZD
4548 bitmap_clear_bit (ivs->cands, cid);
4549 /* Do not count the pseudocandidates. */
4550 if (cp->cand->iv)
4551 ivs->n_regs--;
36f5ada1 4552 ivs->n_cands--;
b1b02be2 4553 ivs->cand_cost -= cp->cand->cost;
9be872b7
ZD
4554
4555 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
b1b02be2
ZD
4556 }
4557
6e8c65f6 4558 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
b1b02be2 4559
9be872b7
ZD
4560 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4561 iv_ca_recount_cost (data, ivs);
4562}
4563
4564/* Add invariants in set INVS to set IVS. */
80cad5fa 4565
9be872b7
ZD
4566static void
4567iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4568{
4569 bitmap_iterator bi;
4570 unsigned iid;
4571
4572 if (!invs)
4573 return;
4574
4575 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
b1b02be2 4576 {
9be872b7
ZD
4577 ivs->n_invariant_uses[iid]++;
4578 if (ivs->n_invariant_uses[iid] == 1)
4579 ivs->n_regs++;
8b11a64c 4580 }
b1b02be2
ZD
4581}
4582
4583/* Set cost pair for USE in set IVS to CP. */
4584
4585static void
4586iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4587 struct iv_use *use, struct cost_pair *cp)
4588{
9be872b7 4589 unsigned uid = use->id, cid;
b1b02be2
ZD
4590
4591 if (ivs->cand_for_use[uid] == cp)
4592 return;
4593
4594 if (ivs->cand_for_use[uid])
4595 iv_ca_set_no_cp (data, ivs, use);
4596
4597 if (cp)
8b11a64c 4598 {
b1b02be2 4599 cid = cp->cand->id;
8b11a64c 4600
b1b02be2
ZD
4601 ivs->bad_uses--;
4602 ivs->cand_for_use[uid] = cp;
4603 ivs->n_cand_uses[cid]++;
4604 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 4605 {
b1b02be2
ZD
4606 bitmap_set_bit (ivs->cands, cid);
4607 /* Do not count the pseudocandidates. */
4608 if (cp->cand->iv)
4609 ivs->n_regs++;
36f5ada1 4610 ivs->n_cands++;
b1b02be2 4611 ivs->cand_cost += cp->cand->cost;
b1b02be2 4612
9be872b7 4613 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
8b11a64c
ZD
4614 }
4615
6e8c65f6 4616 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
9be872b7 4617 iv_ca_set_add_invariants (ivs, cp->depends_on);
b1b02be2 4618 iv_ca_recount_cost (data, ivs);
87c476a2 4619 }
b1b02be2
ZD
4620}
4621
4622/* Extend set IVS by expressing USE by some of the candidates in it
4623 if possible. */
4624
4625static void
4626iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4627 struct iv_use *use)
4628{
4629 struct cost_pair *best_cp = NULL, *cp;
4630 bitmap_iterator bi;
4631 unsigned i;
8b11a64c 4632
b1b02be2
ZD
4633 gcc_assert (ivs->upto >= use->id);
4634
4635 if (ivs->upto == use->id)
4636 {
4637 ivs->upto++;
4638 ivs->bad_uses++;
4639 }
8b11a64c 4640
b1b02be2
ZD
4641 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4642 {
4643 cp = get_use_iv_cost (data, use, iv_cand (data, i));
8b11a64c 4644
b1b02be2
ZD
4645 if (cheaper_cost_pair (cp, best_cp))
4646 best_cp = cp;
4647 }
8b11a64c 4648
b1b02be2 4649 iv_ca_set_cp (data, ivs, use, best_cp);
8b11a64c
ZD
4650}
4651
b1b02be2 4652/* Get cost for assignment IVS. */
8b11a64c 4653
6e8c65f6 4654static comp_cost
b1b02be2
ZD
4655iv_ca_cost (struct iv_ca *ivs)
4656{
c4e93e28
AH
4657 /* This was a conditional expression but it triggered a bug in
4658 Sun C 5.5. */
cb4ad180
AH
4659 if (ivs->bad_uses)
4660 return infinite_cost;
4661 else
4662 return ivs->cost;
b1b02be2
ZD
4663}
4664
4665/* Returns true if all dependences of CP are among invariants in IVS. */
4666
4667static bool
4668iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
8b11a64c
ZD
4669{
4670 unsigned i;
87c476a2 4671 bitmap_iterator bi;
8b11a64c 4672
b1b02be2
ZD
4673 if (!cp->depends_on)
4674 return true;
4675
4676 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
8b11a64c 4677 {
b1b02be2
ZD
4678 if (ivs->n_invariant_uses[i] == 0)
4679 return false;
4680 }
4681
4682 return true;
4683}
4684
4685/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4686 it before NEXT_CHANGE. */
4687
4688static struct iv_ca_delta *
4689iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4690 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4691{
5ed6ace5 4692 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
b1b02be2
ZD
4693
4694 change->use = use;
4695 change->old_cp = old_cp;
4696 change->new_cp = new_cp;
4697 change->next_change = next_change;
4698
4699 return change;
4700}
4701
36f5ada1 4702/* Joins two lists of changes L1 and L2. Destructive -- old lists
6c6cfbfd 4703 are rewritten. */
36f5ada1
ZD
4704
4705static struct iv_ca_delta *
4706iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4707{
4708 struct iv_ca_delta *last;
4709
4710 if (!l2)
4711 return l1;
4712
4713 if (!l1)
4714 return l2;
4715
4716 for (last = l1; last->next_change; last = last->next_change)
4717 continue;
4718 last->next_change = l2;
4719
4720 return l1;
4721}
4722
b1b02be2
ZD
4723/* Returns candidate by that USE is expressed in IVS. */
4724
4725static struct cost_pair *
4726iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4727{
4728 return ivs->cand_for_use[use->id];
4729}
4730
36f5ada1
ZD
4731/* Reverse the list of changes DELTA, forming the inverse to it. */
4732
4733static struct iv_ca_delta *
4734iv_ca_delta_reverse (struct iv_ca_delta *delta)
4735{
4736 struct iv_ca_delta *act, *next, *prev = NULL;
4737 struct cost_pair *tmp;
4738
4739 for (act = delta; act; act = next)
4740 {
4741 next = act->next_change;
4742 act->next_change = prev;
4743 prev = act;
4744
4745 tmp = act->old_cp;
4746 act->old_cp = act->new_cp;
4747 act->new_cp = tmp;
4748 }
4749
4750 return prev;
4751}
4752
b1b02be2
ZD
4753/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4754 reverted instead. */
4755
4756static void
4757iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4758 struct iv_ca_delta *delta, bool forward)
4759{
4760 struct cost_pair *from, *to;
36f5ada1 4761 struct iv_ca_delta *act;
b1b02be2 4762
36f5ada1
ZD
4763 if (!forward)
4764 delta = iv_ca_delta_reverse (delta);
b1b02be2 4765
36f5ada1
ZD
4766 for (act = delta; act; act = act->next_change)
4767 {
4768 from = act->old_cp;
4769 to = act->new_cp;
4770 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4771 iv_ca_set_cp (data, ivs, act->use, to);
8b11a64c 4772 }
36f5ada1
ZD
4773
4774 if (!forward)
4775 iv_ca_delta_reverse (delta);
b1b02be2 4776}
8b11a64c 4777
b1b02be2 4778/* Returns true if CAND is used in IVS. */
8b11a64c 4779
b1b02be2
ZD
4780static bool
4781iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4782{
4783 return ivs->n_cand_uses[cand->id] > 0;
4784}
8b11a64c 4785
36f5ada1
ZD
4786/* Returns number of induction variable candidates in the set IVS. */
4787
4788static unsigned
4789iv_ca_n_cands (struct iv_ca *ivs)
4790{
4791 return ivs->n_cands;
4792}
4793
b1b02be2
ZD
4794/* Free the list of changes DELTA. */
4795
4796static void
4797iv_ca_delta_free (struct iv_ca_delta **delta)
4798{
4799 struct iv_ca_delta *act, *next;
4800
4801 for (act = *delta; act; act = next)
87c476a2 4802 {
b1b02be2
ZD
4803 next = act->next_change;
4804 free (act);
87c476a2 4805 }
8b11a64c 4806
b1b02be2
ZD
4807 *delta = NULL;
4808}
4809
4810/* Allocates new iv candidates assignment. */
4811
4812static struct iv_ca *
4813iv_ca_new (struct ivopts_data *data)
4814{
5ed6ace5 4815 struct iv_ca *nw = XNEW (struct iv_ca);
8b11a64c 4816
b1b02be2
ZD
4817 nw->upto = 0;
4818 nw->bad_uses = 0;
5ed6ace5
MD
4819 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4820 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
8bdbfff5 4821 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 4822 nw->n_cands = 0;
b1b02be2 4823 nw->n_regs = 0;
6e8c65f6 4824 nw->cand_use_cost = zero_cost;
b1b02be2 4825 nw->cand_cost = 0;
5ed6ace5 4826 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
6e8c65f6 4827 nw->cost = zero_cost;
b1b02be2
ZD
4828
4829 return nw;
4830}
4831
4832/* Free memory occupied by the set IVS. */
4833
4834static void
4835iv_ca_free (struct iv_ca **ivs)
4836{
4837 free ((*ivs)->cand_for_use);
4838 free ((*ivs)->n_cand_uses);
8bdbfff5 4839 BITMAP_FREE ((*ivs)->cands);
b1b02be2
ZD
4840 free ((*ivs)->n_invariant_uses);
4841 free (*ivs);
4842 *ivs = NULL;
4843}
4844
4845/* Dumps IVS to FILE. */
4846
4847static void
4848iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4849{
4850 const char *pref = " invariants ";
4851 unsigned i;
6e8c65f6 4852 comp_cost cost = iv_ca_cost (ivs);
b1b02be2 4853
6e8c65f6 4854 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
b1b02be2
ZD
4855 bitmap_print (file, ivs->cands, " candidates ","\n");
4856
4857 for (i = 1; i <= data->max_inv_id; i++)
4858 if (ivs->n_invariant_uses[i])
4859 {
4860 fprintf (file, "%s%d", pref, i);
4861 pref = ", ";
4862 }
4863 fprintf (file, "\n");
4864}
4865
4866/* Try changing candidate in IVS to CAND for each use. Return cost of the
36f5ada1
ZD
4867 new set, and store differences in DELTA. Number of induction variables
4868 in the new set is stored to N_IVS. */
b1b02be2 4869
6e8c65f6 4870static comp_cost
b1b02be2 4871iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
36f5ada1
ZD
4872 struct iv_cand *cand, struct iv_ca_delta **delta,
4873 unsigned *n_ivs)
b1b02be2 4874{
6e8c65f6
ZD
4875 unsigned i;
4876 comp_cost cost;
b1b02be2
ZD
4877 struct iv_use *use;
4878 struct cost_pair *old_cp, *new_cp;
4879
4880 *delta = NULL;
4881 for (i = 0; i < ivs->upto; i++)
4882 {
4883 use = iv_use (data, i);
4884 old_cp = iv_ca_cand_for_use (ivs, use);
4885
4886 if (old_cp
4887 && old_cp->cand == cand)
4888 continue;
4889
4890 new_cp = get_use_iv_cost (data, use, cand);
4891 if (!new_cp)
4892 continue;
4893
4894 if (!iv_ca_has_deps (ivs, new_cp))
4895 continue;
4896
4897 if (!cheaper_cost_pair (new_cp, old_cp))
4898 continue;
4899
4900 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4901 }
4902
4903 iv_ca_delta_commit (data, ivs, *delta, true);
4904 cost = iv_ca_cost (ivs);
36f5ada1
ZD
4905 if (n_ivs)
4906 *n_ivs = iv_ca_n_cands (ivs);
b1b02be2 4907 iv_ca_delta_commit (data, ivs, *delta, false);
8b11a64c
ZD
4908
4909 return cost;
4910}
4911
a0eca485 4912/* Try narrowing set IVS by removing CAND. Return the cost of
b1b02be2 4913 the new set and store the differences in DELTA. */
8b11a64c 4914
6e8c65f6 4915static comp_cost
b1b02be2
ZD
4916iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4917 struct iv_cand *cand, struct iv_ca_delta **delta)
8b11a64c 4918{
b1b02be2
ZD
4919 unsigned i, ci;
4920 struct iv_use *use;
4921 struct cost_pair *old_cp, *new_cp, *cp;
4922 bitmap_iterator bi;
4923 struct iv_cand *cnd;
6e8c65f6 4924 comp_cost cost;
b1b02be2
ZD
4925
4926 *delta = NULL;
4927 for (i = 0; i < n_iv_uses (data); i++)
4928 {
4929 use = iv_use (data, i);
4930
4931 old_cp = iv_ca_cand_for_use (ivs, use);
4932 if (old_cp->cand != cand)
4933 continue;
4934
4935 new_cp = NULL;
4936
4937 if (data->consider_all_candidates)
4938 {
4939 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4940 {
4941 if (ci == cand->id)
4942 continue;
4943
4944 cnd = iv_cand (data, ci);
4945
4946 cp = get_use_iv_cost (data, use, cnd);
4947 if (!cp)
4948 continue;
4949 if (!iv_ca_has_deps (ivs, cp))
4950 continue;
4951
4952 if (!cheaper_cost_pair (cp, new_cp))
4953 continue;
4954
4955 new_cp = cp;
4956 }
4957 }
4958 else
4959 {
4960 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4961 {
4962 if (ci == cand->id)
4963 continue;
4964
4965 cnd = iv_cand (data, ci);
4966
4967 cp = get_use_iv_cost (data, use, cnd);
4968 if (!cp)
4969 continue;
4970 if (!iv_ca_has_deps (ivs, cp))
4971 continue;
4972
4973 if (!cheaper_cost_pair (cp, new_cp))
4974 continue;
4975
4976 new_cp = cp;
4977 }
4978 }
4979
4980 if (!new_cp)
4981 {
4982 iv_ca_delta_free (delta);
6e8c65f6 4983 return infinite_cost;
b1b02be2
ZD
4984 }
4985
4986 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4987 }
4988
4989 iv_ca_delta_commit (data, ivs, *delta, true);
4990 cost = iv_ca_cost (ivs);
4991 iv_ca_delta_commit (data, ivs, *delta, false);
4992
4993 return cost;
8b11a64c
ZD
4994}
4995
36f5ada1
ZD
4996/* Try optimizing the set of candidates IVS by removing candidates different
4997 from to EXCEPT_CAND from it. Return cost of the new set, and store
4998 differences in DELTA. */
4999
6e8c65f6 5000static comp_cost
36f5ada1
ZD
5001iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5002 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5003{
5004 bitmap_iterator bi;
5005 struct iv_ca_delta *act_delta, *best_delta;
6e8c65f6
ZD
5006 unsigned i;
5007 comp_cost best_cost, acost;
36f5ada1
ZD
5008 struct iv_cand *cand;
5009
5010 best_delta = NULL;
5011 best_cost = iv_ca_cost (ivs);
5012
5013 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5014 {
5015 cand = iv_cand (data, i);
5016
5017 if (cand == except_cand)
5018 continue;
5019
5020 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5021
6e8c65f6 5022 if (compare_costs (acost, best_cost) < 0)
36f5ada1
ZD
5023 {
5024 best_cost = acost;
5025 iv_ca_delta_free (&best_delta);
5026 best_delta = act_delta;
5027 }
5028 else
5029 iv_ca_delta_free (&act_delta);
5030 }
5031
5032 if (!best_delta)
5033 {
5034 *delta = NULL;
5035 return best_cost;
5036 }
5037
5038 /* Recurse to possibly remove other unnecessary ivs. */
5039 iv_ca_delta_commit (data, ivs, best_delta, true);
5040 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5041 iv_ca_delta_commit (data, ivs, best_delta, false);
5042 *delta = iv_ca_delta_join (best_delta, *delta);
5043 return best_cost;
5044}
5045
b1b02be2 5046/* Tries to extend the sets IVS in the best possible way in order
8b11a64c
ZD
5047 to express the USE. */
5048
5049static bool
b1b02be2 5050try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
8b11a64c
ZD
5051 struct iv_use *use)
5052{
6e8c65f6 5053 comp_cost best_cost, act_cost;
8b11a64c 5054 unsigned i;
38b0dcb8
ZD
5055 bitmap_iterator bi;
5056 struct iv_cand *cand;
b1b02be2
ZD
5057 struct iv_ca_delta *best_delta = NULL, *act_delta;
5058 struct cost_pair *cp;
5059
5060 iv_ca_add_use (data, ivs, use);
5061 best_cost = iv_ca_cost (ivs);
8b11a64c 5062
b1b02be2
ZD
5063 cp = iv_ca_cand_for_use (ivs, use);
5064 if (cp)
5065 {
5066 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5067 iv_ca_set_no_cp (data, ivs, use);
5068 }
8b11a64c 5069
6e8c65f6
ZD
5070 /* First try important candidates not based on any memory object. Only if
5071 this fails, try the specific ones. Rationale -- in loops with many
5072 variables the best choice often is to use just one generic biv. If we
5073 added here many ivs specific to the uses, the optimization algorithm later
5074 would be likely to get stuck in a local minimum, thus causing us to create
5075 too many ivs. The approach from few ivs to more seems more likely to be
5076 successful -- starting from few ivs, replacing an expensive use by a
5077 specific iv should always be a win. */
38b0dcb8 5078 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
8b11a64c 5079 {
38b0dcb8
ZD
5080 cand = iv_cand (data, i);
5081
6e8c65f6
ZD
5082 if (cand->iv->base_object != NULL_TREE)
5083 continue;
5084
b1b02be2 5085 if (iv_ca_cand_used_p (ivs, cand))
8b11a64c
ZD
5086 continue;
5087
b1b02be2
ZD
5088 cp = get_use_iv_cost (data, use, cand);
5089 if (!cp)
5090 continue;
5091
5092 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 5093 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
5094 iv_ca_set_no_cp (data, ivs, use);
5095 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
8b11a64c 5096
6e8c65f6 5097 if (compare_costs (act_cost, best_cost) < 0)
8b11a64c
ZD
5098 {
5099 best_cost = act_cost;
b1b02be2
ZD
5100
5101 iv_ca_delta_free (&best_delta);
5102 best_delta = act_delta;
8b11a64c 5103 }
b1b02be2
ZD
5104 else
5105 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
5106 }
5107
6e8c65f6 5108 if (infinite_cost_p (best_cost))
38b0dcb8
ZD
5109 {
5110 for (i = 0; i < use->n_map_members; i++)
5111 {
5112 cp = use->cost_map + i;
b1b02be2
ZD
5113 cand = cp->cand;
5114 if (!cand)
38b0dcb8
ZD
5115 continue;
5116
5117 /* Already tried this. */
6e8c65f6 5118 if (cand->important && cand->iv->base_object == NULL_TREE)
b1b02be2
ZD
5119 continue;
5120
5121 if (iv_ca_cand_used_p (ivs, cand))
38b0dcb8
ZD
5122 continue;
5123
b1b02be2
ZD
5124 act_delta = NULL;
5125 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 5126 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
5127 iv_ca_set_no_cp (data, ivs, use);
5128 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5129 cp, act_delta);
38b0dcb8 5130
6e8c65f6 5131 if (compare_costs (act_cost, best_cost) < 0)
38b0dcb8
ZD
5132 {
5133 best_cost = act_cost;
b1b02be2
ZD
5134
5135 if (best_delta)
5136 iv_ca_delta_free (&best_delta);
5137 best_delta = act_delta;
38b0dcb8 5138 }
b1b02be2
ZD
5139 else
5140 iv_ca_delta_free (&act_delta);
38b0dcb8
ZD
5141 }
5142 }
5143
b1b02be2
ZD
5144 iv_ca_delta_commit (data, ivs, best_delta, true);
5145 iv_ca_delta_free (&best_delta);
8b11a64c 5146
6e8c65f6 5147 return !infinite_cost_p (best_cost);
8b11a64c
ZD
5148}
5149
b1b02be2 5150/* Finds an initial assignment of candidates to uses. */
8b11a64c 5151
b1b02be2
ZD
5152static struct iv_ca *
5153get_initial_solution (struct ivopts_data *data)
8b11a64c 5154{
b1b02be2 5155 struct iv_ca *ivs = iv_ca_new (data);
8b11a64c
ZD
5156 unsigned i;
5157
5158 for (i = 0; i < n_iv_uses (data); i++)
b1b02be2
ZD
5159 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5160 {
5161 iv_ca_free (&ivs);
5162 return NULL;
5163 }
8b11a64c 5164
b1b02be2 5165 return ivs;
8b11a64c
ZD
5166}
5167
b1b02be2 5168/* Tries to improve set of induction variables IVS. */
8b11a64c
ZD
5169
5170static bool
b1b02be2 5171try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
8b11a64c 5172{
6e8c65f6
ZD
5173 unsigned i, n_ivs;
5174 comp_cost acost, best_cost = iv_ca_cost (ivs);
36f5ada1 5175 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
b1b02be2 5176 struct iv_cand *cand;
8b11a64c 5177
36f5ada1 5178 /* Try extending the set of induction variables by one. */
8b11a64c
ZD
5179 for (i = 0; i < n_iv_cands (data); i++)
5180 {
b1b02be2
ZD
5181 cand = iv_cand (data, i);
5182
5183 if (iv_ca_cand_used_p (ivs, cand))
36f5ada1
ZD
5184 continue;
5185
5186 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5187 if (!act_delta)
5188 continue;
5189
5190 /* If we successfully added the candidate and the set is small enough,
5191 try optimizing it by removing other candidates. */
5192 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5193 {
5194 iv_ca_delta_commit (data, ivs, act_delta, true);
5195 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5196 iv_ca_delta_commit (data, ivs, act_delta, false);
5197 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5198 }
8b11a64c 5199
6e8c65f6 5200 if (compare_costs (acost, best_cost) < 0)
8b11a64c 5201 {
b1b02be2 5202 best_cost = acost;
36f5ada1 5203 iv_ca_delta_free (&best_delta);
b1b02be2 5204 best_delta = act_delta;
8b11a64c 5205 }
8b11a64c 5206 else
b1b02be2 5207 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
5208 }
5209
b1b02be2 5210 if (!best_delta)
36f5ada1
ZD
5211 {
5212 /* Try removing the candidates from the set instead. */
5213 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5214
5215 /* Nothing more we can do. */
5216 if (!best_delta)
5217 return false;
5218 }
8b11a64c 5219
b1b02be2 5220 iv_ca_delta_commit (data, ivs, best_delta, true);
6e8c65f6 5221 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
b1b02be2 5222 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
5223 return true;
5224}
5225
5226/* Attempts to find the optimal set of induction variables. We do simple
5227 greedy heuristic -- we try to replace at most one candidate in the selected
5228 solution and remove the unused ivs while this improves the cost. */
5229
b1b02be2 5230static struct iv_ca *
8b11a64c
ZD
5231find_optimal_iv_set (struct ivopts_data *data)
5232{
b1b02be2
ZD
5233 unsigned i;
5234 struct iv_ca *set;
8b11a64c
ZD
5235 struct iv_use *use;
5236
b1b02be2
ZD
5237 /* Get the initial solution. */
5238 set = get_initial_solution (data);
5239 if (!set)
8b11a64c
ZD
5240 {
5241 if (dump_file && (dump_flags & TDF_DETAILS))
5242 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
8b11a64c
ZD
5243 return NULL;
5244 }
5245
5246 if (dump_file && (dump_flags & TDF_DETAILS))
5247 {
b1b02be2
ZD
5248 fprintf (dump_file, "Initial set of candidates:\n");
5249 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
5250 }
5251
b1b02be2 5252 while (try_improve_iv_set (data, set))
8b11a64c
ZD
5253 {
5254 if (dump_file && (dump_flags & TDF_DETAILS))
5255 {
b1b02be2
ZD
5256 fprintf (dump_file, "Improved to:\n");
5257 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
5258 }
5259 }
5260
5261 if (dump_file && (dump_flags & TDF_DETAILS))
6e8c65f6
ZD
5262 {
5263 comp_cost cost = iv_ca_cost (set);
5264 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5265 }
8b11a64c
ZD
5266
5267 for (i = 0; i < n_iv_uses (data); i++)
5268 {
5269 use = iv_use (data, i);
b1b02be2 5270 use->selected = iv_ca_cand_for_use (set, use)->cand;
8b11a64c
ZD
5271 }
5272
8b11a64c
ZD
5273 return set;
5274}
5275
5276/* Creates a new induction variable corresponding to CAND. */
5277
5278static void
5279create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5280{
726a989a 5281 gimple_stmt_iterator incr_pos;
8b11a64c
ZD
5282 tree base;
5283 bool after = false;
5284
5285 if (!cand->iv)
5286 return;
5287
5288 switch (cand->pos)
5289 {
5290 case IP_NORMAL:
726a989a 5291 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
8b11a64c
ZD
5292 break;
5293
5294 case IP_END:
726a989a 5295 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
8b11a64c
ZD
5296 after = true;
5297 break;
5298
2c08497a
BS
5299 case IP_AFTER_USE:
5300 after = true;
5301 /* fall through */
5302 case IP_BEFORE_USE:
5303 incr_pos = gsi_for_stmt (cand->incremented_at);
5304 break;
5305
8b11a64c
ZD
5306 case IP_ORIGINAL:
5307 /* Mark that the iv is preserved. */
5308 name_info (data, cand->var_before)->preserve_biv = true;
5309 name_info (data, cand->var_after)->preserve_biv = true;
5310
5311 /* Rewrite the increment so that it uses var_before directly. */
5312 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5313
5314 return;
5315 }
5316
5317 gimple_add_tmp_var (cand->var_before);
f004ab02 5318 add_referenced_var (cand->var_before);
8b11a64c
ZD
5319
5320 base = unshare_expr (cand->iv->base);
5321
9be872b7
ZD
5322 create_iv (base, unshare_expr (cand->iv->step),
5323 cand->var_before, data->current_loop,
8b11a64c
ZD
5324 &incr_pos, after, &cand->var_before, &cand->var_after);
5325}
5326
5327/* Creates new induction variables described in SET. */
5328
5329static void
b1b02be2 5330create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
8b11a64c
ZD
5331{
5332 unsigned i;
5333 struct iv_cand *cand;
87c476a2 5334 bitmap_iterator bi;
8b11a64c 5335
b1b02be2 5336 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
8b11a64c
ZD
5337 {
5338 cand = iv_cand (data, i);
5339 create_new_iv (data, cand);
87c476a2 5340 }
8b11a64c
ZD
5341}
5342
8b11a64c
ZD
5343
5344/* Rewrites USE (definition of iv used in a nonlinear expression)
5345 using candidate CAND. */
5346
5347static void
5348rewrite_use_nonlinear_expr (struct ivopts_data *data,
5349 struct iv_use *use, struct iv_cand *cand)
5350{
3520b745 5351 tree comp;
726a989a
RB
5352 tree op, tgt;
5353 gimple ass;
5354 gimple_stmt_iterator bsi;
3520b745
ZD
5355
5356 /* An important special case -- if we are asked to express value of
5357 the original iv by itself, just exit; there is no need to
5358 introduce a new computation (that might also need casting the
5359 variable to unsigned and back). */
5360 if (cand->pos == IP_ORIGINAL
7b9d4f70 5361 && cand->incremented_at == use->stmt)
3520b745 5362 {
7b9d4f70 5363 tree step, ctype, utype;
726a989a 5364 enum tree_code incr_code = PLUS_EXPR, old_code;
7b9d4f70 5365
726a989a
RB
5366 gcc_assert (is_gimple_assign (use->stmt));
5367 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7b9d4f70
ZD
5368
5369 step = cand->iv->step;
5370 ctype = TREE_TYPE (step);
5371 utype = TREE_TYPE (cand->var_after);
5372 if (TREE_CODE (step) == NEGATE_EXPR)
5373 {
5374 incr_code = MINUS_EXPR;
5375 step = TREE_OPERAND (step, 0);
5376 }
5377
5378 /* Check whether we may leave the computation unchanged.
5379 This is the case only if it does not rely on other
5380 computations in the loop -- otherwise, the computation
5381 we rely upon may be removed in remove_unused_ivs,
5382 thus leading to ICE. */
726a989a
RB
5383 old_code = gimple_assign_rhs_code (use->stmt);
5384 if (old_code == PLUS_EXPR
5385 || old_code == MINUS_EXPR
5386 || old_code == POINTER_PLUS_EXPR)
7b9d4f70 5387 {
726a989a
RB
5388 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5389 op = gimple_assign_rhs2 (use->stmt);
5390 else if (old_code != MINUS_EXPR
5391 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5392 op = gimple_assign_rhs1 (use->stmt);
7b9d4f70
ZD
5393 else
5394 op = NULL_TREE;
5395 }
5396 else
5397 op = NULL_TREE;
3520b745 5398
7b9d4f70
ZD
5399 if (op
5400 && (TREE_CODE (op) == INTEGER_CST
5401 || operand_equal_p (op, step, 0)))
3520b745 5402 return;
7b9d4f70
ZD
5403
5404 /* Otherwise, add the necessary computations to express
5405 the iv. */
5406 op = fold_convert (ctype, cand->var_before);
5407 comp = fold_convert (utype,
5408 build2 (incr_code, ctype, op,
5409 unshare_expr (step)));
3520b745 5410 }
7b9d4f70 5411 else
73f30c63
ZD
5412 {
5413 comp = get_computation (data->current_loop, use, cand);
5414 gcc_assert (comp != NULL_TREE);
5415 }
3520b745 5416
726a989a 5417 switch (gimple_code (use->stmt))
8b11a64c 5418 {
726a989a 5419 case GIMPLE_PHI:
8b11a64c
ZD
5420 tgt = PHI_RESULT (use->stmt);
5421
5422 /* If we should keep the biv, do not replace it. */
5423 if (name_info (data, tgt)->preserve_biv)
5424 return;
5425
726a989a 5426 bsi = gsi_after_labels (gimple_bb (use->stmt));
1e128c5f
GB
5427 break;
5428
726a989a
RB
5429 case GIMPLE_ASSIGN:
5430 tgt = gimple_assign_lhs (use->stmt);
5431 bsi = gsi_for_stmt (use->stmt);
1e128c5f
GB
5432 break;
5433
5434 default:
5435 gcc_unreachable ();
8b11a64c 5436 }
8b11a64c 5437
726a989a
RB
5438 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5439 true, GSI_SAME_STMT);
8b11a64c 5440
726a989a 5441 if (gimple_code (use->stmt) == GIMPLE_PHI)
8b11a64c 5442 {
726a989a
RB
5443 ass = gimple_build_assign (tgt, op);
5444 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
ae0a4449
AO
5445
5446 bsi = gsi_for_stmt (use->stmt);
5447 remove_phi_node (&bsi, false);
8b11a64c
ZD
5448 }
5449 else
726a989a
RB
5450 {
5451 gimple_assign_set_rhs_from_tree (&bsi, op);
5452 use->stmt = gsi_stmt (bsi);
5453 }
8b11a64c
ZD
5454}
5455
5456/* Replaces ssa name in index IDX by its basic variable. Callback for
5457 for_each_index. */
5458
5459static bool
be35cf60 5460idx_remove_ssa_names (tree base, tree *idx,
8b11a64c
ZD
5461 void *data ATTRIBUTE_UNUSED)
5462{
be35cf60
ZD
5463 tree *op;
5464
8b11a64c
ZD
5465 if (TREE_CODE (*idx) == SSA_NAME)
5466 *idx = SSA_NAME_VAR (*idx);
be35cf60 5467
9f7ccf69 5468 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
be35cf60
ZD
5469 {
5470 op = &TREE_OPERAND (base, 2);
5471 if (*op
5472 && TREE_CODE (*op) == SSA_NAME)
5473 *op = SSA_NAME_VAR (*op);
5474 op = &TREE_OPERAND (base, 3);
5475 if (*op
5476 && TREE_CODE (*op) == SSA_NAME)
5477 *op = SSA_NAME_VAR (*op);
5478 }
5479
8b11a64c
ZD
5480 return true;
5481}
5482
5483/* Unshares REF and replaces ssa names inside it by their basic variables. */
5484
5485static tree
5486unshare_and_remove_ssa_names (tree ref)
5487{
5488 ref = unshare_expr (ref);
5489 for_each_index (&ref, idx_remove_ssa_names, NULL);
5490
5491 return ref;
5492}
5493
ac182688 5494/* Copies the reference information from OLD_REF to NEW_REF. */
0bca51f0 5495
ac182688
ZD
5496static void
5497copy_ref_info (tree new_ref, tree old_ref)
5498{
5499 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5500 copy_mem_ref_info (new_ref, old_ref);
5501 else
5006671f 5502 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
8b11a64c
ZD
5503}
5504
5505/* Rewrites USE (address that is an iv) using candidate CAND. */
5506
5507static void
5508rewrite_use_address (struct ivopts_data *data,
5509 struct iv_use *use, struct iv_cand *cand)
5510{
73f30c63 5511 aff_tree aff;
726a989a 5512 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
d7c0c068 5513 tree base_hint = NULL_TREE;
ac182688 5514 tree ref;
73f30c63 5515 bool ok;
8b11a64c 5516
73f30c63
ZD
5517 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5518 gcc_assert (ok);
ac182688 5519 unshare_aff_combination (&aff);
8b11a64c 5520
d7c0c068
UW
5521 /* To avoid undefined overflow problems, all IV candidates use unsigned
5522 integer types. The drawback is that this makes it impossible for
5523 create_mem_ref to distinguish an IV that is based on a memory object
5524 from one that represents simply an offset.
5525
5526 To work around this problem, we pass a hint to create_mem_ref that
5527 indicates which variable (if any) in aff is an IV based on a memory
5528 object. Note that we only consider the candidate. If this is not
5529 based on an object, the base of the reference is in some subexpression
5530 of the use -- but these will use pointer types, so they are recognized
5531 by the create_mem_ref heuristics anyway. */
5532 if (cand->iv->base_object)
5533 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5534
5535 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5536 data->speed);
ac182688
ZD
5537 copy_ref_info (ref, *use->op_p);
5538 *use->op_p = ref;
8b11a64c
ZD
5539}
5540
5541/* Rewrites USE (the condition such that one of the arguments is an iv) using
5542 candidate CAND. */
5543
5544static void
5545rewrite_use_compare (struct ivopts_data *data,
5546 struct iv_use *use, struct iv_cand *cand)
5547{
b697aed4 5548 tree comp, *var_p, op, bound;
726a989a 5549 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
8b11a64c 5550 enum tree_code compare;
f5f12961 5551 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
b697aed4
ZD
5552 bool ok;
5553
f5f12961
ZD
5554 bound = cp->value;
5555 if (bound)
8b11a64c 5556 {
9e7376e5
ZD
5557 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5558 tree var_type = TREE_TYPE (var);
dc5b3407 5559 gimple_seq stmts;
9e7376e5 5560
f5f12961 5561 compare = iv_elimination_compare (data, use);
b697aed4 5562 bound = unshare_expr (fold_convert (var_type, bound));
dc5b3407
ZD
5563 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5564 if (stmts)
5565 gsi_insert_seq_on_edge_immediate (
5566 loop_preheader_edge (data->current_loop),
5567 stmts);
8b11a64c 5568
726a989a
RB
5569 gimple_cond_set_lhs (use->stmt, var);
5570 gimple_cond_set_code (use->stmt, compare);
5571 gimple_cond_set_rhs (use->stmt, op);
8b11a64c
ZD
5572 return;
5573 }
5574
5575 /* The induction variable elimination failed; just express the original
5576 giv. */
ac182688 5577 comp = get_computation (data->current_loop, use, cand);
73f30c63 5578 gcc_assert (comp != NULL_TREE);
8b11a64c 5579
726a989a 5580 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
b697aed4 5581 gcc_assert (ok);
8b11a64c 5582
726a989a
RB
5583 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5584 true, GSI_SAME_STMT);
8b11a64c
ZD
5585}
5586
8b11a64c
ZD
5587/* Rewrites USE using candidate CAND. */
5588
5589static void
cfaab3a9 5590rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
8b11a64c
ZD
5591{
5592 switch (use->type)
5593 {
5594 case USE_NONLINEAR_EXPR:
5595 rewrite_use_nonlinear_expr (data, use, cand);
5596 break;
5597
8b11a64c
ZD
5598 case USE_ADDRESS:
5599 rewrite_use_address (data, use, cand);
5600 break;
5601
5602 case USE_COMPARE:
5603 rewrite_use_compare (data, use, cand);
5604 break;
5605
5606 default:
1e128c5f 5607 gcc_unreachable ();
8b11a64c 5608 }
cff4e50d
PB
5609
5610 update_stmt (use->stmt);
8b11a64c
ZD
5611}
5612
5613/* Rewrite the uses using the selected induction variables. */
5614
5615static void
5616rewrite_uses (struct ivopts_data *data)
5617{
5618 unsigned i;
5619 struct iv_cand *cand;
5620 struct iv_use *use;
5621
5622 for (i = 0; i < n_iv_uses (data); i++)
5623 {
5624 use = iv_use (data, i);
5625 cand = use->selected;
1e128c5f 5626 gcc_assert (cand);
8b11a64c
ZD
5627
5628 rewrite_use (data, use, cand);
5629 }
5630}
5631
5632/* Removes the ivs that are not used after rewriting. */
5633
5634static void
5635remove_unused_ivs (struct ivopts_data *data)
5636{
5637 unsigned j;
87c476a2 5638 bitmap_iterator bi;
ae0a4449 5639 bitmap toremove = BITMAP_ALLOC (NULL);
8b11a64c 5640
ae0a4449
AO
5641 /* Figure out an order in which to release SSA DEFs so that we don't
5642 release something that we'd have to propagate into a debug stmt
5643 afterwards. */
87c476a2 5644 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
5645 {
5646 struct version_info *info;
5647
5648 info = ver_info (data, j);
5649 if (info->iv
6e42ce54 5650 && !integer_zerop (info->iv->step)
8b11a64c
ZD
5651 && !info->inv_id
5652 && !info->iv->have_use_for
5653 && !info->preserve_biv)
ae0a4449 5654 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
87c476a2 5655 }
ae0a4449
AO
5656
5657 release_defs_bitset (toremove);
5658
5659 BITMAP_FREE (toremove);
8b11a64c
ZD
5660}
5661
5662/* Frees data allocated by the optimization of a single loop. */
5663
5664static void
5665free_loop_data (struct ivopts_data *data)
5666{
5667 unsigned i, j;
87c476a2 5668 bitmap_iterator bi;
69ebd99d 5669 tree obj;
8b11a64c 5670
15814ba0
PB
5671 if (data->niters)
5672 {
5673 pointer_map_destroy (data->niters);
5674 data->niters = NULL;
5675 }
ca4c3169 5676
87c476a2 5677 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
5678 {
5679 struct version_info *info;
5680
5681 info = ver_info (data, i);
5682 if (info->iv)
5683 free (info->iv);
5684 info->iv = NULL;
5685 info->has_nonlin_use = false;
5686 info->preserve_biv = false;
5687 info->inv_id = 0;
87c476a2 5688 }
8b11a64c 5689 bitmap_clear (data->relevant);
b1b02be2 5690 bitmap_clear (data->important_candidates);
8b11a64c
ZD
5691
5692 for (i = 0; i < n_iv_uses (data); i++)
5693 {
5694 struct iv_use *use = iv_use (data, i);
5695
5696 free (use->iv);
8bdbfff5 5697 BITMAP_FREE (use->related_cands);
8b11a64c
ZD
5698 for (j = 0; j < use->n_map_members; j++)
5699 if (use->cost_map[j].depends_on)
8bdbfff5 5700 BITMAP_FREE (use->cost_map[j].depends_on);
8b11a64c
ZD
5701 free (use->cost_map);
5702 free (use);
5703 }
69ebd99d 5704 VEC_truncate (iv_use_p, data->iv_uses, 0);
8b11a64c
ZD
5705
5706 for (i = 0; i < n_iv_cands (data); i++)
5707 {
5708 struct iv_cand *cand = iv_cand (data, i);
5709
5710 if (cand->iv)
5711 free (cand->iv);
9be872b7
ZD
5712 if (cand->depends_on)
5713 BITMAP_FREE (cand->depends_on);
8b11a64c
ZD
5714 free (cand);
5715 }
69ebd99d 5716 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
8b11a64c
ZD
5717
5718 if (data->version_info_size < num_ssa_names)
5719 {
5720 data->version_info_size = 2 * num_ssa_names;
5721 free (data->version_info);
5ed6ace5 5722 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8b11a64c
ZD
5723 }
5724
5725 data->max_inv_id = 0;
5726
69ebd99d
KH
5727 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5728 SET_DECL_RTL (obj, NULL_RTX);
8b11a64c 5729
69ebd99d 5730 VEC_truncate (tree, decl_rtl_to_reset, 0);
8b11a64c
ZD
5731}
5732
5733/* Finalizes data structures used by the iv optimization pass. LOOPS is the
5734 loop tree. */
5735
5736static void
9a2ef6b8 5737tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
8b11a64c 5738{
8b11a64c
ZD
5739 free_loop_data (data);
5740 free (data->version_info);
8bdbfff5
NS
5741 BITMAP_FREE (data->relevant);
5742 BITMAP_FREE (data->important_candidates);
8b11a64c 5743
69ebd99d
KH
5744 VEC_free (tree, heap, decl_rtl_to_reset);
5745 VEC_free (iv_use_p, heap, data->iv_uses);
5746 VEC_free (iv_cand_p, heap, data->iv_candidates);
8b11a64c
ZD
5747}
5748
5749/* Optimizes the LOOP. Returns true if anything changed. */
5750
5751static bool
5752tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5753{
5754 bool changed = false;
b1b02be2 5755 struct iv_ca *iv_ca;
8b11a64c 5756 edge exit;
2c08497a 5757 basic_block *body;
8b11a64c 5758
15814ba0 5759 gcc_assert (!data->niters);
8b11a64c 5760 data->current_loop = loop;
f40751dd 5761 data->speed = optimize_loop_for_speed_p (loop);
8b11a64c
ZD
5762
5763 if (dump_file && (dump_flags & TDF_DETAILS))
5764 {
5765 fprintf (dump_file, "Processing loop %d\n", loop->num);
5766
5767 exit = single_dom_exit (loop);
5768 if (exit)
5769 {
5770 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5771 exit->src->index, exit->dest->index);
726a989a 5772 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8b11a64c
ZD
5773 fprintf (dump_file, "\n");
5774 }
5775
5776 fprintf (dump_file, "\n");
5777 }
5778
2c08497a
BS
5779 body = get_loop_body (loop);
5780 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5781 free (body);
5782
8b11a64c
ZD
5783 /* For each ssa name determines whether it behaves as an induction variable
5784 in some loop. */
5785 if (!find_induction_variables (data))
5786 goto finish;
5787
5788 /* Finds interesting uses (item 1). */
5789 find_interesting_uses (data);
5790 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5791 goto finish;
5792
5793 /* Finds candidates for the induction variables (item 2). */
5794 find_iv_candidates (data);
5795
5796 /* Calculates the costs (item 3, part 1). */
8b11a64c 5797 determine_iv_costs (data);
2c08497a 5798 determine_use_iv_costs (data);
8b11a64c
ZD
5799 determine_set_costs (data);
5800
5801 /* Find the optimal set of induction variables (item 3, part 2). */
b1b02be2
ZD
5802 iv_ca = find_optimal_iv_set (data);
5803 if (!iv_ca)
8b11a64c
ZD
5804 goto finish;
5805 changed = true;
5806
5807 /* Create the new induction variables (item 4, part 1). */
b1b02be2
ZD
5808 create_new_ivs (data, iv_ca);
5809 iv_ca_free (&iv_ca);
8b11a64c
ZD
5810
5811 /* Rewrite the uses (item 4, part 2). */
5812 rewrite_uses (data);
5813
5814 /* Remove the ivs that are unused after rewriting. */
5815 remove_unused_ivs (data);
5816
8b11a64c
ZD
5817 /* We have changed the structure of induction variables; it might happen
5818 that definitions in the scev database refer to some of them that were
5819 eliminated. */
5820 scev_reset ();
5821
5822finish:
5823 free_loop_data (data);
5824
5825 return changed;
5826}
5827
d73be268 5828/* Main entry point. Optimizes induction variables in loops. */
8b11a64c
ZD
5829
5830void
d73be268 5831tree_ssa_iv_optimize (void)
8b11a64c
ZD
5832{
5833 struct loop *loop;
5834 struct ivopts_data data;
42fd6772 5835 loop_iterator li;
8b11a64c 5836
9a2ef6b8 5837 tree_ssa_iv_optimize_init (&data);
8b11a64c
ZD
5838
5839 /* Optimize the loops starting with the innermost ones. */
42fd6772 5840 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
8b11a64c 5841 {
8679c649
JH
5842 if (dump_file && (dump_flags & TDF_DETAILS))
5843 flow_loop_dump (loop, dump_file, NULL, 1);
e9472263
ZD
5844
5845 tree_ssa_iv_optimize_loop (&data, loop);
8b11a64c
ZD
5846 }
5847
9a2ef6b8 5848 tree_ssa_iv_optimize_finalize (&data);
8b11a64c 5849}