]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
2009-04-03 Richard Guenther <rguenther@suse.de>
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
dec41e98 1/* Induction variable optimizations.
0ac758f7 2 Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
3 Free Software Foundation, Inc.
dec41e98 4
5This file is part of GCC.
6
7GCC is free software; you can redistribute it and/or modify it
8under the terms of the GNU General Public License as published by the
8c4c00c1 9Free Software Foundation; either version 3, or (at your option) any
dec41e98 10later version.
11
12GCC is distributed in the hope that it will be useful, but WITHOUT
13ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
14FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15for more details.
16
17You should have received a copy of the GNU General Public License
8c4c00c1 18along with GCC; see the file COPYING3. If not see
19<http://www.gnu.org/licenses/>. */
dec41e98 20
21/* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
70#include "rtl.h"
71#include "tm_p.h"
72#include "hard-reg-set.h"
73#include "basic-block.h"
74#include "output.h"
75#include "diagnostic.h"
76#include "tree-flow.h"
77#include "tree-dump.h"
78#include "timevar.h"
79#include "cfgloop.h"
80#include "varray.h"
81#include "expr.h"
82#include "tree-pass.h"
83#include "ggc.h"
84#include "insn-config.h"
85#include "recog.h"
b30a8715 86#include "pointer-set.h"
dec41e98 87#include "hashtab.h"
88#include "tree-chrec.h"
89#include "tree-scalar-evolution.h"
90#include "cfgloop.h"
91#include "params.h"
3fd8a7ab 92#include "langhooks.h"
d3610bea 93#include "tree-affine.h"
7e995af7 94#include "target.h"
dec41e98 95
96/* The infinite cost. */
97#define INFTY 10000000
98
99/* The expected number of loop iterations. TODO -- use profiling instead of
100 this. */
101#define AVG_LOOP_NITER(LOOP) 5
102
dec41e98 103
104/* Representation of the induction variable. */
105struct iv
106{
107 tree base; /* Initial value of the iv. */
065d134e 108 tree base_object; /* A memory object to that the induction variable points. */
dec41e98 109 tree step; /* Step of the iv (constant only). */
110 tree ssa_name; /* The ssa name with the value. */
111 bool biv_p; /* Is it a biv? */
112 bool have_use_for; /* Do we already have a use for it? */
113 unsigned use_id; /* The identifier in the use if it is the case. */
114};
115
116/* Per-ssa version information (induction variable descriptions, etc.). */
117struct version_info
118{
119 tree name; /* The ssa name. */
120 struct iv *iv; /* Induction variable description. */
121 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
122 an expression that is not an induction variable. */
123 unsigned inv_id; /* Id of an invariant. */
124 bool preserve_biv; /* For the original biv, whether to preserve it. */
125};
126
dec41e98 127/* Types of uses. */
128enum use_type
129{
130 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
dec41e98 131 USE_ADDRESS, /* Use in an address. */
132 USE_COMPARE /* Use is a compare. */
133};
134
3c2818b0 135/* Cost of a computation. */
136typedef struct
137{
138 unsigned cost; /* The runtime cost. */
139 unsigned complexity; /* The estimate of the complexity of the code for
140 the computation (in no concrete units --
141 complexity field should be larger for more
142 complex expressions and addressing modes). */
143} comp_cost;
144
145static const comp_cost zero_cost = {0, 0};
146static const comp_cost infinite_cost = {INFTY, INFTY};
147
dec41e98 148/* The candidate - cost pair. */
149struct cost_pair
150{
151 struct iv_cand *cand; /* The candidate. */
3c2818b0 152 comp_cost cost; /* The cost. */
dec41e98 153 bitmap depends_on; /* The list of invariants that have to be
154 preserved. */
c4d3b428 155 tree value; /* For final value elimination, the expression for
156 the final value of the iv. For iv elimination,
157 the new bound to compare with. */
dec41e98 158};
159
160/* Use. */
161struct iv_use
162{
163 unsigned id; /* The id of the use. */
164 enum use_type type; /* Type of the use. */
165 struct iv *iv; /* The induction variable it is based on. */
75a70cf9 166 gimple stmt; /* Statement in that it occurs. */
dec41e98 167 tree *op_p; /* The place where it occurs. */
00991688 168 bitmap related_cands; /* The set of "related" iv candidates, plus the common
169 important ones. */
dec41e98 170
171 unsigned n_map_members; /* Number of candidates in the cost_map list. */
172 struct cost_pair *cost_map;
173 /* The costs wrto the iv candidates. */
174
175 struct iv_cand *selected;
176 /* The selected candidate. */
177};
178
179/* The position where the iv is computed. */
180enum iv_position
181{
182 IP_NORMAL, /* At the end, just before the exit condition. */
183 IP_END, /* At the end of the latch block. */
184 IP_ORIGINAL /* The original biv. */
185};
186
187/* The induction variable candidate. */
188struct iv_cand
189{
190 unsigned id; /* The number of the candidate. */
191 bool important; /* Whether this is an "important" candidate, i.e. such
192 that it should be considered by all uses. */
193 enum iv_position pos; /* Where it is computed. */
75a70cf9 194 gimple incremented_at;/* For original biv, the statement where it is
dec41e98 195 incremented. */
196 tree var_before; /* The variable used for it before increment. */
197 tree var_after; /* The variable used for it after increment. */
198 struct iv *iv; /* The value of the candidate. NULL for
199 "pseudocandidate" used to indicate the possibility
200 to replace the final value of an iv by direct
201 computation of the value. */
202 unsigned cost; /* Cost of the candidate. */
651874e1 203 bitmap depends_on; /* The list of invariants that are used in step of the
204 biv. */
dec41e98 205};
206
207/* The data used by the induction variable optimizations. */
208
7d18ea07 209typedef struct iv_use *iv_use_p;
210DEF_VEC_P(iv_use_p);
211DEF_VEC_ALLOC_P(iv_use_p,heap);
212
213typedef struct iv_cand *iv_cand_p;
214DEF_VEC_P(iv_cand_p);
215DEF_VEC_ALLOC_P(iv_cand_p,heap);
216
dec41e98 217struct ivopts_data
218{
219 /* The currently optimized loop. */
220 struct loop *current_loop;
221
0ac758f7 222 /* Numbers of iterations for all exits of the current loop. */
223 struct pointer_map_t *niters;
f529eb25 224
fe382241 225 /* Number of registers used in it. */
226 unsigned regs_used;
227
dec41e98 228 /* The size of version_info array allocated. */
229 unsigned version_info_size;
230
231 /* The array of information for the ssa names. */
232 struct version_info *version_info;
233
234 /* The bitmap of indices in version_info whose value was changed. */
235 bitmap relevant;
236
dec41e98 237 /* The uses of induction variables. */
7d18ea07 238 VEC(iv_use_p,heap) *iv_uses;
dec41e98 239
240 /* The candidates. */
7d18ea07 241 VEC(iv_cand_p,heap) *iv_candidates;
dec41e98 242
0a501ca6 243 /* A bitmap of important candidates. */
244 bitmap important_candidates;
245
0ac758f7 246 /* The maximum invariant id. */
247 unsigned max_inv_id;
248
dec41e98 249 /* Whether to consider just related and important candidates when replacing a
250 use. */
251 bool consider_all_candidates;
0ac758f7 252
253 /* Are we optimizing for speed? */
254 bool speed;
dec41e98 255};
256
00991688 257/* An assignment of iv candidates to uses. */
258
259struct iv_ca
260{
261 /* The number of uses covered by the assignment. */
262 unsigned upto;
263
264 /* Number of uses that cannot be expressed by the candidates in the set. */
265 unsigned bad_uses;
266
267 /* Candidate assigned to a use, together with the related costs. */
268 struct cost_pair **cand_for_use;
269
270 /* Number of times each candidate is used. */
271 unsigned *n_cand_uses;
272
273 /* The candidates used. */
274 bitmap cands;
275
13ea2c1f 276 /* The number of candidates in the set. */
277 unsigned n_cands;
278
00991688 279 /* Total number of registers needed. */
280 unsigned n_regs;
281
282 /* Total cost of expressing uses. */
3c2818b0 283 comp_cost cand_use_cost;
00991688 284
285 /* Total cost of candidates. */
286 unsigned cand_cost;
287
288 /* Number of times each invariant is used. */
289 unsigned *n_invariant_uses;
290
291 /* Total cost of the assignment. */
3c2818b0 292 comp_cost cost;
00991688 293};
294
295/* Difference of two iv candidate assignments. */
296
297struct iv_ca_delta
298{
299 /* Changed use. */
300 struct iv_use *use;
301
302 /* An old assignment (for rollback purposes). */
303 struct cost_pair *old_cp;
304
305 /* A new assignment. */
306 struct cost_pair *new_cp;
307
308 /* Next change in the list. */
309 struct iv_ca_delta *next_change;
310};
311
dec41e98 312/* Bound on number of candidates below that all candidates are considered. */
313
314#define CONSIDER_ALL_CANDIDATES_BOUND \
315 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
316
91275768 317/* If there are more iv occurrences, we just give up (it is quite unlikely that
dec41e98 318 optimizing such a loop would help, and it would take ages). */
319
320#define MAX_CONSIDERED_USES \
321 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
322
13ea2c1f 323/* If there are at most this number of ivs in the set, try removing unnecessary
324 ivs from the set always. */
325
326#define ALWAYS_PRUNE_CAND_SET_BOUND \
327 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
328
dec41e98 329/* The list of trees for that the decl_rtl field must be reset is stored
330 here. */
331
7d18ea07 332static VEC(tree,heap) *decl_rtl_to_reset;
dec41e98 333
334/* Number of uses recorded in DATA. */
335
336static inline unsigned
337n_iv_uses (struct ivopts_data *data)
338{
7d18ea07 339 return VEC_length (iv_use_p, data->iv_uses);
dec41e98 340}
341
342/* Ith use recorded in DATA. */
343
344static inline struct iv_use *
345iv_use (struct ivopts_data *data, unsigned i)
346{
7d18ea07 347 return VEC_index (iv_use_p, data->iv_uses, i);
dec41e98 348}
349
350/* Number of candidates recorded in DATA. */
351
352static inline unsigned
353n_iv_cands (struct ivopts_data *data)
354{
7d18ea07 355 return VEC_length (iv_cand_p, data->iv_candidates);
dec41e98 356}
357
358/* Ith candidate recorded in DATA. */
359
360static inline struct iv_cand *
361iv_cand (struct ivopts_data *data, unsigned i)
362{
7d18ea07 363 return VEC_index (iv_cand_p, data->iv_candidates, i);
dec41e98 364}
365
dec41e98 366/* The single loop exit if it dominates the latch, NULL otherwise. */
367
8feba661 368edge
dec41e98 369single_dom_exit (struct loop *loop)
370{
d9e7e1a2 371 edge exit = single_exit (loop);
dec41e98 372
373 if (!exit)
374 return NULL;
375
376 if (!just_once_each_iteration_p (loop, exit->src))
377 return NULL;
378
379 return exit;
380}
381
382/* Dumps information about the induction variable IV to FILE. */
383
384extern void dump_iv (FILE *, struct iv *);
385void
386dump_iv (FILE *file, struct iv *iv)
387{
065d134e 388 if (iv->ssa_name)
389 {
390 fprintf (file, "ssa name ");
391 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
392 fprintf (file, "\n");
393 }
dec41e98 394
dbc64c75 395 fprintf (file, " type ");
396 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
397 fprintf (file, "\n");
398
dec41e98 399 if (iv->step)
400 {
401 fprintf (file, " base ");
402 print_generic_expr (file, iv->base, TDF_SLIM);
403 fprintf (file, "\n");
404
405 fprintf (file, " step ");
406 print_generic_expr (file, iv->step, TDF_SLIM);
407 fprintf (file, "\n");
408 }
409 else
410 {
411 fprintf (file, " invariant ");
412 print_generic_expr (file, iv->base, TDF_SLIM);
413 fprintf (file, "\n");
414 }
415
065d134e 416 if (iv->base_object)
417 {
418 fprintf (file, " base object ");
419 print_generic_expr (file, iv->base_object, TDF_SLIM);
420 fprintf (file, "\n");
421 }
422
dec41e98 423 if (iv->biv_p)
424 fprintf (file, " is a biv\n");
425}
426
427/* Dumps information about the USE to FILE. */
428
429extern void dump_use (FILE *, struct iv_use *);
430void
431dump_use (FILE *file, struct iv_use *use)
432{
dec41e98 433 fprintf (file, "use %d\n", use->id);
434
435 switch (use->type)
436 {
437 case USE_NONLINEAR_EXPR:
438 fprintf (file, " generic\n");
439 break;
440
dec41e98 441 case USE_ADDRESS:
442 fprintf (file, " address\n");
443 break;
444
445 case USE_COMPARE:
446 fprintf (file, " compare\n");
447 break;
448
449 default:
8c0963c4 450 gcc_unreachable ();
dec41e98 451 }
452
dbc64c75 453 fprintf (file, " in statement ");
75a70cf9 454 print_gimple_stmt (file, use->stmt, 0, 0);
dbc64c75 455 fprintf (file, "\n");
456
457 fprintf (file, " at position ");
458 if (use->op_p)
459 print_generic_expr (file, *use->op_p, TDF_SLIM);
460 fprintf (file, "\n");
461
065d134e 462 dump_iv (file, use->iv);
dbc64c75 463
d4caf099 464 if (use->related_cands)
465 {
466 fprintf (file, " related candidates ");
467 dump_bitmap (file, use->related_cands);
468 }
dec41e98 469}
470
471/* Dumps information about the uses to FILE. */
472
473extern void dump_uses (FILE *, struct ivopts_data *);
474void
475dump_uses (FILE *file, struct ivopts_data *data)
476{
477 unsigned i;
478 struct iv_use *use;
479
480 for (i = 0; i < n_iv_uses (data); i++)
481 {
482 use = iv_use (data, i);
483
484 dump_use (file, use);
485 fprintf (file, "\n");
486 }
487}
488
489/* Dumps information about induction variable candidate CAND to FILE. */
490
491extern void dump_cand (FILE *, struct iv_cand *);
492void
493dump_cand (FILE *file, struct iv_cand *cand)
494{
495 struct iv *iv = cand->iv;
496
497 fprintf (file, "candidate %d%s\n",
498 cand->id, cand->important ? " (important)" : "");
499
651874e1 500 if (cand->depends_on)
501 {
502 fprintf (file, " depends on ");
503 dump_bitmap (file, cand->depends_on);
504 }
505
dec41e98 506 if (!iv)
507 {
508 fprintf (file, " final value replacement\n");
509 return;
510 }
511
512 switch (cand->pos)
513 {
514 case IP_NORMAL:
515 fprintf (file, " incremented before exit test\n");
516 break;
517
518 case IP_END:
519 fprintf (file, " incremented at end\n");
520 break;
521
522 case IP_ORIGINAL:
523 fprintf (file, " original biv\n");
524 break;
525 }
526
065d134e 527 dump_iv (file, iv);
dec41e98 528}
529
530/* Returns the info for ssa version VER. */
531
532static inline struct version_info *
533ver_info (struct ivopts_data *data, unsigned ver)
534{
535 return data->version_info + ver;
536}
537
538/* Returns the info for ssa name NAME. */
539
540static inline struct version_info *
541name_info (struct ivopts_data *data, tree name)
542{
543 return ver_info (data, SSA_NAME_VERSION (name));
544}
545
dec41e98 546/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
547 emitted in LOOP. */
548
549static bool
75a70cf9 550stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
dec41e98 551{
75a70cf9 552 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
dec41e98 553
8c0963c4 554 gcc_assert (bb);
dec41e98 555
556 if (sbb == loop->latch)
557 return true;
558
559 if (sbb != bb)
560 return false;
561
562 return stmt == last_stmt (bb);
563}
564
565/* Returns true if STMT if after the place where the original induction
566 variable CAND is incremented. */
567
568static bool
75a70cf9 569stmt_after_ip_original_pos (struct iv_cand *cand, gimple stmt)
dec41e98 570{
75a70cf9 571 basic_block cand_bb = gimple_bb (cand->incremented_at);
572 basic_block stmt_bb = gimple_bb (stmt);
573 gimple_stmt_iterator bsi;
dec41e98 574
575 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
576 return false;
577
578 if (stmt_bb != cand_bb)
579 return true;
580
581 /* Scan the block from the end, since the original ivs are usually
582 incremented at the end of the loop body. */
75a70cf9 583 for (bsi = gsi_last_bb (stmt_bb); ; gsi_prev (&bsi))
dec41e98 584 {
75a70cf9 585 if (gsi_stmt (bsi) == cand->incremented_at)
dec41e98 586 return false;
75a70cf9 587 if (gsi_stmt (bsi) == stmt)
dec41e98 588 return true;
589 }
590}
591
592/* Returns true if STMT if after the place where the induction variable
593 CAND is incremented in LOOP. */
594
595static bool
75a70cf9 596stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
dec41e98 597{
598 switch (cand->pos)
599 {
600 case IP_END:
601 return false;
602
603 case IP_NORMAL:
604 return stmt_after_ip_normal_pos (loop, stmt);
605
606 case IP_ORIGINAL:
607 return stmt_after_ip_original_pos (cand, stmt);
608
609 default:
8c0963c4 610 gcc_unreachable ();
dec41e98 611 }
612}
613
dcd24d3c 614/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
615
616static bool
617abnormal_ssa_name_p (tree exp)
618{
619 if (!exp)
620 return false;
621
622 if (TREE_CODE (exp) != SSA_NAME)
623 return false;
624
625 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
626}
627
628/* Returns false if BASE or INDEX contains a ssa name that occurs in an
629 abnormal phi node. Callback for for_each_index. */
630
631static bool
632idx_contains_abnormal_ssa_name_p (tree base, tree *index,
633 void *data ATTRIBUTE_UNUSED)
634{
5e19919e 635 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dcd24d3c 636 {
637 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
638 return false;
639 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
640 return false;
641 }
642
643 return !abnormal_ssa_name_p (*index);
644}
645
646/* Returns true if EXPR contains a ssa name that occurs in an
647 abnormal phi node. */
648
72079d62 649bool
dcd24d3c 650contains_abnormal_ssa_name_p (tree expr)
651{
652 enum tree_code code;
f0d6e81c 653 enum tree_code_class codeclass;
dcd24d3c 654
655 if (!expr)
656 return false;
657
658 code = TREE_CODE (expr);
f0d6e81c 659 codeclass = TREE_CODE_CLASS (code);
dcd24d3c 660
661 if (code == SSA_NAME)
662 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
663
664 if (code == INTEGER_CST
665 || is_gimple_min_invariant (expr))
666 return false;
667
668 if (code == ADDR_EXPR)
669 return !for_each_index (&TREE_OPERAND (expr, 0),
670 idx_contains_abnormal_ssa_name_p,
671 NULL);
672
f0d6e81c 673 switch (codeclass)
dcd24d3c 674 {
675 case tcc_binary:
676 case tcc_comparison:
677 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
678 return true;
679
680 /* Fallthru. */
681 case tcc_unary:
682 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
683 return true;
684
685 break;
686
687 default:
688 gcc_unreachable ();
689 }
690
691 return false;
692}
693
dcd24d3c 694/* Returns tree describing number of iterations determined from
b091dc59 695 EXIT of DATA->current_loop, or NULL if something goes wrong. */
696
dcd24d3c 697static tree
b091dc59 698niter_for_exit (struct ivopts_data *data, edge exit)
699{
dcd24d3c 700 struct tree_niter_desc desc;
b30a8715 701 tree niter;
702 void **slot;
b091dc59 703
b30a8715 704 if (!data->niters)
b091dc59 705 {
b30a8715 706 data->niters = pointer_map_create ();
707 slot = NULL;
708 }
709 else
710 slot = pointer_map_contains (data->niters, exit);
dcd24d3c 711
b30a8715 712 if (!slot)
713 {
dcd24d3c 714 /* Try to determine number of iterations. We must know it
715 unconditionally (i.e., without possibility of # of iterations
716 being zero). Also, we cannot safely work with ssa names that
717 appear in phi nodes on abnormal edges, so that we do not create
718 overlapping life ranges for them (PR 27283). */
719 if (number_of_iterations_exit (data->current_loop,
720 exit, &desc, true)
cd743a11 721 && integer_zerop (desc.may_be_zero)
dcd24d3c 722 && !contains_abnormal_ssa_name_p (desc.niter))
b30a8715 723 niter = desc.niter;
dcd24d3c 724 else
b30a8715 725 niter = NULL_TREE;
726
727 *pointer_map_insert (data->niters, exit) = niter;
b091dc59 728 }
729 else
f0d6e81c 730 niter = (tree) *slot;
b091dc59 731
b30a8715 732 return niter;
b091dc59 733}
734
dcd24d3c 735/* Returns tree describing number of iterations determined from
b091dc59 736 single dominating exit of DATA->current_loop, or NULL if something
737 goes wrong. */
738
dcd24d3c 739static tree
b091dc59 740niter_for_single_dom_exit (struct ivopts_data *data)
741{
742 edge exit = single_dom_exit (data->current_loop);
743
744 if (!exit)
745 return NULL;
746
747 return niter_for_exit (data, exit);
748}
749
dec41e98 750/* Initializes data structures used by the iv optimization pass, stored
fe382241 751 in DATA. */
dec41e98 752
753static void
fe382241 754tree_ssa_iv_optimize_init (struct ivopts_data *data)
dec41e98 755{
dec41e98 756 data->version_info_size = 2 * num_ssa_names;
4c36ffe6 757 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
27335ffd 758 data->relevant = BITMAP_ALLOC (NULL);
759 data->important_candidates = BITMAP_ALLOC (NULL);
dec41e98 760 data->max_inv_id = 0;
b30a8715 761 data->niters = NULL;
7d18ea07 762 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
763 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
764 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
dec41e98 765}
766
065d134e 767/* Returns a memory object to that EXPR points. In case we are able to
768 determine that it does not point to any such object, NULL is returned. */
769
770static tree
771determine_base_object (tree expr)
772{
773 enum tree_code code = TREE_CODE (expr);
0de36bdb 774 tree base, obj;
065d134e 775
9b6564b5 776 /* If this is a pointer casted to any type, we need to determine
777 the base object for the pointer; so handle conversions before
778 throwing away non-pointer expressions. */
72dd6141 779 if (CONVERT_EXPR_P (expr))
9b6564b5 780 return determine_base_object (TREE_OPERAND (expr, 0));
781
065d134e 782 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
783 return NULL_TREE;
784
785 switch (code)
786 {
787 case INTEGER_CST:
788 return NULL_TREE;
789
790 case ADDR_EXPR:
791 obj = TREE_OPERAND (expr, 0);
792 base = get_base_address (obj);
793
794 if (!base)
390955a6 795 return expr;
065d134e 796
eb91f88e 797 if (TREE_CODE (base) == INDIRECT_REF)
390955a6 798 return determine_base_object (TREE_OPERAND (base, 0));
eb91f88e 799
538577e2 800 return fold_convert (ptr_type_node,
801 build_fold_addr_expr (base));
065d134e 802
0de36bdb 803 case POINTER_PLUS_EXPR:
804 return determine_base_object (TREE_OPERAND (expr, 0));
805
065d134e 806 case PLUS_EXPR:
807 case MINUS_EXPR:
0de36bdb 808 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
809 gcc_unreachable ();
065d134e 810
811 default:
812 return fold_convert (ptr_type_node, expr);
813 }
814}
815
dec41e98 816/* Allocates an induction variable with given initial value BASE and step STEP
817 for loop LOOP. */
818
819static struct iv *
820alloc_iv (tree base, tree step)
821{
4c36ffe6 822 struct iv *iv = XCNEW (struct iv);
7a973feb 823 gcc_assert (step != NULL_TREE);
dec41e98 824
825 iv->base = base;
065d134e 826 iv->base_object = determine_base_object (base);
dec41e98 827 iv->step = step;
828 iv->biv_p = false;
829 iv->have_use_for = false;
830 iv->use_id = 0;
831 iv->ssa_name = NULL_TREE;
832
833 return iv;
834}
835
836/* Sets STEP and BASE for induction variable IV. */
837
838static void
839set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
840{
841 struct version_info *info = name_info (data, iv);
842
8c0963c4 843 gcc_assert (!info->iv);
dec41e98 844
845 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
846 info->iv = alloc_iv (base, step);
847 info->iv->ssa_name = iv;
848}
849
850/* Finds induction variable declaration for VAR. */
851
852static struct iv *
853get_iv (struct ivopts_data *data, tree var)
854{
855 basic_block bb;
7a973feb 856 tree type = TREE_TYPE (var);
857
858 if (!POINTER_TYPE_P (type)
859 && !INTEGRAL_TYPE_P (type))
860 return NULL;
861
dec41e98 862 if (!name_info (data, var)->iv)
863 {
75a70cf9 864 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
dec41e98 865
866 if (!bb
867 || !flow_bb_inside_loop_p (data->current_loop, bb))
7a973feb 868 set_iv (data, var, var, build_int_cst (type, 0));
dec41e98 869 }
870
871 return name_info (data, var)->iv;
872}
873
651874e1 874/* Determines the step of a biv defined in PHI. Returns NULL if PHI does
875 not define a simple affine biv with nonzero step. */
dec41e98 876
877static tree
75a70cf9 878determine_biv_step (gimple phi)
dec41e98 879{
75a70cf9 880 struct loop *loop = gimple_bb (phi)->loop_father;
553b9523 881 tree name = PHI_RESULT (phi);
882 affine_iv iv;
dec41e98 883
884 if (!is_gimple_reg (name))
885 return NULL_TREE;
886
76610704 887 if (!simple_iv (loop, loop, name, &iv, true))
dec41e98 888 return NULL_TREE;
889
7a973feb 890 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
dec41e98 891}
892
dec41e98 893/* Finds basic ivs. */
894
895static bool
896find_bivs (struct ivopts_data *data)
897{
75a70cf9 898 gimple phi;
899 tree step, type, base;
dec41e98 900 bool found = false;
901 struct loop *loop = data->current_loop;
75a70cf9 902 gimple_stmt_iterator psi;
dec41e98 903
75a70cf9 904 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
dec41e98 905 {
75a70cf9 906 phi = gsi_stmt (psi);
907
dec41e98 908 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
909 continue;
910
911 step = determine_biv_step (phi);
dec41e98 912 if (!step)
913 continue;
dec41e98 914
915 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
b9845e76 916 base = expand_simple_operations (base);
651874e1 917 if (contains_abnormal_ssa_name_p (base)
918 || contains_abnormal_ssa_name_p (step))
dec41e98 919 continue;
920
921 type = TREE_TYPE (PHI_RESULT (phi));
922 base = fold_convert (type, base);
651874e1 923 if (step)
06240723 924 {
925 if (POINTER_TYPE_P (type))
926 step = fold_convert (sizetype, step);
927 else
928 step = fold_convert (type, step);
929 }
dec41e98 930
931 set_iv (data, PHI_RESULT (phi), base, step);
932 found = true;
933 }
934
935 return found;
936}
937
938/* Marks basic ivs. */
939
940static void
941mark_bivs (struct ivopts_data *data)
942{
75a70cf9 943 gimple phi;
944 tree var;
dec41e98 945 struct iv *iv, *incr_iv;
946 struct loop *loop = data->current_loop;
947 basic_block incr_bb;
75a70cf9 948 gimple_stmt_iterator psi;
dec41e98 949
75a70cf9 950 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
dec41e98 951 {
75a70cf9 952 phi = gsi_stmt (psi);
953
dec41e98 954 iv = get_iv (data, PHI_RESULT (phi));
955 if (!iv)
956 continue;
957
958 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
959 incr_iv = get_iv (data, var);
960 if (!incr_iv)
961 continue;
962
963 /* If the increment is in the subloop, ignore it. */
75a70cf9 964 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
dec41e98 965 if (incr_bb->loop_father != data->current_loop
966 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
967 continue;
968
969 iv->biv_p = true;
970 incr_iv->biv_p = true;
971 }
972}
973
974/* Checks whether STMT defines a linear induction variable and stores its
553b9523 975 parameters to IV. */
dec41e98 976
977static bool
75a70cf9 978find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
dec41e98 979{
980 tree lhs;
981 struct loop *loop = data->current_loop;
982
553b9523 983 iv->base = NULL_TREE;
984 iv->step = NULL_TREE;
dec41e98 985
75a70cf9 986 if (gimple_code (stmt) != GIMPLE_ASSIGN)
dec41e98 987 return false;
988
75a70cf9 989 lhs = gimple_assign_lhs (stmt);
dec41e98 990 if (TREE_CODE (lhs) != SSA_NAME)
991 return false;
992
76610704 993 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
dec41e98 994 return false;
553b9523 995 iv->base = expand_simple_operations (iv->base);
dec41e98 996
553b9523 997 if (contains_abnormal_ssa_name_p (iv->base)
998 || contains_abnormal_ssa_name_p (iv->step))
dec41e98 999 return false;
1000
1001 return true;
1002}
1003
1004/* Finds general ivs in statement STMT. */
1005
1006static void
75a70cf9 1007find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
dec41e98 1008{
553b9523 1009 affine_iv iv;
dec41e98 1010
553b9523 1011 if (!find_givs_in_stmt_scev (data, stmt, &iv))
dec41e98 1012 return;
1013
75a70cf9 1014 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
dec41e98 1015}
1016
1017/* Finds general ivs in basic block BB. */
1018
1019static void
1020find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1021{
75a70cf9 1022 gimple_stmt_iterator bsi;
dec41e98 1023
75a70cf9 1024 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1025 find_givs_in_stmt (data, gsi_stmt (bsi));
dec41e98 1026}
1027
1028/* Finds general ivs. */
1029
1030static void
1031find_givs (struct ivopts_data *data)
1032{
1033 struct loop *loop = data->current_loop;
1034 basic_block *body = get_loop_body_in_dom_order (loop);
1035 unsigned i;
1036
1037 for (i = 0; i < loop->num_nodes; i++)
1038 find_givs_in_bb (data, body[i]);
1039 free (body);
1040}
1041
dec41e98 1042/* For each ssa name defined in LOOP determines whether it is an induction
1043 variable and if so, its initial value and step. */
1044
1045static bool
1046find_induction_variables (struct ivopts_data *data)
1047{
1048 unsigned i;
0cc4271a 1049 bitmap_iterator bi;
dec41e98 1050
1051 if (!find_bivs (data))
1052 return false;
1053
1054 find_givs (data);
1055 mark_bivs (data);
dec41e98 1056
1057 if (dump_file && (dump_flags & TDF_DETAILS))
1058 {
dcd24d3c 1059 tree niter = niter_for_single_dom_exit (data);
b091dc59 1060
1061 if (niter)
dec41e98 1062 {
1063 fprintf (dump_file, " number of iterations ");
dcd24d3c 1064 print_generic_expr (dump_file, niter, TDF_SLIM);
1065 fprintf (dump_file, "\n\n");
dec41e98 1066 };
1067
1068 fprintf (dump_file, "Induction variables:\n\n");
1069
0cc4271a 1070 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
dec41e98 1071 {
1072 if (ver_info (data, i)->iv)
1073 dump_iv (dump_file, ver_info (data, i)->iv);
0cc4271a 1074 }
dec41e98 1075 }
1076
1077 return true;
1078}
1079
1080/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1081
1082static struct iv_use *
1083record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
75a70cf9 1084 gimple stmt, enum use_type use_type)
dec41e98 1085{
4c36ffe6 1086 struct iv_use *use = XCNEW (struct iv_use);
dec41e98 1087
1088 use->id = n_iv_uses (data);
1089 use->type = use_type;
1090 use->iv = iv;
1091 use->stmt = stmt;
1092 use->op_p = use_p;
27335ffd 1093 use->related_cands = BITMAP_ALLOC (NULL);
dec41e98 1094
065d134e 1095 /* To avoid showing ssa name in the dumps, if it was not reset by the
1096 caller. */
1097 iv->ssa_name = NULL_TREE;
1098
dec41e98 1099 if (dump_file && (dump_flags & TDF_DETAILS))
1100 dump_use (dump_file, use);
1101
7d18ea07 1102 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
dec41e98 1103
1104 return use;
1105}
1106
1107/* Checks whether OP is a loop-level invariant and if so, records it.
1108 NONLINEAR_USE is true if the invariant is used in a way we do not
1109 handle specially. */
1110
1111static void
1112record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1113{
1114 basic_block bb;
1115 struct version_info *info;
1116
1117 if (TREE_CODE (op) != SSA_NAME
1118 || !is_gimple_reg (op))
1119 return;
1120
75a70cf9 1121 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
dec41e98 1122 if (bb
1123 && flow_bb_inside_loop_p (data->current_loop, bb))
1124 return;
1125
1126 info = name_info (data, op);
1127 info->name = op;
1128 info->has_nonlin_use |= nonlinear_use;
1129 if (!info->inv_id)
1130 info->inv_id = ++data->max_inv_id;
1131 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1132}
1133
3659c47c 1134/* Checks whether the use OP is interesting and if so, records it. */
dec41e98 1135
1136static struct iv_use *
3659c47c 1137find_interesting_uses_op (struct ivopts_data *data, tree op)
dec41e98 1138{
1139 struct iv *iv;
1140 struct iv *civ;
75a70cf9 1141 gimple stmt;
dec41e98 1142 struct iv_use *use;
1143
1144 if (TREE_CODE (op) != SSA_NAME)
1145 return NULL;
1146
1147 iv = get_iv (data, op);
1148 if (!iv)
1149 return NULL;
1150
1151 if (iv->have_use_for)
1152 {
1153 use = iv_use (data, iv->use_id);
1154
3659c47c 1155 gcc_assert (use->type == USE_NONLINEAR_EXPR);
dec41e98 1156 return use;
1157 }
1158
7a973feb 1159 if (integer_zerop (iv->step))
dec41e98 1160 {
1161 record_invariant (data, op, true);
1162 return NULL;
1163 }
1164 iv->have_use_for = true;
1165
4c36ffe6 1166 civ = XNEW (struct iv);
dec41e98 1167 *civ = *iv;
1168
1169 stmt = SSA_NAME_DEF_STMT (op);
75a70cf9 1170 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1171 || is_gimple_assign (stmt));
dec41e98 1172
3659c47c 1173 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
dec41e98 1174 iv->use_id = use->id;
1175
1176 return use;
1177}
1178
75a70cf9 1179/* Given a condition in statement STMT, checks whether it is a compare
1180 of an induction variable and an invariant. If this is the case,
1181 CONTROL_VAR is set to location of the iv, BOUND to the location of
1182 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1183 induction variable descriptions, and true is returned. If this is not
1184 the case, CONTROL_VAR and BOUND are set to the arguments of the
1185 condition and false is returned. */
dec41e98 1186
02f8cd87 1187static bool
75a70cf9 1188extract_cond_operands (struct ivopts_data *data, gimple stmt,
02f8cd87 1189 tree **control_var, tree **bound,
1190 struct iv **iv_var, struct iv **iv_bound)
1191{
75a70cf9 1192 /* The objects returned when COND has constant operands. */
02f8cd87 1193 static struct iv const_iv;
1194 static tree zero;
02f8cd87 1195 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1196 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1197 bool ret = false;
1198
75a70cf9 1199 if (gimple_code (stmt) == GIMPLE_COND)
dec41e98 1200 {
75a70cf9 1201 op0 = gimple_cond_lhs_ptr (stmt);
1202 op1 = gimple_cond_rhs_ptr (stmt);
dec41e98 1203 }
75a70cf9 1204 else
dec41e98 1205 {
75a70cf9 1206 op0 = gimple_assign_rhs1_ptr (stmt);
1207 op1 = gimple_assign_rhs2_ptr (stmt);
dec41e98 1208 }
1209
75a70cf9 1210 zero = integer_zero_node;
1211 const_iv.step = integer_zero_node;
1212
02f8cd87 1213 if (TREE_CODE (*op0) == SSA_NAME)
1214 iv0 = get_iv (data, *op0);
1215 if (TREE_CODE (*op1) == SSA_NAME)
1216 iv1 = get_iv (data, *op1);
dec41e98 1217
02f8cd87 1218 /* Exactly one of the compared values must be an iv, and the other one must
1219 be an invariant. */
1220 if (!iv0 || !iv1)
1221 goto end;
1222
1223 if (integer_zerop (iv0->step))
1224 {
1225 /* Control variable may be on the other side. */
1226 tmp_op = op0; op0 = op1; op1 = tmp_op;
1227 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
dec41e98 1228 }
02f8cd87 1229 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1230
1231end:
1232 if (control_var)
1233 *control_var = op0;;
1234 if (iv_var)
1235 *iv_var = iv0;;
1236 if (bound)
1237 *bound = op1;
1238 if (iv_bound)
1239 *iv_bound = iv1;
1240
1241 return ret;
1242}
1243
75a70cf9 1244/* Checks whether the condition in STMT is interesting and if so,
1245 records it. */
02f8cd87 1246
1247static void
75a70cf9 1248find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
02f8cd87 1249{
1250 tree *var_p, *bound_p;
1251 struct iv *var_iv, *civ;
dec41e98 1252
75a70cf9 1253 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
dec41e98 1254 {
02f8cd87 1255 find_interesting_uses_op (data, *var_p);
1256 find_interesting_uses_op (data, *bound_p);
dec41e98 1257 return;
1258 }
1259
4c36ffe6 1260 civ = XNEW (struct iv);
02f8cd87 1261 *civ = *var_iv;
75a70cf9 1262 record_use (data, NULL, civ, stmt, USE_COMPARE);
dec41e98 1263}
1264
a59824bb 1265/* Returns true if expression EXPR is obviously invariant in LOOP,
663608f5 1266 i.e. if all its operands are defined outside of the LOOP. LOOP
1267 should not be the function body. */
a59824bb 1268
1b5c3dde 1269bool
a59824bb 1270expr_invariant_in_loop_p (struct loop *loop, tree expr)
1271{
1272 basic_block def_bb;
1273 unsigned i, len;
1274
663608f5 1275 gcc_assert (loop_depth (loop) > 0);
1276
a59824bb 1277 if (is_gimple_min_invariant (expr))
1278 return true;
1279
1280 if (TREE_CODE (expr) == SSA_NAME)
1281 {
75a70cf9 1282 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
a59824bb 1283 if (def_bb
1284 && flow_bb_inside_loop_p (loop, def_bb))
1285 return false;
1286
1287 return true;
1288 }
1289
75a70cf9 1290 if (!EXPR_P (expr))
a59824bb 1291 return false;
1292
c2f47e15 1293 len = TREE_OPERAND_LENGTH (expr);
a59824bb 1294 for (i = 0; i < len; i++)
1295 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1296 return false;
1297
1298 return true;
1299}
1300
75a70cf9 1301/* Returns true if statement STMT is obviously invariant in LOOP,
1302 i.e. if all its operands on the RHS are defined outside of the LOOP.
1303 LOOP should not be the function body. */
1304
1305bool
1306stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
1307{
1308 unsigned i;
1309 tree lhs;
1310
1311 gcc_assert (loop_depth (loop) > 0);
1312
1313 lhs = gimple_get_lhs (stmt);
1314 for (i = 0; i < gimple_num_ops (stmt); i++)
1315 {
1316 tree op = gimple_op (stmt, i);
1317 if (op != lhs && !expr_invariant_in_loop_p (loop, op))
1318 return false;
1319 }
1320
1321 return true;
1322}
1323
dec41e98 1324/* Cumulates the steps of indices into DATA and replaces their values with the
1325 initial ones. Returns false when the value of the index cannot be determined.
1326 Callback for for_each_index. */
1327
1328struct ifs_ivopts_data
1329{
1330 struct ivopts_data *ivopts_data;
75a70cf9 1331 gimple stmt;
7a973feb 1332 tree step;
dec41e98 1333};
1334
1335static bool
1336idx_find_step (tree base, tree *idx, void *data)
1337{
f0d6e81c 1338 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
dec41e98 1339 struct iv *iv;
57e3f39a 1340 tree step, iv_base, iv_step, lbound, off;
dbc64c75 1341 struct loop *loop = dta->ivopts_data->current_loop;
a59824bb 1342
1343 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1344 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1345 return false;
1346
1347 /* If base is a component ref, require that the offset of the reference
f7406879 1348 be invariant. */
a59824bb 1349 if (TREE_CODE (base) == COMPONENT_REF)
1350 {
1351 off = component_ref_field_offset (base);
1352 return expr_invariant_in_loop_p (loop, off);
1353 }
1354
1355 /* If base is array, first check whether we will be able to move the
1356 reference out of the loop (in order to take its address in strength
1357 reduction). In order for this to work we need both lower bound
1358 and step to be loop invariants. */
5e19919e 1359 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
a59824bb 1360 {
5e19919e 1361 /* Moreover, for a range, the size needs to be invariant as well. */
1362 if (TREE_CODE (base) == ARRAY_RANGE_REF
1363 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1364 return false;
1365
a59824bb 1366 step = array_ref_element_size (base);
1367 lbound = array_ref_low_bound (base);
1368
1369 if (!expr_invariant_in_loop_p (loop, step)
1370 || !expr_invariant_in_loop_p (loop, lbound))
1371 return false;
1372 }
1373
dec41e98 1374 if (TREE_CODE (*idx) != SSA_NAME)
1375 return true;
1376
1377 iv = get_iv (dta->ivopts_data, *idx);
1378 if (!iv)
1379 return false;
1380
d3d02a88 1381 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1382 *&x[0], which is not folded and does not trigger the
1383 ARRAY_REF path below. */
dec41e98 1384 *idx = iv->base;
1385
7a973feb 1386 if (integer_zerop (iv->step))
dec41e98 1387 return true;
1388
5e19919e 1389 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dbc64c75 1390 {
1391 step = array_ref_element_size (base);
dbc64c75 1392
1393 /* We only handle addresses whose step is an integer constant. */
1394 if (TREE_CODE (step) != INTEGER_CST)
1395 return false;
dbc64c75 1396 }
dec41e98 1397 else
779b4c41 1398 /* The step for pointer arithmetics already is 1 byte. */
0213c30e 1399 step = build_int_cst (sizetype, 1);
dec41e98 1400
57e3f39a 1401 iv_base = iv->base;
1402 iv_step = iv->step;
1403 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1404 sizetype, &iv_base, &iv_step, dta->stmt,
1405 false))
dec41e98 1406 {
1407 /* The index might wrap. */
1408 return false;
1409 }
1410
0213c30e 1411 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
7a973feb 1412 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
dec41e98 1413
1414 return true;
1415}
1416
1417/* Records use in index IDX. Callback for for_each_index. Ivopts data
1418 object is passed to it in DATA. */
1419
1420static bool
dbc64c75 1421idx_record_use (tree base, tree *idx,
f0d6e81c 1422 void *vdata)
dec41e98 1423{
f0d6e81c 1424 struct ivopts_data *data = (struct ivopts_data *) vdata;
dec41e98 1425 find_interesting_uses_op (data, *idx);
5e19919e 1426 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dbc64c75 1427 {
1428 find_interesting_uses_op (data, array_ref_element_size (base));
1429 find_interesting_uses_op (data, array_ref_low_bound (base));
1430 }
dec41e98 1431 return true;
1432}
1433
90bd1b58 1434/* If we can prove that TOP = cst * BOT for some constant cst,
1435 store cst to MUL and return true. Otherwise return false.
1436 The returned value is always sign-extended, regardless of the
1437 signedness of TOP and BOT. */
1438
1439static bool
1440constant_multiple_of (tree top, tree bot, double_int *mul)
1441{
1442 tree mby;
1443 enum tree_code code;
1444 double_int res, p0, p1;
1445 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1446
1447 STRIP_NOPS (top);
1448 STRIP_NOPS (bot);
1449
1450 if (operand_equal_p (top, bot, 0))
1451 {
1452 *mul = double_int_one;
1453 return true;
1454 }
1455
1456 code = TREE_CODE (top);
1457 switch (code)
1458 {
1459 case MULT_EXPR:
1460 mby = TREE_OPERAND (top, 1);
1461 if (TREE_CODE (mby) != INTEGER_CST)
1462 return false;
1463
1464 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1465 return false;
1466
1467 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
1468 precision);
1469 return true;
1470
1471 case PLUS_EXPR:
1472 case MINUS_EXPR:
1473 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1474 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1475 return false;
1476
1477 if (code == MINUS_EXPR)
1478 p1 = double_int_neg (p1);
1479 *mul = double_int_sext (double_int_add (p0, p1), precision);
1480 return true;
1481
1482 case INTEGER_CST:
1483 if (TREE_CODE (bot) != INTEGER_CST)
1484 return false;
1485
1486 p0 = double_int_sext (tree_to_double_int (top), precision);
1487 p1 = double_int_sext (tree_to_double_int (bot), precision);
1488 if (double_int_zero_p (p1))
1489 return false;
1490 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
1491 precision);
1492 return double_int_zero_p (res);
1493
1494 default:
1495 return false;
1496 }
1497}
1498
1499/* Returns true if memory reference REF with step STEP may be unaligned. */
f63992c3 1500
1501static bool
90bd1b58 1502may_be_unaligned_p (tree ref, tree step)
f63992c3 1503{
1504 tree base;
1505 tree base_type;
1506 HOST_WIDE_INT bitsize;
1507 HOST_WIDE_INT bitpos;
1508 tree toffset;
1509 enum machine_mode mode;
1510 int unsignedp, volatilep;
1511 unsigned base_align;
1512
aed164c3 1513 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
ce10738f 1514 thus they are not misaligned. */
aed164c3 1515 if (TREE_CODE (ref) == TARGET_MEM_REF)
1516 return false;
1517
f63992c3 1518 /* The test below is basically copy of what expr.c:normal_inner_ref
1519 does to check whether the object must be loaded by parts when
1520 STRICT_ALIGNMENT is true. */
1521 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1522 &unsignedp, &volatilep, true);
1523 base_type = TREE_TYPE (base);
1524 base_align = TYPE_ALIGN (base_type);
1525
90bd1b58 1526 if (mode != BLKmode)
1527 {
1528 double_int mul;
1529 tree al = build_int_cst (TREE_TYPE (step),
1530 GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT);
1531
1532 if (base_align < GET_MODE_ALIGNMENT (mode)
f63992c3 1533 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
90bd1b58 1534 || bitpos % BITS_PER_UNIT != 0)
1535 return true;
1536
a379b1cb 1537 if (!constant_multiple_of (step, al, &mul))
90bd1b58 1538 return true;
1539 }
f63992c3 1540
1541 return false;
1542}
1543
8ee578c8 1544/* Return true if EXPR may be non-addressable. */
1545
1546static bool
1547may_be_nonaddressable_p (tree expr)
1548{
1549 switch (TREE_CODE (expr))
1550 {
0e41e637 1551 case TARGET_MEM_REF:
1552 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1553 target, thus they are always addressable. */
1554 return false;
1555
8ee578c8 1556 case COMPONENT_REF:
1557 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1558 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1559
8ee578c8 1560 case VIEW_CONVERT_EXPR:
1561 /* This kind of view-conversions may wrap non-addressable objects
1562 and make them look addressable. After some processing the
1563 non-addressability may be uncovered again, causing ADDR_EXPRs
1564 of inappropriate objects to be built. */
2a8afb23 1565 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1566 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1567 return true;
1568
1569 /* ... fall through ... */
0e41e637 1570
1571 case ARRAY_REF:
1572 case ARRAY_RANGE_REF:
2a8afb23 1573 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
0e41e637 1574
72dd6141 1575 CASE_CONVERT:
0e41e637 1576 return true;
8ee578c8 1577
1578 default:
1579 break;
1580 }
1581
1582 return false;
1583}
1584
dec41e98 1585/* Finds addresses in *OP_P inside STMT. */
1586
1587static void
75a70cf9 1588find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
dec41e98 1589{
7a973feb 1590 tree base = *op_p, step = build_int_cst (sizetype, 0);
dec41e98 1591 struct iv *civ;
1592 struct ifs_ivopts_data ifs_ivopts_data;
1593
281baea1 1594 /* Do not play with volatile memory references. A bit too conservative,
1595 perhaps, but safe. */
75a70cf9 1596 if (gimple_has_volatile_ops (stmt))
281baea1 1597 goto fail;
1598
dec41e98 1599 /* Ignore bitfields for now. Not really something terribly complicated
1600 to handle. TODO. */
8ee578c8 1601 if (TREE_CODE (base) == BIT_FIELD_REF)
1602 goto fail;
1603
aed164c3 1604 base = unshare_expr (base);
1605
1606 if (TREE_CODE (base) == TARGET_MEM_REF)
1607 {
1608 tree type = build_pointer_type (TREE_TYPE (base));
1609 tree astep;
1610
1611 if (TMR_BASE (base)
1612 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1613 {
1614 civ = get_iv (data, TMR_BASE (base));
1615 if (!civ)
1616 goto fail;
1617
1618 TMR_BASE (base) = civ->base;
1619 step = civ->step;
1620 }
1621 if (TMR_INDEX (base)
1622 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1623 {
1624 civ = get_iv (data, TMR_INDEX (base));
1625 if (!civ)
1626 goto fail;
dec41e98 1627
aed164c3 1628 TMR_INDEX (base) = civ->base;
1629 astep = civ->step;
a59824bb 1630
aed164c3 1631 if (astep)
1632 {
1633 if (TMR_STEP (base))
1634 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1635
7a973feb 1636 step = fold_build2 (PLUS_EXPR, type, step, astep);
aed164c3 1637 }
1638 }
1639
7a973feb 1640 if (integer_zerop (step))
aed164c3 1641 goto fail;
1642 base = tree_mem_ref_addr (type, base);
1643 }
1644 else
1645 {
1646 ifs_ivopts_data.ivopts_data = data;
1647 ifs_ivopts_data.stmt = stmt;
7a973feb 1648 ifs_ivopts_data.step = build_int_cst (sizetype, 0);
aed164c3 1649 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
7a973feb 1650 || integer_zerop (ifs_ivopts_data.step))
aed164c3 1651 goto fail;
7a973feb 1652 step = ifs_ivopts_data.step;
aed164c3 1653
1654 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1655 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1656
0e41e637 1657 /* Check that the base expression is addressable. This needs
1658 to be done after substituting bases of IVs into it. */
1659 if (may_be_nonaddressable_p (base))
1660 goto fail;
1661
1662 /* Moreover, on strict alignment platforms, check that it is
1663 sufficiently aligned. */
90bd1b58 1664 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
0e41e637 1665 goto fail;
1666
aed164c3 1667 base = build_fold_addr_expr (base);
d3d02a88 1668
1669 /* Substituting bases of IVs into the base expression might
1670 have caused folding opportunities. */
1671 if (TREE_CODE (base) == ADDR_EXPR)
1672 {
1673 tree *ref = &TREE_OPERAND (base, 0);
1674 while (handled_component_p (*ref))
1675 ref = &TREE_OPERAND (*ref, 0);
1676 if (TREE_CODE (*ref) == INDIRECT_REF)
1677 *ref = fold_indirect_ref (*ref);
1678 }
aed164c3 1679 }
dec41e98 1680
1681 civ = alloc_iv (base, step);
1682 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1683 return;
1684
1685fail:
1686 for_each_index (op_p, idx_record_use, data);
1687}
1688
1689/* Finds and records invariants used in STMT. */
1690
1691static void
75a70cf9 1692find_invariants_stmt (struct ivopts_data *data, gimple stmt)
dec41e98 1693{
b66731e8 1694 ssa_op_iter iter;
1695 use_operand_p use_p;
dec41e98 1696 tree op;
1697
b66731e8 1698 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
dec41e98 1699 {
b66731e8 1700 op = USE_FROM_PTR (use_p);
dec41e98 1701 record_invariant (data, op, false);
1702 }
1703}
1704
1705/* Finds interesting uses of induction variables in the statement STMT. */
1706
1707static void
75a70cf9 1708find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
dec41e98 1709{
1710 struct iv *iv;
75a70cf9 1711 tree op, *lhs, *rhs;
b66731e8 1712 ssa_op_iter iter;
1713 use_operand_p use_p;
75a70cf9 1714 enum tree_code code;
dec41e98 1715
1716 find_invariants_stmt (data, stmt);
1717
75a70cf9 1718 if (gimple_code (stmt) == GIMPLE_COND)
dec41e98 1719 {
75a70cf9 1720 find_interesting_uses_cond (data, stmt);
dec41e98 1721 return;
1722 }
1723
75a70cf9 1724 if (is_gimple_assign (stmt))
dec41e98 1725 {
75a70cf9 1726 lhs = gimple_assign_lhs_ptr (stmt);
1727 rhs = gimple_assign_rhs1_ptr (stmt);
dec41e98 1728
75a70cf9 1729 if (TREE_CODE (*lhs) == SSA_NAME)
dec41e98 1730 {
1731 /* If the statement defines an induction variable, the uses are not
1732 interesting by themselves. */
1733
75a70cf9 1734 iv = get_iv (data, *lhs);
dec41e98 1735
7a973feb 1736 if (iv && !integer_zerop (iv->step))
dec41e98 1737 return;
1738 }
1739
75a70cf9 1740 code = gimple_assign_rhs_code (stmt);
1741 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1742 && (REFERENCE_CLASS_P (*rhs)
1743 || is_gimple_val (*rhs)))
dec41e98 1744 {
75a70cf9 1745 if (REFERENCE_CLASS_P (*rhs))
1746 find_interesting_uses_address (data, stmt, rhs);
1747 else
1748 find_interesting_uses_op (data, *rhs);
dec41e98 1749
75a70cf9 1750 if (REFERENCE_CLASS_P (*lhs))
1751 find_interesting_uses_address (data, stmt, lhs);
dec41e98 1752 return;
dec41e98 1753 }
75a70cf9 1754 else if (TREE_CODE_CLASS (code) == tcc_comparison)
dec41e98 1755 {
75a70cf9 1756 find_interesting_uses_cond (data, stmt);
dec41e98 1757 return;
1758 }
dbc64c75 1759
1760 /* TODO -- we should also handle address uses of type
1761
1762 memory = call (whatever);
1763
1764 and
1765
1766 call (memory). */
dec41e98 1767 }
1768
75a70cf9 1769 if (gimple_code (stmt) == GIMPLE_PHI
1770 && gimple_bb (stmt) == data->current_loop->header)
dec41e98 1771 {
75a70cf9 1772 iv = get_iv (data, PHI_RESULT (stmt));
dec41e98 1773
7a973feb 1774 if (iv && !integer_zerop (iv->step))
dec41e98 1775 return;
1776 }
1777
b66731e8 1778 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
dec41e98 1779 {
b66731e8 1780 op = USE_FROM_PTR (use_p);
dec41e98 1781
1782 if (TREE_CODE (op) != SSA_NAME)
1783 continue;
1784
1785 iv = get_iv (data, op);
1786 if (!iv)
1787 continue;
1788
1789 find_interesting_uses_op (data, op);
1790 }
1791}
1792
1793/* Finds interesting uses of induction variables outside of loops
1794 on loop exit edge EXIT. */
1795
1796static void
1797find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1798{
75a70cf9 1799 gimple phi;
1800 gimple_stmt_iterator psi;
1801 tree def;
dec41e98 1802
75a70cf9 1803 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
dec41e98 1804 {
75a70cf9 1805 phi = gsi_stmt (psi);
dec41e98 1806 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
7a973feb 1807 if (is_gimple_reg (def))
1808 find_interesting_uses_op (data, def);
dec41e98 1809 }
1810}
1811
1812/* Finds uses of the induction variables that are interesting. */
1813
1814static void
1815find_interesting_uses (struct ivopts_data *data)
1816{
1817 basic_block bb;
75a70cf9 1818 gimple_stmt_iterator bsi;
dec41e98 1819 basic_block *body = get_loop_body (data->current_loop);
1820 unsigned i;
1821 struct version_info *info;
1822 edge e;
1823
1824 if (dump_file && (dump_flags & TDF_DETAILS))
1825 fprintf (dump_file, "Uses:\n\n");
1826
1827 for (i = 0; i < data->current_loop->num_nodes; i++)
1828 {
cd665a06 1829 edge_iterator ei;
dec41e98 1830 bb = body[i];
1831
cd665a06 1832 FOR_EACH_EDGE (e, ei, bb->succs)
dec41e98 1833 if (e->dest != EXIT_BLOCK_PTR
1834 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1835 find_interesting_uses_outside (data, e);
1836
75a70cf9 1837 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1838 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1839 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1840 find_interesting_uses_stmt (data, gsi_stmt (bsi));
dec41e98 1841 }
1842
1843 if (dump_file && (dump_flags & TDF_DETAILS))
1844 {
0cc4271a 1845 bitmap_iterator bi;
1846
dec41e98 1847 fprintf (dump_file, "\n");
1848
0cc4271a 1849 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
dec41e98 1850 {
1851 info = ver_info (data, i);
1852 if (info->inv_id)
1853 {
1854 fprintf (dump_file, " ");
1855 print_generic_expr (dump_file, info->name, TDF_SLIM);
1856 fprintf (dump_file, " is invariant (%d)%s\n",
1857 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1858 }
0cc4271a 1859 }
dec41e98 1860
1861 fprintf (dump_file, "\n");
1862 }
1863
1864 free (body);
1865}
1866
390955a6 1867/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
651874e1 1868 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1869 we are at the top-level of the processed address. */
390955a6 1870
1871static tree
651874e1 1872strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1873 unsigned HOST_WIDE_INT *offset)
390955a6 1874{
651874e1 1875 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
390955a6 1876 enum tree_code code;
1877 tree type, orig_type = TREE_TYPE (expr);
1878 unsigned HOST_WIDE_INT off0, off1, st;
1879 tree orig_expr = expr;
1880
1881 STRIP_NOPS (expr);
651874e1 1882
390955a6 1883 type = TREE_TYPE (expr);
1884 code = TREE_CODE (expr);
1885 *offset = 0;
1886
1887 switch (code)
1888 {
1889 case INTEGER_CST:
1890 if (!cst_and_fits_in_hwi (expr)
cd743a11 1891 || integer_zerop (expr))
390955a6 1892 return orig_expr;
1893
1894 *offset = int_cst_value (expr);
05db596e 1895 return build_int_cst (orig_type, 0);
390955a6 1896
2595dcbb 1897 case POINTER_PLUS_EXPR:
390955a6 1898 case PLUS_EXPR:
1899 case MINUS_EXPR:
1900 op0 = TREE_OPERAND (expr, 0);
1901 op1 = TREE_OPERAND (expr, 1);
1902
651874e1 1903 op0 = strip_offset_1 (op0, false, false, &off0);
1904 op1 = strip_offset_1 (op1, false, false, &off1);
390955a6 1905
2595dcbb 1906 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
390955a6 1907 if (op0 == TREE_OPERAND (expr, 0)
1908 && op1 == TREE_OPERAND (expr, 1))
1909 return orig_expr;
1910
cd743a11 1911 if (integer_zerop (op1))
390955a6 1912 expr = op0;
cd743a11 1913 else if (integer_zerop (op0))
390955a6 1914 {
2595dcbb 1915 if (code == MINUS_EXPR)
651874e1 1916 expr = fold_build1 (NEGATE_EXPR, type, op1);
2595dcbb 1917 else
1918 expr = op1;
390955a6 1919 }
1920 else
651874e1 1921 expr = fold_build2 (code, type, op0, op1);
390955a6 1922
1923 return fold_convert (orig_type, expr);
1924
1925 case ARRAY_REF:
5e19919e 1926 case ARRAY_RANGE_REF:
390955a6 1927 if (!inside_addr)
1928 return orig_expr;
1929
1930 step = array_ref_element_size (expr);
1931 if (!cst_and_fits_in_hwi (step))
1932 break;
1933
1934 st = int_cst_value (step);
1935 op1 = TREE_OPERAND (expr, 1);
651874e1 1936 op1 = strip_offset_1 (op1, false, false, &off1);
390955a6 1937 *offset = off1 * st;
651874e1 1938
1939 if (top_compref
cd743a11 1940 && integer_zerop (op1))
651874e1 1941 {
1942 /* Strip the component reference completely. */
1943 op0 = TREE_OPERAND (expr, 0);
1944 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1945 *offset += off0;
1946 return op0;
1947 }
390955a6 1948 break;
1949
1950 case COMPONENT_REF:
1951 if (!inside_addr)
1952 return orig_expr;
651874e1 1953
1954 tmp = component_ref_field_offset (expr);
1955 if (top_compref
1956 && cst_and_fits_in_hwi (tmp))
1957 {
1958 /* Strip the component reference completely. */
1959 op0 = TREE_OPERAND (expr, 0);
1960 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1961 *offset = off0 + int_cst_value (tmp);
1962 return op0;
1963 }
390955a6 1964 break;
1965
1966 case ADDR_EXPR:
651874e1 1967 op0 = TREE_OPERAND (expr, 0);
1968 op0 = strip_offset_1 (op0, true, true, &off0);
1969 *offset += off0;
1970
1971 if (op0 == TREE_OPERAND (expr, 0))
1972 return orig_expr;
1973
8cb6f421 1974 expr = build_fold_addr_expr (op0);
651874e1 1975 return fold_convert (orig_type, expr);
1976
1977 case INDIRECT_REF:
1978 inside_addr = false;
390955a6 1979 break;
1980
1981 default:
1982 return orig_expr;
1983 }
1984
1985 /* Default handling of expressions for that we want to recurse into
1986 the first operand. */
1987 op0 = TREE_OPERAND (expr, 0);
651874e1 1988 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
390955a6 1989 *offset += off0;
1990
1991 if (op0 == TREE_OPERAND (expr, 0)
1992 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1993 return orig_expr;
1994
1995 expr = copy_node (expr);
1996 TREE_OPERAND (expr, 0) = op0;
1997 if (op1)
1998 TREE_OPERAND (expr, 1) = op1;
1999
651874e1 2000 /* Inside address, we might strip the top level component references,
442e3cb9 2001 thus changing type of the expression. Handling of ADDR_EXPR
651874e1 2002 will fix that. */
2003 expr = fold_convert (orig_type, expr);
2004
2005 return expr;
2006}
2007
2008/* Strips constant offsets from EXPR and stores them to OFFSET. */
2009
2010static tree
2011strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2012{
2013 return strip_offset_1 (expr, false, false, offset);
390955a6 2014}
2015
d086c307 2016/* Returns variant of TYPE that can be used as base for different uses.
318a3281 2017 We return unsigned type with the same precision, which avoids problems
2018 with overflows. */
d086c307 2019
2020static tree
2021generic_type_for (tree type)
2022{
2023 if (POINTER_TYPE_P (type))
318a3281 2024 return unsigned_type_for (type);
d086c307 2025
2026 if (TYPE_UNSIGNED (type))
2027 return type;
2028
2029 return unsigned_type_for (type);
2030}
2031
651874e1 2032/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2033 the bitmap to that we should store it. */
2034
2035static struct ivopts_data *fd_ivopts_data;
2036static tree
2037find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2038{
f0d6e81c 2039 bitmap *depends_on = (bitmap *) data;
651874e1 2040 struct version_info *info;
2041
2042 if (TREE_CODE (*expr_p) != SSA_NAME)
2043 return NULL_TREE;
2044 info = name_info (fd_ivopts_data, *expr_p);
2045
2046 if (!info->inv_id || info->has_nonlin_use)
2047 return NULL_TREE;
2048
2049 if (!*depends_on)
2050 *depends_on = BITMAP_ALLOC (NULL);
2051 bitmap_set_bit (*depends_on, info->inv_id);
2052
2053 return NULL_TREE;
2054}
2055
dec41e98 2056/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2057 position to POS. If USE is not NULL, the candidate is set as related to
2058 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2059 replacement of the final value of the iv by a direct computation. */
2060
2061static struct iv_cand *
2062add_candidate_1 (struct ivopts_data *data,
2063 tree base, tree step, bool important, enum iv_position pos,
75a70cf9 2064 struct iv_use *use, gimple incremented_at)
dec41e98 2065{
2066 unsigned i;
2067 struct iv_cand *cand = NULL;
d086c307 2068 tree type, orig_type;
dec41e98 2069
2070 if (base)
2071 {
d086c307 2072 orig_type = TREE_TYPE (base);
2073 type = generic_type_for (orig_type);
29474274 2074 if (type != orig_type)
dec41e98 2075 {
dec41e98 2076 base = fold_convert (type, base);
7a973feb 2077 step = fold_convert (type, step);
dec41e98 2078 }
2079 }
2080
2081 for (i = 0; i < n_iv_cands (data); i++)
2082 {
2083 cand = iv_cand (data, i);
2084
2085 if (cand->pos != pos)
2086 continue;
2087
2088 if (cand->incremented_at != incremented_at)
2089 continue;
2090
2091 if (!cand->iv)
2092 {
2093 if (!base && !step)
2094 break;
2095
2096 continue;
2097 }
2098
2099 if (!base && !step)
2100 continue;
2101
7a973feb 2102 if (operand_equal_p (base, cand->iv->base, 0)
2103 && operand_equal_p (step, cand->iv->step, 0))
2104 break;
dec41e98 2105 }
2106
2107 if (i == n_iv_cands (data))
2108 {
4c36ffe6 2109 cand = XCNEW (struct iv_cand);
dec41e98 2110 cand->id = i;
2111
2112 if (!base && !step)
2113 cand->iv = NULL;
2114 else
2115 cand->iv = alloc_iv (base, step);
2116
2117 cand->pos = pos;
2118 if (pos != IP_ORIGINAL && cand->iv)
2119 {
2120 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2121 cand->var_after = cand->var_before;
2122 }
2123 cand->important = important;
2124 cand->incremented_at = incremented_at;
7d18ea07 2125 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
dec41e98 2126
651874e1 2127 if (step
2128 && TREE_CODE (step) != INTEGER_CST)
2129 {
2130 fd_ivopts_data = data;
2131 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2132 }
2133
dec41e98 2134 if (dump_file && (dump_flags & TDF_DETAILS))
2135 dump_cand (dump_file, cand);
2136 }
2137
2138 if (important && !cand->important)
2139 {
2140 cand->important = true;
2141 if (dump_file && (dump_flags & TDF_DETAILS))
2142 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2143 }
2144
2145 if (use)
2146 {
2147 bitmap_set_bit (use->related_cands, i);
2148 if (dump_file && (dump_flags & TDF_DETAILS))
2149 fprintf (dump_file, "Candidate %d is related to use %d\n",
2150 cand->id, use->id);
2151 }
2152
2153 return cand;
2154}
2155
5414270a 2156/* Returns true if incrementing the induction variable at the end of the LOOP
2157 is allowed.
2158
2159 The purpose is to avoid splitting latch edge with a biv increment, thus
2160 creating a jump, possibly confusing other optimization passes and leaving
2161 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2162 is not available (so we do not have a better alternative), or if the latch
2163 edge is already nonempty. */
2164
2165static bool
2166allow_ip_end_pos_p (struct loop *loop)
2167{
2168 if (!ip_normal_pos (loop))
2169 return true;
2170
2171 if (!empty_block_p (ip_end_pos (loop)))
2172 return true;
2173
2174 return false;
2175}
2176
dec41e98 2177/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2178 position to POS. If USE is not NULL, the candidate is set as related to
2179 it. The candidate computation is scheduled on all available positions. */
2180
2181static void
2182add_candidate (struct ivopts_data *data,
2183 tree base, tree step, bool important, struct iv_use *use)
2184{
2185 if (ip_normal_pos (data->current_loop))
75a70cf9 2186 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
5414270a 2187 if (ip_end_pos (data->current_loop)
2188 && allow_ip_end_pos_p (data->current_loop))
75a70cf9 2189 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
dec41e98 2190}
2191
3fd8a7ab 2192/* Add a standard "0 + 1 * iteration" iv candidate for a
2193 type with SIZE bits. */
2194
2195static void
2196add_standard_iv_candidates_for_size (struct ivopts_data *data,
2197 unsigned int size)
2198{
2199 tree type = lang_hooks.types.type_for_size (size, true);
2200 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2201 true, NULL);
2202}
2203
dec41e98 2204/* Adds standard iv candidates. */
2205
2206static void
2207add_standard_iv_candidates (struct ivopts_data *data)
2208{
3fd8a7ab 2209 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
dec41e98 2210
3fd8a7ab 2211 /* The same for a double-integer type if it is still fast enough. */
2212 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2213 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
dec41e98 2214}
2215
2216
2217/* Adds candidates bases on the old induction variable IV. */
2218
2219static void
2220add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2221{
75a70cf9 2222 gimple phi;
2223 tree def;
dec41e98 2224 struct iv_cand *cand;
2225
2226 add_candidate (data, iv->base, iv->step, true, NULL);
2227
2228 /* The same, but with initial value zero. */
19052ad2 2229 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2230 add_candidate (data, size_int (0), iv->step, true, NULL);
2231 else
2232 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2233 iv->step, true, NULL);
dec41e98 2234
2235 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
75a70cf9 2236 if (gimple_code (phi) == GIMPLE_PHI)
dec41e98 2237 {
2238 /* Additionally record the possibility of leaving the original iv
2239 untouched. */
2240 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2241 cand = add_candidate_1 (data,
2242 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2243 SSA_NAME_DEF_STMT (def));
2244 cand->var_before = iv->ssa_name;
2245 cand->var_after = def;
2246 }
2247}
2248
2249/* Adds candidates based on the old induction variables. */
2250
2251static void
2252add_old_ivs_candidates (struct ivopts_data *data)
2253{
2254 unsigned i;
2255 struct iv *iv;
0cc4271a 2256 bitmap_iterator bi;
dec41e98 2257
0cc4271a 2258 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
dec41e98 2259 {
2260 iv = ver_info (data, i)->iv;
7a973feb 2261 if (iv && iv->biv_p && !integer_zerop (iv->step))
dec41e98 2262 add_old_iv_candidates (data, iv);
0cc4271a 2263 }
dec41e98 2264}
2265
2266/* Adds candidates based on the value of the induction variable IV and USE. */
2267
2268static void
2269add_iv_value_candidates (struct ivopts_data *data,
2270 struct iv *iv, struct iv_use *use)
2271{
390955a6 2272 unsigned HOST_WIDE_INT offset;
651874e1 2273 tree base;
06240723 2274 tree basetype;
dec41e98 2275
651874e1 2276 add_candidate (data, iv->base, iv->step, false, use);
a59824bb 2277
651874e1 2278 /* The same, but with initial value zero. Make such variable important,
2279 since it is generic enough so that possibly many uses may be based
2280 on it. */
06240723 2281 basetype = TREE_TYPE (iv->base);
2282 if (POINTER_TYPE_P (basetype))
2283 basetype = sizetype;
2284 add_candidate (data, build_int_cst (basetype, 0),
651874e1 2285 iv->step, true, use);
dec41e98 2286
0cfdcbde 2287 /* Third, try removing the constant offset. Make sure to even
2288 add a candidate for &a[0] vs. (T *)&a. */
651874e1 2289 base = strip_offset (iv->base, &offset);
0cfdcbde 2290 if (offset
2291 || base != iv->base)
390955a6 2292 add_candidate (data, base, iv->step, false, use);
dec41e98 2293}
2294
dec41e98 2295/* Adds candidates based on the uses. */
2296
2297static void
2298add_derived_ivs_candidates (struct ivopts_data *data)
2299{
2300 unsigned i;
2301
2302 for (i = 0; i < n_iv_uses (data); i++)
2303 {
2304 struct iv_use *use = iv_use (data, i);
2305
2306 if (!use)
2307 continue;
2308
2309 switch (use->type)
2310 {
2311 case USE_NONLINEAR_EXPR:
2312 case USE_COMPARE:
651874e1 2313 case USE_ADDRESS:
dec41e98 2314 /* Just add the ivs based on the value of the iv used here. */
2315 add_iv_value_candidates (data, use->iv, use);
2316 break;
2317
dec41e98 2318 default:
8c0963c4 2319 gcc_unreachable ();
dec41e98 2320 }
2321 }
2322}
2323
00991688 2324/* Record important candidates and add them to related_cands bitmaps
2325 if needed. */
2326
2327static void
2328record_important_candidates (struct ivopts_data *data)
2329{
2330 unsigned i;
2331 struct iv_use *use;
2332
2333 for (i = 0; i < n_iv_cands (data); i++)
2334 {
2335 struct iv_cand *cand = iv_cand (data, i);
2336
2337 if (cand->important)
2338 bitmap_set_bit (data->important_candidates, i);
2339 }
2340
2341 data->consider_all_candidates = (n_iv_cands (data)
2342 <= CONSIDER_ALL_CANDIDATES_BOUND);
2343
2344 if (data->consider_all_candidates)
2345 {
2346 /* We will not need "related_cands" bitmaps in this case,
2347 so release them to decrease peak memory consumption. */
2348 for (i = 0; i < n_iv_uses (data); i++)
2349 {
2350 use = iv_use (data, i);
27335ffd 2351 BITMAP_FREE (use->related_cands);
00991688 2352 }
2353 }
2354 else
2355 {
2356 /* Add important candidates to the related_cands bitmaps. */
2357 for (i = 0; i < n_iv_uses (data); i++)
2358 bitmap_ior_into (iv_use (data, i)->related_cands,
2359 data->important_candidates);
2360 }
2361}
2362
dec41e98 2363/* Finds the candidates for the induction variables. */
2364
2365static void
2366find_iv_candidates (struct ivopts_data *data)
2367{
2368 /* Add commonly used ivs. */
2369 add_standard_iv_candidates (data);
2370
2371 /* Add old induction variables. */
2372 add_old_ivs_candidates (data);
2373
2374 /* Add induction variables derived from uses. */
2375 add_derived_ivs_candidates (data);
00991688 2376
2377 /* Record the important candidates. */
2378 record_important_candidates (data);
dec41e98 2379}
2380
2381/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2382 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2383 we allocate a simple list to every use. */
2384
2385static void
2386alloc_use_cost_map (struct ivopts_data *data)
2387{
00991688 2388 unsigned i, size, s, j;
dec41e98 2389
2390 for (i = 0; i < n_iv_uses (data); i++)
2391 {
2392 struct iv_use *use = iv_use (data, i);
0cc4271a 2393 bitmap_iterator bi;
dec41e98 2394
2395 if (data->consider_all_candidates)
00991688 2396 size = n_iv_cands (data);
dec41e98 2397 else
2398 {
00991688 2399 s = 0;
0cc4271a 2400 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2401 {
00991688 2402 s++;
0cc4271a 2403 }
00991688 2404
2405 /* Round up to the power of two, so that moduling by it is fast. */
2406 for (size = 1; size < s; size <<= 1)
2407 continue;
dec41e98 2408 }
2409
00991688 2410 use->n_map_members = size;
4c36ffe6 2411 use->cost_map = XCNEWVEC (struct cost_pair, size);
dec41e98 2412 }
2413}
2414
3c2818b0 2415/* Returns description of computation cost of expression whose runtime
2416 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2417
2418static comp_cost
2419new_cost (unsigned runtime, unsigned complexity)
2420{
2421 comp_cost cost;
2422
2423 cost.cost = runtime;
2424 cost.complexity = complexity;
2425
2426 return cost;
2427}
2428
2429/* Adds costs COST1 and COST2. */
2430
2431static comp_cost
2432add_costs (comp_cost cost1, comp_cost cost2)
2433{
2434 cost1.cost += cost2.cost;
2435 cost1.complexity += cost2.complexity;
2436
2437 return cost1;
2438}
2439/* Subtracts costs COST1 and COST2. */
2440
2441static comp_cost
2442sub_costs (comp_cost cost1, comp_cost cost2)
2443{
2444 cost1.cost -= cost2.cost;
2445 cost1.complexity -= cost2.complexity;
2446
2447 return cost1;
2448}
2449
2450/* Returns a negative number if COST1 < COST2, a positive number if
2451 COST1 > COST2, and 0 if COST1 = COST2. */
2452
2453static int
2454compare_costs (comp_cost cost1, comp_cost cost2)
2455{
2456 if (cost1.cost == cost2.cost)
2457 return cost1.complexity - cost2.complexity;
2458
2459 return cost1.cost - cost2.cost;
2460}
2461
2462/* Returns true if COST is infinite. */
2463
2464static bool
2465infinite_cost_p (comp_cost cost)
2466{
2467 return cost.cost == INFTY;
2468}
2469
dec41e98 2470/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
c4d3b428 2471 on invariants DEPENDS_ON and that the value used in expressing it
2472 is VALUE.*/
dec41e98 2473
2474static void
2475set_use_iv_cost (struct ivopts_data *data,
3c2818b0 2476 struct iv_use *use, struct iv_cand *cand,
2477 comp_cost cost, bitmap depends_on, tree value)
dec41e98 2478{
00991688 2479 unsigned i, s;
2480
3c2818b0 2481 if (infinite_cost_p (cost))
dec41e98 2482 {
27335ffd 2483 BITMAP_FREE (depends_on);
00991688 2484 return;
dec41e98 2485 }
2486
2487 if (data->consider_all_candidates)
2488 {
2489 use->cost_map[cand->id].cand = cand;
2490 use->cost_map[cand->id].cost = cost;
2491 use->cost_map[cand->id].depends_on = depends_on;
c4d3b428 2492 use->cost_map[cand->id].value = value;
dec41e98 2493 return;
2494 }
2495
00991688 2496 /* n_map_members is a power of two, so this computes modulo. */
2497 s = cand->id & (use->n_map_members - 1);
2498 for (i = s; i < use->n_map_members; i++)
2499 if (!use->cost_map[i].cand)
2500 goto found;
2501 for (i = 0; i < s; i++)
2502 if (!use->cost_map[i].cand)
2503 goto found;
2504
2505 gcc_unreachable ();
dec41e98 2506
00991688 2507found:
2508 use->cost_map[i].cand = cand;
2509 use->cost_map[i].cost = cost;
2510 use->cost_map[i].depends_on = depends_on;
c4d3b428 2511 use->cost_map[i].value = value;
dec41e98 2512}
2513
00991688 2514/* Gets cost of (USE, CANDIDATE) pair. */
dec41e98 2515
00991688 2516static struct cost_pair *
2517get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2518 struct iv_cand *cand)
dec41e98 2519{
00991688 2520 unsigned i, s;
2521 struct cost_pair *ret;
dec41e98 2522
2523 if (!cand)
00991688 2524 return NULL;
dec41e98 2525
2526 if (data->consider_all_candidates)
dec41e98 2527 {
00991688 2528 ret = use->cost_map + cand->id;
2529 if (!ret->cand)
2530 return NULL;
dec41e98 2531
00991688 2532 return ret;
dec41e98 2533 }
00991688 2534
2535 /* n_map_members is a power of two, so this computes modulo. */
2536 s = cand->id & (use->n_map_members - 1);
2537 for (i = s; i < use->n_map_members; i++)
2538 if (use->cost_map[i].cand == cand)
2539 return use->cost_map + i;
dec41e98 2540
00991688 2541 for (i = 0; i < s; i++)
2542 if (use->cost_map[i].cand == cand)
2543 return use->cost_map + i;
2544
2545 return NULL;
dec41e98 2546}
2547
2548/* Returns estimate on cost of computing SEQ. */
2549
2550static unsigned
f529eb25 2551seq_cost (rtx seq, bool speed)
dec41e98 2552{
2553 unsigned cost = 0;
2554 rtx set;
2555
2556 for (; seq; seq = NEXT_INSN (seq))
2557 {
2558 set = single_set (seq);
2559 if (set)
f529eb25 2560 cost += rtx_cost (set, SET,speed);
dec41e98 2561 else
2562 cost++;
2563 }
2564
2565 return cost;
2566}
2567
abf7e53a 2568/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2569static rtx
2570produce_memory_decl_rtl (tree obj, int *regno)
2571{
2572 rtx x;
876760f6 2573
2574 gcc_assert (obj);
abf7e53a 2575 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2576 {
2577 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2578 x = gen_rtx_SYMBOL_REF (Pmode, name);
7e995af7 2579 SET_SYMBOL_REF_DECL (x, obj);
2580 x = gen_rtx_MEM (DECL_MODE (obj), x);
2581 targetm.encode_section_info (obj, x, true);
abf7e53a 2582 }
2583 else
7e995af7 2584 {
2585 x = gen_raw_REG (Pmode, (*regno)++);
2586 x = gen_rtx_MEM (DECL_MODE (obj), x);
2587 }
abf7e53a 2588
7e995af7 2589 return x;
abf7e53a 2590}
2591
dec41e98 2592/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2593 walk_tree. DATA contains the actual fake register number. */
2594
2595static tree
2596prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2597{
2598 tree obj = NULL_TREE;
2599 rtx x = NULL_RTX;
f0d6e81c 2600 int *regno = (int *) data;
dec41e98 2601
2602 switch (TREE_CODE (*expr_p))
2603 {
abf7e53a 2604 case ADDR_EXPR:
2605 for (expr_p = &TREE_OPERAND (*expr_p, 0);
1f9b622b 2606 handled_component_p (*expr_p);
2607 expr_p = &TREE_OPERAND (*expr_p, 0))
2608 continue;
abf7e53a 2609 obj = *expr_p;
f2d0e9f1 2610 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
abf7e53a 2611 x = produce_memory_decl_rtl (obj, regno);
2612 break;
2613
dec41e98 2614 case SSA_NAME:
2615 *ws = 0;
2616 obj = SSA_NAME_VAR (*expr_p);
2617 if (!DECL_RTL_SET_P (obj))
2618 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2619 break;
2620
2621 case VAR_DECL:
2622 case PARM_DECL:
2623 case RESULT_DECL:
2624 *ws = 0;
2625 obj = *expr_p;
2626
2627 if (DECL_RTL_SET_P (obj))
2628 break;
2629
2630 if (DECL_MODE (obj) == BLKmode)
abf7e53a 2631 x = produce_memory_decl_rtl (obj, regno);
dec41e98 2632 else
2633 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2634
2635 break;
2636
2637 default:
2638 break;
2639 }
2640
2641 if (x)
2642 {
7d18ea07 2643 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
dec41e98 2644 SET_DECL_RTL (obj, x);
2645 }
2646
2647 return NULL_TREE;
2648}
2649
2650/* Determines cost of the computation of EXPR. */
2651
2652static unsigned
f529eb25 2653computation_cost (tree expr, bool speed)
dec41e98 2654{
2655 rtx seq, rslt;
2656 tree type = TREE_TYPE (expr);
2657 unsigned cost;
15a79151 2658 /* Avoid using hard regs in ways which may be unsupported. */
2659 int regno = LAST_VIRTUAL_REGISTER + 1;
f529eb25 2660 enum function_frequency real_frequency = cfun->function_frequency;
dec41e98 2661
f529eb25 2662 cfun->function_frequency = FUNCTION_FREQUENCY_NORMAL;
2663 crtl->maybe_hot_insn_p = speed;
dec41e98 2664 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2665 start_sequence ();
2666 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2667 seq = get_insns ();
2668 end_sequence ();
f529eb25 2669 default_rtl_profile ();
2670 cfun->function_frequency = real_frequency;
dec41e98 2671
f529eb25 2672 cost = seq_cost (seq, speed);
1c14a50e 2673 if (MEM_P (rslt))
f529eb25 2674 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type), speed);
dec41e98 2675
2676 return cost;
2677}
2678
2679/* Returns variable containing the value of candidate CAND at statement AT. */
2680
2681static tree
75a70cf9 2682var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
dec41e98 2683{
2684 if (stmt_after_increment (loop, cand, stmt))
2685 return cand->var_after;
2686 else
2687 return cand->var_before;
2688}
2689
651874e1 2690/* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2691 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2692
b9845e76 2693int
b7bf20db 2694tree_int_cst_sign_bit (const_tree t)
651874e1 2695{
2696 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2697 unsigned HOST_WIDE_INT w;
2698
2699 if (bitno < HOST_BITS_PER_WIDE_INT)
2700 w = TREE_INT_CST_LOW (t);
2701 else
2702 {
2703 w = TREE_INT_CST_HIGH (t);
2704 bitno -= HOST_BITS_PER_WIDE_INT;
2705 }
2706
2707 return (w >> bitno) & 1;
2708}
2709
4d9f13e4 2710/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2711 same precision that is at least as wide as the precision of TYPE, stores
2712 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2713 type of A and B. */
2714
2715static tree
2716determine_common_wider_type (tree *a, tree *b)
2717{
2718 tree wider_type = NULL;
2719 tree suba, subb;
2720 tree atype = TREE_TYPE (*a);
2721
72dd6141 2722 if (CONVERT_EXPR_P (*a))
4d9f13e4 2723 {
2724 suba = TREE_OPERAND (*a, 0);
2725 wider_type = TREE_TYPE (suba);
2726 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2727 return atype;
2728 }
2729 else
2730 return atype;
2731
72dd6141 2732 if (CONVERT_EXPR_P (*b))
4d9f13e4 2733 {
2734 subb = TREE_OPERAND (*b, 0);
2735 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2736 return atype;
2737 }
2738 else
2739 return atype;
2740
2741 *a = suba;
2742 *b = subb;
2743 return wider_type;
2744}
2745
dec41e98 2746/* Determines the expression by that USE is expressed from induction variable
aed164c3 2747 CAND at statement AT in LOOP. The expression is stored in a decomposed
2748 form into AFF. Returns false if USE cannot be expressed using CAND. */
dec41e98 2749
aed164c3 2750static bool
2751get_computation_aff (struct loop *loop,
75a70cf9 2752 struct iv_use *use, struct iv_cand *cand, gimple at,
aed164c3 2753 struct affine_tree_combination *aff)
dec41e98 2754{
dbc64c75 2755 tree ubase = use->iv->base;
2756 tree ustep = use->iv->step;
2757 tree cbase = cand->iv->base;
d3610bea 2758 tree cstep = cand->iv->step, cstep_common;
dec41e98 2759 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
d3610bea 2760 tree common_type, var;
dec41e98 2761 tree uutype;
d3610bea 2762 aff_tree cbase_aff, var_aff;
58f43b90 2763 double_int rat;
dec41e98 2764
2765 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2766 {
2767 /* We do not have a precision to express the values of use. */
aed164c3 2768 return false;
dec41e98 2769 }
2770
d3610bea 2771 var = var_at_stmt (loop, cand, at);
2772 uutype = unsigned_type_for (utype);
dec41e98 2773
d3610bea 2774 /* If the conversion is not noop, perform it. */
2775 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
dec41e98 2776 {
dec41e98 2777 cstep = fold_convert (uutype, cstep);
d3610bea 2778 cbase = fold_convert (uutype, cbase);
2779 var = fold_convert (uutype, var);
651874e1 2780 }
651874e1 2781
d3610bea 2782 if (!constant_multiple_of (ustep, cstep, &rat))
2783 return false;
dec41e98 2784
4d9f13e4 2785 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2786 type, we achieve better folding by computing their difference in this
2787 wider type, and cast the result to UUTYPE. We do not need to worry about
2788 overflows, as all the arithmetics will in the end be performed in UUTYPE
2789 anyway. */
2790 common_type = determine_common_wider_type (&ubase, &cbase);
2791
d3610bea 2792 /* use = ubase - ratio * cbase + ratio * var. */
2793 tree_to_aff_combination (ubase, common_type, aff);
2794 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2795 tree_to_aff_combination (var, uutype, &var_aff);
dec41e98 2796
d3610bea 2797 /* We need to shift the value if we are after the increment. */
2798 if (stmt_after_increment (loop, cand, at))
dec41e98 2799 {
d3610bea 2800 aff_tree cstep_aff;
2801
2802 if (common_type != uutype)
2803 cstep_common = fold_convert (common_type, cstep);
651874e1 2804 else
d3610bea 2805 cstep_common = cstep;
aed164c3 2806
d3610bea 2807 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2808 aff_combination_add (&cbase_aff, &cstep_aff);
dec41e98 2809 }
dec41e98 2810
d3610bea 2811 aff_combination_scale (&cbase_aff, double_int_neg (rat));
aed164c3 2812 aff_combination_add (aff, &cbase_aff);
4d9f13e4 2813 if (common_type != uutype)
d3610bea 2814 aff_combination_convert (aff, uutype);
2815
2816 aff_combination_scale (&var_aff, rat);
2817 aff_combination_add (aff, &var_aff);
aed164c3 2818
2819 return true;
2820}
2821
2822/* Determines the expression by that USE is expressed from induction variable
2823 CAND at statement AT in LOOP. The computation is unshared. */
2824
2825static tree
2826get_computation_at (struct loop *loop,
75a70cf9 2827 struct iv_use *use, struct iv_cand *cand, gimple at)
aed164c3 2828{
d3610bea 2829 aff_tree aff;
aed164c3 2830 tree type = TREE_TYPE (use->iv->base);
2831
2832 if (!get_computation_aff (loop, use, cand, at, &aff))
2833 return NULL_TREE;
2834 unshare_aff_combination (&aff);
2835 return fold_convert (type, aff_combination_to_tree (&aff));
dec41e98 2836}
2837
2838/* Determines the expression by that USE is expressed from induction variable
aed164c3 2839 CAND in LOOP. The computation is unshared. */
dec41e98 2840
2841static tree
2842get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2843{
2844 return get_computation_at (loop, use, cand, use->stmt);
2845}
2846
dec41e98 2847/* Returns cost of addition in MODE. */
2848
2849static unsigned
f529eb25 2850add_cost (enum machine_mode mode, bool speed)
dec41e98 2851{
2852 static unsigned costs[NUM_MACHINE_MODES];
2853 rtx seq;
2854 unsigned cost;
2855
2856 if (costs[mode])
2857 return costs[mode];
2858
2859 start_sequence ();
2860 force_operand (gen_rtx_fmt_ee (PLUS, mode,
5bf0af5e 2861 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2862 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
dec41e98 2863 NULL_RTX);
2864 seq = get_insns ();
2865 end_sequence ();
2866
f529eb25 2867 cost = seq_cost (seq, speed);
dec41e98 2868 if (!cost)
2869 cost = 1;
2870
2871 costs[mode] = cost;
2872
2873 if (dump_file && (dump_flags & TDF_DETAILS))
2874 fprintf (dump_file, "Addition in %s costs %d\n",
2875 GET_MODE_NAME (mode), cost);
2876 return cost;
2877}
2878
2879/* Entry in a hashtable of already known costs for multiplication. */
2880struct mbc_entry
2881{
2882 HOST_WIDE_INT cst; /* The constant to multiply by. */
2883 enum machine_mode mode; /* In mode. */
2884 unsigned cost; /* The cost. */
2885};
2886
2887/* Counts hash value for the ENTRY. */
2888
2889static hashval_t
2890mbc_entry_hash (const void *entry)
2891{
f0d6e81c 2892 const struct mbc_entry *e = (const struct mbc_entry *) entry;
dec41e98 2893
2894 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2895}
2896
2897/* Compares the hash table entries ENTRY1 and ENTRY2. */
2898
2899static int
2900mbc_entry_eq (const void *entry1, const void *entry2)
2901{
f0d6e81c 2902 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2903 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
dec41e98 2904
2905 return (e1->mode == e2->mode
2906 && e1->cst == e2->cst);
2907}
2908
2909/* Returns cost of multiplication by constant CST in MODE. */
2910
aed164c3 2911unsigned
f529eb25 2912multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
dec41e98 2913{
2914 static htab_t costs;
2915 struct mbc_entry **cached, act;
2916 rtx seq;
2917 unsigned cost;
2918
2919 if (!costs)
2920 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2921
2922 act.mode = mode;
2923 act.cst = cst;
2924 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2925 if (*cached)
2926 return (*cached)->cost;
2927
4c36ffe6 2928 *cached = XNEW (struct mbc_entry);
dec41e98 2929 (*cached)->mode = mode;
2930 (*cached)->cst = cst;
2931
2932 start_sequence ();
5bf0af5e 2933 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2934 gen_int_mode (cst, mode), NULL_RTX, 0);
dec41e98 2935 seq = get_insns ();
2936 end_sequence ();
2937
f529eb25 2938 cost = seq_cost (seq, speed);
dec41e98 2939
2940 if (dump_file && (dump_flags & TDF_DETAILS))
2941 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2942 (int) cst, GET_MODE_NAME (mode), cost);
2943
2944 (*cached)->cost = cost;
2945
2946 return cost;
2947}
2948
e90b35db 2949/* Returns true if multiplying by RATIO is allowed in an address. Test the
2950 validity for a memory reference accessing memory of mode MODE. */
aed164c3 2951
2952bool
e7f482a0 2953multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
aed164c3 2954{
2955#define MAX_RATIO 128
e7f482a0 2956 static sbitmap valid_mult[MAX_MACHINE_MODE];
aed164c3 2957
e7f482a0 2958 if (!valid_mult[mode])
aed164c3 2959 {
5bf0af5e 2960 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
aed164c3 2961 rtx addr;
2962 HOST_WIDE_INT i;
2963
e7f482a0 2964 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2965 sbitmap_zero (valid_mult[mode]);
aed164c3 2966 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2967 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2968 {
2969 XEXP (addr, 1) = gen_int_mode (i, Pmode);
e7f482a0 2970 if (memory_address_p (mode, addr))
2971 SET_BIT (valid_mult[mode], i + MAX_RATIO);
aed164c3 2972 }
2973
2974 if (dump_file && (dump_flags & TDF_DETAILS))
2975 {
2976 fprintf (dump_file, " allowed multipliers:");
2977 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
e7f482a0 2978 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
aed164c3 2979 fprintf (dump_file, " %d", (int) i);
2980 fprintf (dump_file, "\n");
2981 fprintf (dump_file, "\n");
2982 }
2983 }
2984
2985 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2986 return false;
2987
e7f482a0 2988 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
aed164c3 2989}
2990
dec41e98 2991/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2992 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
e90b35db 2993 variable is omitted. Compute the cost for a memory reference that accesses
2994 a memory location of mode MEM_MODE.
2995
dec41e98 2996 TODO -- there must be some better way. This all is quite crude. */
2997
3c2818b0 2998static comp_cost
dec41e98 2999get_address_cost (bool symbol_present, bool var_present,
e7f482a0 3000 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
f529eb25 3001 enum machine_mode mem_mode,
3002 bool speed)
dec41e98 3003{
e7f482a0 3004 static bool initialized[MAX_MACHINE_MODE];
3005 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
3006 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
3007 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
3c2818b0 3008 unsigned cost, acost, complexity;
dec41e98 3009 bool offset_p, ratio_p;
dec41e98 3010 HOST_WIDE_INT s_offset;
3011 unsigned HOST_WIDE_INT mask;
3012 unsigned bits;
3013
e7f482a0 3014 if (!initialized[mem_mode])
dec41e98 3015 {
3016 HOST_WIDE_INT i;
e90b35db 3017 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
9b6564b5 3018 int old_cse_not_expected;
3019 unsigned sym_p, var_p, off_p, rat_p, add_c;
3020 rtx seq, addr, base;
3021 rtx reg0, reg1;
3022
e7f482a0 3023 initialized[mem_mode] = true;
dec41e98 3024
5bf0af5e 3025 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
dec41e98 3026
3027 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
e90b35db 3028 for (i = start; i <= 1 << 20; i <<= 1)
dec41e98 3029 {
5bf0af5e 3030 XEXP (addr, 1) = gen_int_mode (i, Pmode);
e7f482a0 3031 if (!memory_address_p (mem_mode, addr))
dec41e98 3032 break;
3033 }
e90b35db 3034 max_offset[mem_mode] = i == start ? 0 : i >> 1;
e7f482a0 3035 off[mem_mode] = max_offset[mem_mode];
dec41e98 3036
e90b35db 3037 for (i = start; i <= 1 << 20; i <<= 1)
dec41e98 3038 {
5bf0af5e 3039 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
e7f482a0 3040 if (!memory_address_p (mem_mode, addr))
dec41e98 3041 break;
3042 }
e90b35db 3043 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
dec41e98 3044
3045 if (dump_file && (dump_flags & TDF_DETAILS))
3046 {
3047 fprintf (dump_file, "get_address_cost:\n");
e7f482a0 3048 fprintf (dump_file, " min offset %s %d\n",
3049 GET_MODE_NAME (mem_mode),
3050 (int) min_offset[mem_mode]);
3051 fprintf (dump_file, " max offset %s %d\n",
3052 GET_MODE_NAME (mem_mode),
3053 (int) max_offset[mem_mode]);
dec41e98 3054 }
3055
e7f482a0 3056 rat[mem_mode] = 1;
aed164c3 3057 for (i = 2; i <= MAX_RATIO; i++)
e7f482a0 3058 if (multiplier_allowed_in_address_p (i, mem_mode))
aed164c3 3059 {
e7f482a0 3060 rat[mem_mode] = i;
aed164c3 3061 break;
3062 }
9b6564b5 3063
3064 /* Compute the cost of various addressing modes. */
3065 acost = 0;
3066 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3067 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
3068
3069 for (i = 0; i < 16; i++)
3070 {
3071 sym_p = i & 1;
3072 var_p = (i >> 1) & 1;
3073 off_p = (i >> 2) & 1;
3074 rat_p = (i >> 3) & 1;
3075
3076 addr = reg0;
3077 if (rat_p)
e7f482a0 3078 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
3079 gen_int_mode (rat[mem_mode], Pmode));
9b6564b5 3080
3081 if (var_p)
3082 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
3083
3084 if (sym_p)
3085 {
3086 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
7e995af7 3087 /* ??? We can run into trouble with some backends by presenting
f0b5f617 3088 it with symbols which haven't been properly passed through
7e995af7 3089 targetm.encode_section_info. By setting the local bit, we
3090 enhance the probability of things working. */
3091 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3092
9b6564b5 3093 if (off_p)
3094 base = gen_rtx_fmt_e (CONST, Pmode,
3095 gen_rtx_fmt_ee (PLUS, Pmode,
3096 base,
e7f482a0 3097 gen_int_mode (off[mem_mode],
3098 Pmode)));
9b6564b5 3099 }
3100 else if (off_p)
e7f482a0 3101 base = gen_int_mode (off[mem_mode], Pmode);
9b6564b5 3102 else
3103 base = NULL_RTX;
3104
3105 if (base)
3106 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
3107
3108 start_sequence ();
3109 /* To avoid splitting addressing modes, pretend that no cse will
3110 follow. */
3111 old_cse_not_expected = cse_not_expected;
3112 cse_not_expected = true;
e7f482a0 3113 addr = memory_address (mem_mode, addr);
9b6564b5 3114 cse_not_expected = old_cse_not_expected;
3115 seq = get_insns ();
3116 end_sequence ();
3117
f529eb25 3118 acost = seq_cost (seq, speed);
3119 acost += address_cost (addr, mem_mode, speed);
9b6564b5 3120
3121 if (!acost)
3122 acost = 1;
e7f482a0 3123 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
9b6564b5 3124 }
3125
3126 /* On some targets, it is quite expensive to load symbol to a register,
3127 which makes addresses that contain symbols look much more expensive.
3128 However, the symbol will have to be loaded in any case before the
3129 loop (and quite likely we have it in register already), so it does not
3130 make much sense to penalize them too heavily. So make some final
3131 tweaks for the SYMBOL_PRESENT modes:
3132
3133 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3134 var is cheaper, use this mode with small penalty.
3135 If VAR_PRESENT is true, try whether the mode with
3136 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3137 if this is the case, use it. */
f529eb25 3138 add_c = add_cost (Pmode, speed);
9b6564b5 3139 for (i = 0; i < 8; i++)
3140 {
3141 var_p = i & 1;
3142 off_p = (i >> 1) & 1;
3143 rat_p = (i >> 2) & 1;
3144
e7f482a0 3145 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
9b6564b5 3146 if (var_p)
3147 acost += add_c;
3148
e7f482a0 3149 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3150 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
9b6564b5 3151 }
3152
3153 if (dump_file && (dump_flags & TDF_DETAILS))
3154 {
3155 fprintf (dump_file, "Address costs:\n");
3156
3157 for (i = 0; i < 16; i++)
3158 {
3159 sym_p = i & 1;
3160 var_p = (i >> 1) & 1;
3161 off_p = (i >> 2) & 1;
3162 rat_p = (i >> 3) & 1;
3163
3164 fprintf (dump_file, " ");
3165 if (sym_p)
3166 fprintf (dump_file, "sym + ");
3167 if (var_p)
3168 fprintf (dump_file, "var + ");
3169 if (off_p)
3170 fprintf (dump_file, "cst + ");
3171 if (rat_p)
3172 fprintf (dump_file, "rat * ");
3173
e7f482a0 3174 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
9b6564b5 3175 fprintf (dump_file, "index costs %d\n", acost);
3176 }
3177 fprintf (dump_file, "\n");
3178 }
dec41e98 3179 }
3180
3181 bits = GET_MODE_BITSIZE (Pmode);
3182 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3183 offset &= mask;
3184 if ((offset >> (bits - 1) & 1))
3185 offset |= ~mask;
3186 s_offset = offset;
3187
3188 cost = 0;
636addb3 3189 offset_p = (s_offset != 0
e7f482a0 3190 && min_offset[mem_mode] <= s_offset
3191 && s_offset <= max_offset[mem_mode]);
dec41e98 3192 ratio_p = (ratio != 1
e7f482a0 3193 && multiplier_allowed_in_address_p (ratio, mem_mode));
dec41e98 3194
3195 if (ratio != 1 && !ratio_p)
f529eb25 3196 cost += multiply_by_cost (ratio, Pmode, speed);
dec41e98 3197
3198 if (s_offset && !offset_p && !symbol_present)
f529eb25 3199 cost += add_cost (Pmode, speed);
dec41e98 3200
e7f482a0 3201 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3c2818b0 3202 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3203 return new_cost (cost + acost, complexity);
dec41e98 3204}
dec41e98 3205
f4d7629d 3206/* Estimates cost of forcing expression EXPR into a variable. */
3207
3c2818b0 3208static comp_cost
f529eb25 3209force_expr_to_var_cost (tree expr, bool speed)
dec41e98 3210{
3211 static bool costs_initialized = false;
f529eb25 3212 static unsigned integer_cost [2];
3213 static unsigned symbol_cost [2];
3214 static unsigned address_cost [2];
eb91f88e 3215 tree op0, op1;
3c2818b0 3216 comp_cost cost0, cost1, cost;
eb91f88e 3217 enum machine_mode mode;
dec41e98 3218
3219 if (!costs_initialized)
3220 {
dec41e98 3221 tree type = build_pointer_type (integer_type_node);
7e995af7 3222 tree var, addr;
3223 rtx x;
f529eb25 3224 int i;
7e995af7 3225
3226 var = create_tmp_var_raw (integer_type_node, "test_var");
3227 TREE_STATIC (var) = 1;
3228 x = produce_memory_decl_rtl (var, NULL);
3229 SET_DECL_RTL (var, x);
dec41e98 3230
dec41e98 3231 addr = build1 (ADDR_EXPR, type, var);
dec41e98 3232
f529eb25 3233
3234 for (i = 0; i < 2; i++)
dec41e98 3235 {
f529eb25 3236 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3237 2000), i);
3238
3239 symbol_cost[i] = computation_cost (addr, i) + 1;
3240
3241 address_cost[i]
3242 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3243 addr,
3244 build_int_cst (sizetype, 2000)), i) + 1;
3245 if (dump_file && (dump_flags & TDF_DETAILS))
3246 {
3247 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3248 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3249 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3250 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3251 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3252 fprintf (dump_file, "\n");
3253 }
dec41e98 3254 }
3255
3256 costs_initialized = true;
3257 }
3258
390955a6 3259 STRIP_NOPS (expr);
3260
dec41e98 3261 if (SSA_VAR_P (expr))
3c2818b0 3262 return zero_cost;
dec41e98 3263
71d9af81 3264 if (is_gimple_min_invariant (expr))
dec41e98 3265 {
3266 if (TREE_CODE (expr) == INTEGER_CST)
f529eb25 3267 return new_cost (integer_cost [speed], 0);
dec41e98 3268
3269 if (TREE_CODE (expr) == ADDR_EXPR)
3270 {
3271 tree obj = TREE_OPERAND (expr, 0);
3272
3273 if (TREE_CODE (obj) == VAR_DECL
3274 || TREE_CODE (obj) == PARM_DECL
3275 || TREE_CODE (obj) == RESULT_DECL)
f529eb25 3276 return new_cost (symbol_cost [speed], 0);
dec41e98 3277 }
3278
f529eb25 3279 return new_cost (address_cost [speed], 0);
dec41e98 3280 }
3281
eb91f88e 3282 switch (TREE_CODE (expr))
3283 {
0de36bdb 3284 case POINTER_PLUS_EXPR:
eb91f88e 3285 case PLUS_EXPR:
3286 case MINUS_EXPR:
3287 case MULT_EXPR:
3288 op0 = TREE_OPERAND (expr, 0);
3289 op1 = TREE_OPERAND (expr, 1);
390955a6 3290 STRIP_NOPS (op0);
3291 STRIP_NOPS (op1);
eb91f88e 3292
3293 if (is_gimple_val (op0))
3c2818b0 3294 cost0 = zero_cost;
eb91f88e 3295 else
f529eb25 3296 cost0 = force_expr_to_var_cost (op0, speed);
eb91f88e 3297
3298 if (is_gimple_val (op1))
3c2818b0 3299 cost1 = zero_cost;
eb91f88e 3300 else
f529eb25 3301 cost1 = force_expr_to_var_cost (op1, speed);
eb91f88e 3302
3303 break;
3304
3305 default:
3306 /* Just an arbitrary value, FIXME. */
f529eb25 3307 return new_cost (target_spill_cost[speed], 0);
eb91f88e 3308 }
3309
3310 mode = TYPE_MODE (TREE_TYPE (expr));
3311 switch (TREE_CODE (expr))
3312 {
0de36bdb 3313 case POINTER_PLUS_EXPR:
eb91f88e 3314 case PLUS_EXPR:
3315 case MINUS_EXPR:
f529eb25 3316 cost = new_cost (add_cost (mode, speed), 0);
eb91f88e 3317 break;
3318
3319 case MULT_EXPR:
3320 if (cst_and_fits_in_hwi (op0))
f529eb25 3321 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3322 else if (cst_and_fits_in_hwi (op1))
3323 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
eb91f88e 3324 else
f529eb25 3325 return new_cost (target_spill_cost [speed], 0);
eb91f88e 3326 break;
3327
3328 default:
3329 gcc_unreachable ();
3330 }
3331
3c2818b0 3332 cost = add_costs (cost, cost0);
3333 cost = add_costs (cost, cost1);
eb91f88e 3334
3335 /* Bound the cost by target_spill_cost. The parts of complicated
3336 computations often are either loop invariant or at least can
3337 be shared between several iv uses, so letting this grow without
3338 limits would not give reasonable results. */
f529eb25 3339 if (cost.cost > target_spill_cost [speed])
3340 cost.cost = target_spill_cost [speed];
3c2818b0 3341
3342 return cost;
dec41e98 3343}
3344
f4d7629d 3345/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3346 invariants the computation depends on. */
3347
3c2818b0 3348static comp_cost
f4d7629d 3349force_var_cost (struct ivopts_data *data,
3350 tree expr, bitmap *depends_on)
3351{
3352 if (depends_on)
3353 {
3354 fd_ivopts_data = data;
3355 walk_tree (&expr, find_depends, depends_on, NULL);
3356 }
3357
f529eb25 3358 return force_expr_to_var_cost (expr, data->speed);
f4d7629d 3359}
3360
dec41e98 3361/* Estimates cost of expressing address ADDR as var + symbol + offset. The
3362 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3363 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3364 invariants the computation depends on. */
3365
3c2818b0 3366static comp_cost
dec41e98 3367split_address_cost (struct ivopts_data *data,
3368 tree addr, bool *symbol_present, bool *var_present,
3369 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3370{
dbc64c75 3371 tree core;
3372 HOST_WIDE_INT bitsize;
3373 HOST_WIDE_INT bitpos;
3374 tree toffset;
3375 enum machine_mode mode;
3376 int unsignedp, volatilep;
3377
3378 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
e7e9416e 3379 &unsignedp, &volatilep, false);
dec41e98 3380
dbc64c75 3381 if (toffset != 0
3382 || bitpos % BITS_PER_UNIT != 0
3383 || TREE_CODE (core) != VAR_DECL)
dec41e98 3384 {
3385 *symbol_present = false;
3386 *var_present = true;
3387 fd_ivopts_data = data;
3388 walk_tree (&addr, find_depends, depends_on, NULL);
f529eb25 3389 return new_cost (target_spill_cost[data->speed], 0);
dbc64c75 3390 }
3391
3392 *offset += bitpos / BITS_PER_UNIT;
dec41e98 3393 if (TREE_STATIC (core)
3394 || DECL_EXTERNAL (core))
3395 {
3396 *symbol_present = true;
3397 *var_present = false;
3c2818b0 3398 return zero_cost;
dec41e98 3399 }
3400
3401 *symbol_present = false;
3402 *var_present = true;
3c2818b0 3403 return zero_cost;
dec41e98 3404}
3405
3406/* Estimates cost of expressing difference of addresses E1 - E2 as
3407 var + symbol + offset. The value of offset is added to OFFSET,
3408 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3409 part is missing. DEPENDS_ON is a set of the invariants the computation
3410 depends on. */
3411
3c2818b0 3412static comp_cost
dec41e98 3413ptr_difference_cost (struct ivopts_data *data,
3414 tree e1, tree e2, bool *symbol_present, bool *var_present,
3415 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3416{
dbc64c75 3417 HOST_WIDE_INT diff = 0;
3c2818b0 3418 comp_cost cost;
f529eb25 3419 bool speed = optimize_loop_for_speed_p (data->current_loop);
dec41e98 3420
8c0963c4 3421 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
dec41e98 3422
eb91f88e 3423 if (ptr_difference_const (e1, e2, &diff))
dec41e98 3424 {
3425 *offset += diff;
3426 *symbol_present = false;
3427 *var_present = false;
3c2818b0 3428 return zero_cost;
dec41e98 3429 }
3430
3c2818b0 3431 if (integer_zerop (e2))
dec41e98 3432 return split_address_cost (data, TREE_OPERAND (e1, 0),
3433 symbol_present, var_present, offset, depends_on);
3434
3435 *symbol_present = false;
3436 *var_present = true;
3437
3438 cost = force_var_cost (data, e1, depends_on);
3c2818b0 3439 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
f529eb25 3440 cost.cost += add_cost (Pmode, speed);
dec41e98 3441
3442 return cost;
3443}
3444
3445/* Estimates cost of expressing difference E1 - E2 as
3446 var + symbol + offset. The value of offset is added to OFFSET,
3447 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3448 part is missing. DEPENDS_ON is a set of the invariants the computation
3449 depends on. */
3450
3c2818b0 3451static comp_cost
dec41e98 3452difference_cost (struct ivopts_data *data,
3453 tree e1, tree e2, bool *symbol_present, bool *var_present,
3454 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3455{
3c2818b0 3456 comp_cost cost;
dec41e98 3457 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
390955a6 3458 unsigned HOST_WIDE_INT off1, off2;
3459
651874e1 3460 e1 = strip_offset (e1, &off1);
3461 e2 = strip_offset (e2, &off2);
390955a6 3462 *offset += off1 - off2;
dec41e98 3463
390955a6 3464 STRIP_NOPS (e1);
3465 STRIP_NOPS (e2);
dec41e98 3466
3467 if (TREE_CODE (e1) == ADDR_EXPR)
3468 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3469 depends_on);
3470 *symbol_present = false;
3471
3472 if (operand_equal_p (e1, e2, 0))
3473 {
3474 *var_present = false;
3c2818b0 3475 return zero_cost;
dec41e98 3476 }
3477 *var_present = true;
cd743a11 3478 if (integer_zerop (e2))
dec41e98 3479 return force_var_cost (data, e1, depends_on);
3480
cd743a11 3481 if (integer_zerop (e1))
dec41e98 3482 {
3483 cost = force_var_cost (data, e2, depends_on);
f529eb25 3484 cost.cost += multiply_by_cost (-1, mode, data->speed);
dec41e98 3485
3486 return cost;
3487 }
3488
3489 cost = force_var_cost (data, e1, depends_on);
3c2818b0 3490 cost = add_costs (cost, force_var_cost (data, e2, depends_on));
f529eb25 3491 cost.cost += add_cost (mode, data->speed);
dec41e98 3492
3493 return cost;
3494}
3495
3496/* Determines the cost of the computation by that USE is expressed
3497 from induction variable CAND. If ADDRESS_P is true, we just need
3498 to create an address from it, otherwise we want to get it into
3499 register. A set of invariants we depend on is stored in
3500 DEPENDS_ON. AT is the statement at that the value is computed. */
3501
3c2818b0 3502static comp_cost
dec41e98 3503get_computation_cost_at (struct ivopts_data *data,
3504 struct iv_use *use, struct iv_cand *cand,
75a70cf9 3505 bool address_p, bitmap *depends_on, gimple at)
dec41e98 3506{
3507 tree ubase = use->iv->base, ustep = use->iv->step;
3508 tree cbase, cstep;
3509 tree utype = TREE_TYPE (ubase), ctype;
d3610bea 3510 unsigned HOST_WIDE_INT cstepi, offset = 0;
dec41e98 3511 HOST_WIDE_INT ratio, aratio;
3512 bool var_present, symbol_present;
3c2818b0 3513 comp_cost cost;
3514 unsigned n_sums;
d3610bea 3515 double_int rat;
f529eb25 3516 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
dec41e98 3517
3518 *depends_on = NULL;
3519
3520 /* Only consider real candidates. */
3521 if (!cand->iv)
3c2818b0 3522 return infinite_cost;
dec41e98 3523
3524 cbase = cand->iv->base;
3525 cstep = cand->iv->step;
3526 ctype = TREE_TYPE (cbase);
3527
3528 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3529 {
3530 /* We do not have a precision to express the values of use. */
3c2818b0 3531 return infinite_cost;
dec41e98 3532 }
3533
065d134e 3534 if (address_p)
3535 {
3536 /* Do not try to express address of an object with computation based
3537 on address of a different object. This may cause problems in rtl
3538 level alias analysis (that does not expect this to be happening,
3539 as this is illegal in C), and would be unlikely to be useful
3540 anyway. */
3541 if (use->iv->base_object
3542 && cand->iv->base_object
3543 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3c2818b0 3544 return infinite_cost;
065d134e 3545 }
3546
dec41e98 3547 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3548 {
3549 /* TODO -- add direct handling of this case. */
3550 goto fallback;
3551 }
3552
651874e1 3553 /* CSTEPI is removed from the offset in case statement is after the
3554 increment. If the step is not constant, we use zero instead.
e446560c 3555 This is a bit imprecise (there is the extra addition), but
651874e1 3556 redundancy elimination is likely to transform the code so that
3557 it uses value of the variable before increment anyway,
3558 so it is not that much unrealistic. */
3559 if (cst_and_fits_in_hwi (cstep))
3560 cstepi = int_cst_value (cstep);
3561 else
3562 cstepi = 0;
3563
d3610bea 3564 if (!constant_multiple_of (ustep, cstep, &rat))
3c2818b0 3565 return infinite_cost;
58f43b90 3566
d3610bea 3567 if (double_int_fits_in_shwi_p (rat))
3568 ratio = double_int_to_shwi (rat);
3569 else
3c2818b0 3570 return infinite_cost;
dec41e98 3571
3572 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3573 or ratio == 1, it is better to handle this like
3574
3575 ubase - ratio * cbase + ratio * var
3576
3577 (also holds in the case ratio == -1, TODO. */
3578
651874e1 3579 if (cst_and_fits_in_hwi (cbase))
dec41e98 3580 {
3581 offset = - ratio * int_cst_value (cbase);
3c2818b0 3582 cost = difference_cost (data,
3583 ubase, build_int_cst (utype, 0),
3584 &symbol_present, &var_present, &offset,
3585 depends_on);
dec41e98 3586 }
3587 else if (ratio == 1)
3588 {
3c2818b0 3589 cost = difference_cost (data,
3590 ubase, cbase,
3591 &symbol_present, &var_present, &offset,
3592 depends_on);
dec41e98 3593 }
3594 else
3595 {
3c2818b0 3596 cost = force_var_cost (data, cbase, depends_on);
f529eb25 3597 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3c2818b0 3598 cost = add_costs (cost,
3599 difference_cost (data,
3600 ubase, build_int_cst (utype, 0),
3601 &symbol_present, &var_present,
3602 &offset, depends_on));
dec41e98 3603 }
3604
3605 /* If we are after the increment, the value of the candidate is higher by
3606 one iteration. */
3607 if (stmt_after_increment (data->current_loop, cand, at))
3608 offset -= ratio * cstepi;
3609
3610 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3611 (symbol/var/const parts may be omitted). If we are looking for an address,
3612 find the cost of addressing this. */
3613 if (address_p)
3c2818b0 3614 return add_costs (cost, get_address_cost (symbol_present, var_present,
3615 offset, ratio,
f529eb25 3616 TYPE_MODE (TREE_TYPE (*use->op_p)), speed));
dec41e98 3617
3618 /* Otherwise estimate the costs for computing the expression. */
3619 aratio = ratio > 0 ? ratio : -ratio;
3620 if (!symbol_present && !var_present && !offset)
3621 {
3622 if (ratio != 1)
f529eb25 3623 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
dec41e98 3624
3625 return cost;
3626 }
3627
3628 if (aratio != 1)
f529eb25 3629 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
dec41e98 3630
3631 n_sums = 1;
3632 if (var_present
3633 /* Symbol + offset should be compile-time computable. */
3634 && (symbol_present || offset))
3635 n_sums++;
3636
3c2818b0 3637 /* Having offset does not affect runtime cost in case it is added to
3638 symbol, but it increases complexity. */
3639 if (offset)
3640 cost.complexity++;
3641
f529eb25 3642 cost.cost += n_sums * add_cost (TYPE_MODE (ctype), speed);
3c2818b0 3643 return cost;
dec41e98 3644
3645fallback:
3646 {
3647 /* Just get the expression, expand it and measure the cost. */
3648 tree comp = get_computation_at (data->current_loop, use, cand, at);
3649
3650 if (!comp)
3c2818b0 3651 return infinite_cost;
dec41e98 3652
3653 if (address_p)
3654 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3655
f529eb25 3656 return new_cost (computation_cost (comp, speed), 0);
dec41e98 3657 }
3658}
3659
3660/* Determines the cost of the computation by that USE is expressed
3661 from induction variable CAND. If ADDRESS_P is true, we just need
3662 to create an address from it, otherwise we want to get it into
3663 register. A set of invariants we depend on is stored in
3664 DEPENDS_ON. */
3665
3c2818b0 3666static comp_cost
dec41e98 3667get_computation_cost (struct ivopts_data *data,
3668 struct iv_use *use, struct iv_cand *cand,
3669 bool address_p, bitmap *depends_on)
3670{
3671 return get_computation_cost_at (data,
3672 use, cand, address_p, depends_on, use->stmt);
3673}
3674
3675/* Determines cost of basing replacement of USE on CAND in a generic
3676 expression. */
3677
00991688 3678static bool
dec41e98 3679determine_use_iv_cost_generic (struct ivopts_data *data,
3680 struct iv_use *use, struct iv_cand *cand)
3681{
3682 bitmap depends_on;
3c2818b0 3683 comp_cost cost;
d4caf099 3684
3685 /* The simple case first -- if we need to express value of the preserved
3686 original biv, the cost is 0. This also prevents us from counting the
3687 cost of increment twice -- once at this use and once in the cost of
3688 the candidate. */
3689 if (cand->pos == IP_ORIGINAL
3690 && cand->incremented_at == use->stmt)
3691 {
3c2818b0 3692 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
d4caf099 3693 return true;
3694 }
dec41e98 3695
d4caf099 3696 cost = get_computation_cost (data, use, cand, false, &depends_on);
c4d3b428 3697 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
00991688 3698
3c2818b0 3699 return !infinite_cost_p (cost);
dec41e98 3700}
3701
3702/* Determines cost of basing replacement of USE on CAND in an address. */
3703
00991688 3704static bool
dec41e98 3705determine_use_iv_cost_address (struct ivopts_data *data,
3706 struct iv_use *use, struct iv_cand *cand)
3707{
3708 bitmap depends_on;
3c2818b0 3709 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on);
dec41e98 3710
c4d3b428 3711 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
00991688 3712
3c2818b0 3713 return !infinite_cost_p (cost);
dec41e98 3714}
3715
cf661063 3716/* Computes value of candidate CAND at position AT in iteration NITER, and
3717 stores it to VAL. */
dec41e98 3718
cf661063 3719static void
75a70cf9 3720cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
cf661063 3721 aff_tree *val)
dec41e98 3722{
cf661063 3723 aff_tree step, delta, nit;
3724 struct iv *iv = cand->iv;
dec41e98 3725 tree type = TREE_TYPE (iv->base);
06240723 3726 tree steptype = type;
3727 if (POINTER_TYPE_P (type))
3728 steptype = sizetype;
dec41e98 3729
06240723 3730 tree_to_aff_combination (iv->step, steptype, &step);
cf661063 3731 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
06240723 3732 aff_combination_convert (&nit, steptype);
cf661063 3733 aff_combination_mult (&nit, &step, &delta);
dec41e98 3734 if (stmt_after_increment (loop, cand, at))
cf661063 3735 aff_combination_add (&delta, &step);
dec41e98 3736
cf661063 3737 tree_to_aff_combination (iv->base, type, val);
3738 aff_combination_add (val, &delta);
dec41e98 3739}
3740
b091dc59 3741/* Returns period of induction variable iv. */
3742
3743static tree
3744iv_period (struct iv *iv)
3745{
3746 tree step = iv->step, period, type;
3747 tree pow2div;
3748
3749 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3750
3751 /* Period of the iv is gcd (step, type range). Since type range is power
3752 of two, it suffices to determine the maximum power of two that divides
3753 step. */
3754 pow2div = num_ending_zeros (step);
3755 type = unsigned_type_for (TREE_TYPE (step));
3756
3757 period = build_low_bits_mask (type,
3758 (TYPE_PRECISION (type)
3759 - tree_low_cst (pow2div, 1)));
3760
3761 return period;
3762}
3763
c4d3b428 3764/* Returns the comparison operator used when eliminating the iv USE. */
3765
3766static enum tree_code
3767iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3768{
3769 struct loop *loop = data->current_loop;
3770 basic_block ex_bb;
3771 edge exit;
3772
75a70cf9 3773 ex_bb = gimple_bb (use->stmt);
c4d3b428 3774 exit = EDGE_SUCC (ex_bb, 0);
3775 if (flow_bb_inside_loop_p (loop, exit->dest))
3776 exit = EDGE_SUCC (ex_bb, 1);
3777
3778 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3779}
3780
dec41e98 3781/* Check whether it is possible to express the condition in USE by comparison
c4d3b428 3782 of candidate CAND. If so, store the value compared with to BOUND. */
dec41e98 3783
3784static bool
b091dc59 3785may_eliminate_iv (struct ivopts_data *data,
c4d3b428 3786 struct iv_use *use, struct iv_cand *cand, tree *bound)
dec41e98 3787{
065d134e 3788 basic_block ex_bb;
dec41e98 3789 edge exit;
1b0af592 3790 tree nit, period;
b091dc59 3791 struct loop *loop = data->current_loop;
cf661063 3792 aff_tree bnd;
1b0af592 3793
651874e1 3794 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3795 return false;
3796
14f25899 3797 /* For now works only for exits that dominate the loop latch.
3798 TODO: extend to other conditions inside loop body. */
75a70cf9 3799 ex_bb = gimple_bb (use->stmt);
065d134e 3800 if (use->stmt != last_stmt (ex_bb)
75a70cf9 3801 || gimple_code (use->stmt) != GIMPLE_COND
3802 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
dec41e98 3803 return false;
3804
065d134e 3805 exit = EDGE_SUCC (ex_bb, 0);
3806 if (flow_bb_inside_loop_p (loop, exit->dest))
3807 exit = EDGE_SUCC (ex_bb, 1);
3808 if (flow_bb_inside_loop_p (loop, exit->dest))
3809 return false;
3810
dcd24d3c 3811 nit = niter_for_exit (data, exit);
3812 if (!nit)
dec41e98 3813 return false;
3814
14f25899 3815 /* Determine whether we can use the variable to test the exit condition.
3816 This is the case iff the period of the induction variable is greater
3817 than the number of iterations for which the exit condition is true. */
b091dc59 3818 period = iv_period (cand->iv);
dec41e98 3819
14f25899 3820 /* If the number of iterations is constant, compare against it directly. */
3821 if (TREE_CODE (nit) == INTEGER_CST)
3822 {
3823 if (!tree_int_cst_lt (nit, period))
3824 return false;
3825 }
3826
3827 /* If not, and if this is the only possible exit of the loop, see whether
3828 we can get a conservative estimate on the number of iterations of the
3829 entire loop and compare against that instead. */
3830 else if (loop_only_exit_p (loop, exit))
3831 {
3832 double_int period_value, max_niter;
3833 if (!estimated_loop_iterations (loop, true, &max_niter))
3834 return false;
3835 period_value = tree_to_double_int (period);
3836 if (double_int_ucmp (max_niter, period_value) >= 0)
3837 return false;
3838 }
3839
3840 /* Otherwise, punt. */
3841 else
dec41e98 3842 return false;
3843
cf661063 3844 cand_value_at (loop, cand, use->stmt, nit, &bnd);
590f8b68 3845
cf661063 3846 *bound = aff_combination_to_tree (&bnd);
590f8b68 3847 /* It is unlikely that computing the number of iterations using division
3848 would be more profitable than keeping the original induction variable. */
3849 if (expression_expensive_p (*bound))
3850 return false;
dec41e98 3851 return true;
3852}
3853
3854/* Determines cost of basing replacement of USE on CAND in a condition. */
3855
00991688 3856static bool
dec41e98 3857determine_use_iv_cost_condition (struct ivopts_data *data,
3858 struct iv_use *use, struct iv_cand *cand)
3859{
02f8cd87 3860 tree bound = NULL_TREE;
3861 struct iv *cmp_iv;
3862 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3c2818b0 3863 comp_cost elim_cost, express_cost, cost;
02f8cd87 3864 bool ok;
dec41e98 3865
3866 /* Only consider real candidates. */
3867 if (!cand->iv)
3868 {
3c2818b0 3869 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
00991688 3870 return false;
dec41e98 3871 }
3872
02f8cd87 3873 /* Try iv elimination. */
c4d3b428 3874 if (may_eliminate_iv (data, use, cand, &bound))
1b0af592 3875 {
3876 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3877 /* The bound is a loop invariant, so it will be only computed
3878 once. */
3c2818b0 3879 elim_cost.cost /= AVG_LOOP_NITER (data->current_loop);
1b0af592 3880 }
02f8cd87 3881 else
3c2818b0 3882 elim_cost = infinite_cost;
dec41e98 3883
02f8cd87 3884 /* Try expressing the original giv. If it is compared with an invariant,
3885 note that we cannot get rid of it. */
75a70cf9 3886 ok = extract_cond_operands (data, use->stmt, NULL, NULL, NULL, &cmp_iv);
02f8cd87 3887 gcc_assert (ok);
dec41e98 3888
02f8cd87 3889 express_cost = get_computation_cost (data, use, cand, false,
3890 &depends_on_express);
3891 fd_ivopts_data = data;
3892 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
c4d3b428 3893
b27dbec0 3894 /* Choose the better approach, preferring the eliminated IV. */
3895 if (compare_costs (elim_cost, express_cost) <= 0)
dec41e98 3896 {
02f8cd87 3897 cost = elim_cost;
3898 depends_on = depends_on_elim;
3899 depends_on_elim = NULL;
3900 }
3901 else
3902 {
3903 cost = express_cost;
3904 depends_on = depends_on_express;
3905 depends_on_express = NULL;
3906 bound = NULL_TREE;
dec41e98 3907 }
3908
02f8cd87 3909 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3910
3911 if (depends_on_elim)
3912 BITMAP_FREE (depends_on_elim);
3913 if (depends_on_express)
3914 BITMAP_FREE (depends_on_express);
3915
3c2818b0 3916 return !infinite_cost_p (cost);
dec41e98 3917}
3918
00991688 3919/* Determines cost of basing replacement of USE on CAND. Returns false
3920 if USE cannot be based on CAND. */
dec41e98 3921
00991688 3922static bool
dec41e98 3923determine_use_iv_cost (struct ivopts_data *data,
3924 struct iv_use *use, struct iv_cand *cand)
3925{
3926 switch (use->type)
3927 {
3928 case USE_NONLINEAR_EXPR:
00991688 3929 return determine_use_iv_cost_generic (data, use, cand);
dec41e98 3930
dec41e98 3931 case USE_ADDRESS:
00991688 3932 return determine_use_iv_cost_address (data, use, cand);
dec41e98 3933
3934 case USE_COMPARE:
00991688 3935 return determine_use_iv_cost_condition (data, use, cand);
dec41e98 3936
3937 default:
8c0963c4 3938 gcc_unreachable ();
dec41e98 3939 }
3940}
3941
3942/* Determines costs of basing the use of the iv on an iv candidate. */
3943
3944static void
3945determine_use_iv_costs (struct ivopts_data *data)
3946{
3947 unsigned i, j;
3948 struct iv_use *use;
3949 struct iv_cand *cand;
27335ffd 3950 bitmap to_clear = BITMAP_ALLOC (NULL);
dec41e98 3951
3952 alloc_use_cost_map (data);
3953
dec41e98 3954 for (i = 0; i < n_iv_uses (data); i++)
3955 {
3956 use = iv_use (data, i);
3957
3958 if (data->consider_all_candidates)
3959 {
3960 for (j = 0; j < n_iv_cands (data); j++)
3961 {
3962 cand = iv_cand (data, j);
3963 determine_use_iv_cost (data, use, cand);
3964 }
3965 }
3966 else
3967 {
0cc4271a 3968 bitmap_iterator bi;
3969
3970 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
dec41e98 3971 {
3972 cand = iv_cand (data, j);
00991688 3973 if (!determine_use_iv_cost (data, use, cand))
3974 bitmap_set_bit (to_clear, j);
0cc4271a 3975 }
00991688 3976
3977 /* Remove the candidates for that the cost is infinite from
3978 the list of related candidates. */
3979 bitmap_and_compl_into (use->related_cands, to_clear);
3980 bitmap_clear (to_clear);
dec41e98 3981 }
3982 }
3983
27335ffd 3984 BITMAP_FREE (to_clear);
00991688 3985
dec41e98 3986 if (dump_file && (dump_flags & TDF_DETAILS))
3987 {
3988 fprintf (dump_file, "Use-candidate costs:\n");
3989
3990 for (i = 0; i < n_iv_uses (data); i++)
3991 {
3992 use = iv_use (data, i);
3993
3994 fprintf (dump_file, "Use %d:\n", i);
3c2818b0 3995 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
dec41e98 3996 for (j = 0; j < use->n_map_members; j++)
3997 {
3998 if (!use->cost_map[j].cand
3c2818b0 3999 || infinite_cost_p (use->cost_map[j].cost))
dec41e98 4000 continue;
4001
3c2818b0 4002 fprintf (dump_file, " %d\t%d\t%d\t",
dec41e98 4003 use->cost_map[j].cand->id,
3c2818b0 4004 use->cost_map[j].cost.cost,
4005 use->cost_map[j].cost.complexity);
dec41e98 4006 if (use->cost_map[j].depends_on)
4007 bitmap_print (dump_file,
4008 use->cost_map[j].depends_on, "","");
4009 fprintf (dump_file, "\n");
4010 }
4011
4012 fprintf (dump_file, "\n");
4013 }
4014 fprintf (dump_file, "\n");
4015 }
4016}
4017
4018/* Determines cost of the candidate CAND. */
4019
4020static void
4021determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4022{
3c2818b0 4023 comp_cost cost_base;
4024 unsigned cost, cost_step;
5414270a 4025 tree base;
dec41e98 4026
4027 if (!cand->iv)
4028 {
4029 cand->cost = 0;
4030 return;
4031 }
4032
4033 /* There are two costs associated with the candidate -- its increment
4034 and its initialization. The second is almost negligible for any loop
4035 that rolls enough, so we take it just very little into account. */
4036
4037 base = cand->iv->base;
4038 cost_base = force_var_cost (data, base, NULL);
f529eb25 4039 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
dec41e98 4040
3c2818b0 4041 cost = cost_step + cost_base.cost / AVG_LOOP_NITER (current_loop);
dec41e98 4042
3c2818b0 4043 /* Prefer the original ivs unless we may gain something by replacing it.
f0b5f617 4044 The reason is to make debugging simpler; so this is not relevant for
3c2818b0 4045 artificial ivs created by other optimization passes. */
4046 if (cand->pos != IP_ORIGINAL
4047 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4048 cost++;
dec41e98 4049
4050 /* Prefer not to insert statements into latch unless there are some
4051 already (so that we do not create unnecessary jumps). */
5414270a 4052 if (cand->pos == IP_END
4053 && empty_block_p (ip_end_pos (data->current_loop)))
3c2818b0 4054 cost++;
4055
4056 cand->cost = cost;
dec41e98 4057}
4058
4059/* Determines costs of computation of the candidates. */
4060
4061static void
4062determine_iv_costs (struct ivopts_data *data)
4063{
4064 unsigned i;
4065
4066 if (dump_file && (dump_flags & TDF_DETAILS))
4067 {
4068 fprintf (dump_file, "Candidate costs:\n");
4069 fprintf (dump_file, " cand\tcost\n");
4070 }
4071
4072 for (i = 0; i < n_iv_cands (data); i++)
4073 {
4074 struct iv_cand *cand = iv_cand (data, i);
4075
4076 determine_iv_cost (data, cand);
4077
4078 if (dump_file && (dump_flags & TDF_DETAILS))
4079 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4080 }
4081
ee9e7ecf 4082 if (dump_file && (dump_flags & TDF_DETAILS))
4083 fprintf (dump_file, "\n");
dec41e98 4084}
4085
4086/* Calculates cost for having SIZE induction variables. */
4087
4088static unsigned
4089ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4090{
25153338 4091 /* We add size to the cost, so that we prefer eliminating ivs
4092 if possible. */
f529eb25 4093 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
dec41e98 4094}
4095
4096/* For each size of the induction variable set determine the penalty. */
4097
4098static void
4099determine_set_costs (struct ivopts_data *data)
4100{
4101 unsigned j, n;
75a70cf9 4102 gimple phi;
4103 gimple_stmt_iterator psi;
4104 tree op;
dec41e98 4105 struct loop *loop = data->current_loop;
0cc4271a 4106 bitmap_iterator bi;
dec41e98 4107
4108 /* We use the following model (definitely improvable, especially the
4109 cost function -- TODO):
4110
4111 We estimate the number of registers available (using MD data), name it A.
4112
4113 We estimate the number of registers used by the loop, name it U. This
4114 number is obtained as the number of loop phi nodes (not counting virtual
4115 registers and bivs) + the number of variables from outside of the loop.
4116
4117 We set a reserve R (free regs that are used for temporary computations,
4118 etc.). For now the reserve is a constant 3.
4119
4120 Let I be the number of induction variables.
4121
4122 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4123 make a lot of ivs without a reason).
4124 -- if A - R < U + I <= A, the cost is I * PRES_COST
4125 -- if U + I > A, the cost is I * PRES_COST and
4126 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4127
4128 if (dump_file && (dump_flags & TDF_DETAILS))
4129 {
4130 fprintf (dump_file, "Global costs:\n");
4131 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
f529eb25 4132 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4133 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
dec41e98 4134 }
4135
4136 n = 0;
75a70cf9 4137 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
dec41e98 4138 {
75a70cf9 4139 phi = gsi_stmt (psi);
dec41e98 4140 op = PHI_RESULT (phi);
4141
4142 if (!is_gimple_reg (op))
4143 continue;
4144
4145 if (get_iv (data, op))
4146 continue;
4147
4148 n++;
4149 }
4150
0cc4271a 4151 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
dec41e98 4152 {
4153 struct version_info *info = ver_info (data, j);
4154
4155 if (info->inv_id && info->has_nonlin_use)
4156 n++;
0cc4271a 4157 }
dec41e98 4158
fe382241 4159 data->regs_used = n;
dec41e98 4160 if (dump_file && (dump_flags & TDF_DETAILS))
4161 fprintf (dump_file, " regs_used %d\n", n);
4162
4163 if (dump_file && (dump_flags & TDF_DETAILS))
4164 {
4165 fprintf (dump_file, " cost for size:\n");
4166 fprintf (dump_file, " ivs\tcost\n");
4167 for (j = 0; j <= 2 * target_avail_regs; j++)
4168 fprintf (dump_file, " %d\t%d\n", j,
4169 ivopts_global_cost_for_size (data, j));
4170 fprintf (dump_file, "\n");
4171 }
4172}
4173
00991688 4174/* Returns true if A is a cheaper cost pair than B. */
dec41e98 4175
00991688 4176static bool
4177cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
dec41e98 4178{
3c2818b0 4179 int cmp;
4180
00991688 4181 if (!a)
4182 return false;
dec41e98 4183
00991688 4184 if (!b)
4185 return true;
4186
3c2818b0 4187 cmp = compare_costs (a->cost, b->cost);
4188 if (cmp < 0)
00991688 4189 return true;
4190
3c2818b0 4191 if (cmp > 0)
00991688 4192 return false;
4193
4194 /* In case the costs are the same, prefer the cheaper candidate. */
4195 if (a->cand->cost < b->cand->cost)
4196 return true;
4197
4198 return false;
4199}
4200
4201/* Computes the cost field of IVS structure. */
4202
4203static void
4204iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4205{
3c2818b0 4206 comp_cost cost = ivs->cand_use_cost;
4207 cost.cost += ivs->cand_cost;
4208 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
00991688 4209
4210 ivs->cost = cost;
4211}
4212
651874e1 4213/* Remove invariants in set INVS to set IVS. */
4214
4215static void
4216iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4217{
4218 bitmap_iterator bi;
4219 unsigned iid;
4220
4221 if (!invs)
4222 return;
4223
4224 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4225 {
4226 ivs->n_invariant_uses[iid]--;
4227 if (ivs->n_invariant_uses[iid] == 0)
4228 ivs->n_regs--;
4229 }
4230}
4231
00991688 4232/* Set USE not to be expressed by any candidate in IVS. */
4233
4234static void
4235iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4236 struct iv_use *use)
4237{
651874e1 4238 unsigned uid = use->id, cid;
00991688 4239 struct cost_pair *cp;
00991688 4240
4241 cp = ivs->cand_for_use[uid];
4242 if (!cp)
4243 return;
4244 cid = cp->cand->id;
4245
4246 ivs->bad_uses++;
4247 ivs->cand_for_use[uid] = NULL;
4248 ivs->n_cand_uses[cid]--;
4249
4250 if (ivs->n_cand_uses[cid] == 0)
dec41e98 4251 {
00991688 4252 bitmap_clear_bit (ivs->cands, cid);
4253 /* Do not count the pseudocandidates. */
4254 if (cp->cand->iv)
4255 ivs->n_regs--;
13ea2c1f 4256 ivs->n_cands--;
00991688 4257 ivs->cand_cost -= cp->cand->cost;
651874e1 4258
4259 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
00991688 4260 }
4261
3c2818b0 4262 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
00991688 4263
651874e1 4264 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4265 iv_ca_recount_cost (data, ivs);
4266}
4267
4268/* Add invariants in set INVS to set IVS. */
0a501ca6 4269
651874e1 4270static void
4271iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4272{
4273 bitmap_iterator bi;
4274 unsigned iid;
4275
4276 if (!invs)
4277 return;
4278
4279 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
00991688 4280 {
651874e1 4281 ivs->n_invariant_uses[iid]++;
4282 if (ivs->n_invariant_uses[iid] == 1)
4283 ivs->n_regs++;
dec41e98 4284 }
00991688 4285}
4286
4287/* Set cost pair for USE in set IVS to CP. */
4288
4289static void
4290iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4291 struct iv_use *use, struct cost_pair *cp)
4292{
651874e1 4293 unsigned uid = use->id, cid;
00991688 4294
4295 if (ivs->cand_for_use[uid] == cp)
4296 return;
4297
4298 if (ivs->cand_for_use[uid])
4299 iv_ca_set_no_cp (data, ivs, use);
4300
4301 if (cp)
dec41e98 4302 {
00991688 4303 cid = cp->cand->id;
dec41e98 4304
00991688 4305 ivs->bad_uses--;
4306 ivs->cand_for_use[uid] = cp;
4307 ivs->n_cand_uses[cid]++;
4308 if (ivs->n_cand_uses[cid] == 1)
dec41e98 4309 {
00991688 4310 bitmap_set_bit (ivs->cands, cid);
4311 /* Do not count the pseudocandidates. */
4312 if (cp->cand->iv)
4313 ivs->n_regs++;
13ea2c1f 4314 ivs->n_cands++;
00991688 4315 ivs->cand_cost += cp->cand->cost;
00991688 4316
651874e1 4317 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
dec41e98 4318 }
4319
3c2818b0 4320 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
651874e1 4321 iv_ca_set_add_invariants (ivs, cp->depends_on);
00991688 4322 iv_ca_recount_cost (data, ivs);
0cc4271a 4323 }
00991688 4324}
4325
4326/* Extend set IVS by expressing USE by some of the candidates in it
4327 if possible. */
4328
4329static void
4330iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4331 struct iv_use *use)
4332{
4333 struct cost_pair *best_cp = NULL, *cp;
4334 bitmap_iterator bi;
4335 unsigned i;
dec41e98 4336
00991688 4337 gcc_assert (ivs->upto >= use->id);
4338
4339 if (ivs->upto == use->id)
4340 {
4341 ivs->upto++;
4342 ivs->bad_uses++;
4343 }
dec41e98 4344
00991688 4345 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4346 {
4347 cp = get_use_iv_cost (data, use, iv_cand (data, i));
dec41e98 4348
00991688 4349 if (cheaper_cost_pair (cp, best_cp))
4350 best_cp = cp;
4351 }
dec41e98 4352
00991688 4353 iv_ca_set_cp (data, ivs, use, best_cp);
dec41e98 4354}
4355
00991688 4356/* Get cost for assignment IVS. */
dec41e98 4357
3c2818b0 4358static comp_cost
00991688 4359iv_ca_cost (struct iv_ca *ivs)
4360{
18381619 4361 /* This was a conditional expression but it triggered a bug in
4362 Sun C 5.5. */
ce6cd837 4363 if (ivs->bad_uses)
4364 return infinite_cost;
4365 else
4366 return ivs->cost;
00991688 4367}
4368
4369/* Returns true if all dependences of CP are among invariants in IVS. */
4370
4371static bool
4372iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
dec41e98 4373{
4374 unsigned i;
0cc4271a 4375 bitmap_iterator bi;
dec41e98 4376
00991688 4377 if (!cp->depends_on)
4378 return true;
4379
4380 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
dec41e98 4381 {
00991688 4382 if (ivs->n_invariant_uses[i] == 0)
4383 return false;
4384 }
4385
4386 return true;
4387}
4388
4389/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4390 it before NEXT_CHANGE. */
4391
4392static struct iv_ca_delta *
4393iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4394 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4395{
4c36ffe6 4396 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
00991688 4397
4398 change->use = use;
4399 change->old_cp = old_cp;
4400 change->new_cp = new_cp;
4401 change->next_change = next_change;
4402
4403 return change;
4404}
4405
13ea2c1f 4406/* Joins two lists of changes L1 and L2. Destructive -- old lists
f7f07c95 4407 are rewritten. */
13ea2c1f 4408
4409static struct iv_ca_delta *
4410iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4411{
4412 struct iv_ca_delta *last;
4413
4414 if (!l2)
4415 return l1;
4416
4417 if (!l1)
4418 return l2;
4419
4420 for (last = l1; last->next_change; last = last->next_change)
4421 continue;
4422 last->next_change = l2;
4423
4424 return l1;
4425}
4426
00991688 4427/* Returns candidate by that USE is expressed in IVS. */
4428
4429static struct cost_pair *
4430iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4431{
4432 return ivs->cand_for_use[use->id];
4433}
4434
13ea2c1f 4435/* Reverse the list of changes DELTA, forming the inverse to it. */
4436
4437static struct iv_ca_delta *
4438iv_ca_delta_reverse (struct iv_ca_delta *delta)
4439{
4440 struct iv_ca_delta *act, *next, *prev = NULL;
4441 struct cost_pair *tmp;
4442
4443 for (act = delta; act; act = next)
4444 {
4445 next = act->next_change;
4446 act->next_change = prev;
4447 prev = act;
4448
4449 tmp = act->old_cp;
4450 act->old_cp = act->new_cp;
4451 act->new_cp = tmp;
4452 }
4453
4454 return prev;
4455}
4456
00991688 4457/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4458 reverted instead. */
4459
4460static void
4461iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4462 struct iv_ca_delta *delta, bool forward)
4463{
4464 struct cost_pair *from, *to;
13ea2c1f 4465 struct iv_ca_delta *act;
00991688 4466
13ea2c1f 4467 if (!forward)
4468 delta = iv_ca_delta_reverse (delta);
00991688 4469
13ea2c1f 4470 for (act = delta; act; act = act->next_change)
4471 {
4472 from = act->old_cp;
4473 to = act->new_cp;
4474 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4475 iv_ca_set_cp (data, ivs, act->use, to);
dec41e98 4476 }
13ea2c1f 4477
4478 if (!forward)
4479 iv_ca_delta_reverse (delta);
00991688 4480}
dec41e98 4481
00991688 4482/* Returns true if CAND is used in IVS. */
dec41e98 4483
00991688 4484static bool
4485iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4486{
4487 return ivs->n_cand_uses[cand->id] > 0;
4488}
dec41e98 4489
13ea2c1f 4490/* Returns number of induction variable candidates in the set IVS. */
4491
4492static unsigned
4493iv_ca_n_cands (struct iv_ca *ivs)
4494{
4495 return ivs->n_cands;
4496}
4497
00991688 4498/* Free the list of changes DELTA. */
4499
4500static void
4501iv_ca_delta_free (struct iv_ca_delta **delta)
4502{
4503 struct iv_ca_delta *act, *next;
4504
4505 for (act = *delta; act; act = next)
0cc4271a 4506 {
00991688 4507 next = act->next_change;
4508 free (act);
0cc4271a 4509 }
dec41e98 4510
00991688 4511 *delta = NULL;
4512}
4513
4514/* Allocates new iv candidates assignment. */
4515
4516static struct iv_ca *
4517iv_ca_new (struct ivopts_data *data)
4518{
4c36ffe6 4519 struct iv_ca *nw = XNEW (struct iv_ca);
dec41e98 4520
00991688 4521 nw->upto = 0;
4522 nw->bad_uses = 0;
4c36ffe6 4523 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4524 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
27335ffd 4525 nw->cands = BITMAP_ALLOC (NULL);
13ea2c1f 4526 nw->n_cands = 0;
00991688 4527 nw->n_regs = 0;
3c2818b0 4528 nw->cand_use_cost = zero_cost;
00991688 4529 nw->cand_cost = 0;
4c36ffe6 4530 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
3c2818b0 4531 nw->cost = zero_cost;
00991688 4532
4533 return nw;
4534}
4535
4536/* Free memory occupied by the set IVS. */
4537
4538static void
4539iv_ca_free (struct iv_ca **ivs)
4540{
4541 free ((*ivs)->cand_for_use);
4542 free ((*ivs)->n_cand_uses);
27335ffd 4543 BITMAP_FREE ((*ivs)->cands);
00991688 4544 free ((*ivs)->n_invariant_uses);
4545 free (*ivs);
4546 *ivs = NULL;
4547}
4548
4549/* Dumps IVS to FILE. */
4550
4551static void
4552iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4553{
4554 const char *pref = " invariants ";
4555 unsigned i;
3c2818b0 4556 comp_cost cost = iv_ca_cost (ivs);
00991688 4557
3c2818b0 4558 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
00991688 4559 bitmap_print (file, ivs->cands, " candidates ","\n");
4560
4561 for (i = 1; i <= data->max_inv_id; i++)
4562 if (ivs->n_invariant_uses[i])
4563 {
4564 fprintf (file, "%s%d", pref, i);
4565 pref = ", ";
4566 }
4567 fprintf (file, "\n");
4568}
4569
4570/* Try changing candidate in IVS to CAND for each use. Return cost of the
13ea2c1f 4571 new set, and store differences in DELTA. Number of induction variables
4572 in the new set is stored to N_IVS. */
00991688 4573
3c2818b0 4574static comp_cost
00991688 4575iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
13ea2c1f 4576 struct iv_cand *cand, struct iv_ca_delta **delta,
4577 unsigned *n_ivs)
00991688 4578{
3c2818b0 4579 unsigned i;
4580 comp_cost cost;
00991688 4581 struct iv_use *use;
4582 struct cost_pair *old_cp, *new_cp;
4583
4584 *delta = NULL;
4585 for (i = 0; i < ivs->upto; i++)
4586 {
4587 use = iv_use (data, i);
4588 old_cp = iv_ca_cand_for_use (ivs, use);
4589
4590 if (old_cp
4591 && old_cp->cand == cand)
4592 continue;
4593
4594 new_cp = get_use_iv_cost (data, use, cand);
4595 if (!new_cp)
4596 continue;
4597
4598 if (!iv_ca_has_deps (ivs, new_cp))
4599 continue;
4600
4601 if (!cheaper_cost_pair (new_cp, old_cp))
4602 continue;
4603
4604 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4605 }
4606
4607 iv_ca_delta_commit (data, ivs, *delta, true);
4608 cost = iv_ca_cost (ivs);
13ea2c1f 4609 if (n_ivs)
4610 *n_ivs = iv_ca_n_cands (ivs);
00991688 4611 iv_ca_delta_commit (data, ivs, *delta, false);
dec41e98 4612
4613 return cost;
4614}
4615
d2aa5f30 4616/* Try narrowing set IVS by removing CAND. Return the cost of
00991688 4617 the new set and store the differences in DELTA. */
dec41e98 4618
3c2818b0 4619static comp_cost
00991688 4620iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4621 struct iv_cand *cand, struct iv_ca_delta **delta)
dec41e98 4622{
00991688 4623 unsigned i, ci;
4624 struct iv_use *use;
4625 struct cost_pair *old_cp, *new_cp, *cp;
4626 bitmap_iterator bi;
4627 struct iv_cand *cnd;
3c2818b0 4628 comp_cost cost;
00991688 4629
4630 *delta = NULL;
4631 for (i = 0; i < n_iv_uses (data); i++)
4632 {
4633 use = iv_use (data, i);
4634
4635 old_cp = iv_ca_cand_for_use (ivs, use);
4636 if (old_cp->cand != cand)
4637 continue;
4638
4639 new_cp = NULL;
4640
4641 if (data->consider_all_candidates)
4642 {
4643 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4644 {
4645 if (ci == cand->id)
4646 continue;
4647
4648 cnd = iv_cand (data, ci);
4649
4650 cp = get_use_iv_cost (data, use, cnd);
4651 if (!cp)
4652 continue;
4653 if (!iv_ca_has_deps (ivs, cp))
4654 continue;
4655
4656 if (!cheaper_cost_pair (cp, new_cp))
4657 continue;
4658
4659 new_cp = cp;
4660 }
4661 }
4662 else
4663 {
4664 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4665 {
4666 if (ci == cand->id)
4667 continue;
4668
4669 cnd = iv_cand (data, ci);
4670
4671 cp = get_use_iv_cost (data, use, cnd);
4672 if (!cp)
4673 continue;
4674 if (!iv_ca_has_deps (ivs, cp))
4675 continue;
4676
4677 if (!cheaper_cost_pair (cp, new_cp))
4678 continue;
4679
4680 new_cp = cp;
4681 }
4682 }
4683
4684 if (!new_cp)
4685 {
4686 iv_ca_delta_free (delta);
3c2818b0 4687 return infinite_cost;
00991688 4688 }
4689
4690 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4691 }
4692
4693 iv_ca_delta_commit (data, ivs, *delta, true);
4694 cost = iv_ca_cost (ivs);
4695 iv_ca_delta_commit (data, ivs, *delta, false);
4696
4697 return cost;
dec41e98 4698}
4699
13ea2c1f 4700/* Try optimizing the set of candidates IVS by removing candidates different
4701 from to EXCEPT_CAND from it. Return cost of the new set, and store
4702 differences in DELTA. */
4703
3c2818b0 4704static comp_cost
13ea2c1f 4705iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4706 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4707{
4708 bitmap_iterator bi;
4709 struct iv_ca_delta *act_delta, *best_delta;
3c2818b0 4710 unsigned i;
4711 comp_cost best_cost, acost;
13ea2c1f 4712 struct iv_cand *cand;
4713
4714 best_delta = NULL;
4715 best_cost = iv_ca_cost (ivs);
4716
4717 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4718 {
4719 cand = iv_cand (data, i);
4720
4721 if (cand == except_cand)
4722 continue;
4723
4724 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4725
3c2818b0 4726 if (compare_costs (acost, best_cost) < 0)
13ea2c1f 4727 {
4728 best_cost = acost;
4729 iv_ca_delta_free (&best_delta);
4730 best_delta = act_delta;
4731 }
4732 else
4733 iv_ca_delta_free (&act_delta);
4734 }
4735
4736 if (!best_delta)
4737 {
4738 *delta = NULL;
4739 return best_cost;
4740 }
4741
4742 /* Recurse to possibly remove other unnecessary ivs. */
4743 iv_ca_delta_commit (data, ivs, best_delta, true);
4744 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4745 iv_ca_delta_commit (data, ivs, best_delta, false);
4746 *delta = iv_ca_delta_join (best_delta, *delta);
4747 return best_cost;
4748}
4749
00991688 4750/* Tries to extend the sets IVS in the best possible way in order
dec41e98 4751 to express the USE. */
4752
4753static bool
00991688 4754try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
dec41e98 4755 struct iv_use *use)
4756{
3c2818b0 4757 comp_cost best_cost, act_cost;
dec41e98 4758 unsigned i;
dede8dcc 4759 bitmap_iterator bi;
4760 struct iv_cand *cand;
00991688 4761 struct iv_ca_delta *best_delta = NULL, *act_delta;
4762 struct cost_pair *cp;
4763
4764 iv_ca_add_use (data, ivs, use);
4765 best_cost = iv_ca_cost (ivs);
dec41e98 4766
00991688 4767 cp = iv_ca_cand_for_use (ivs, use);
4768 if (cp)
4769 {
4770 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4771 iv_ca_set_no_cp (data, ivs, use);
4772 }
dec41e98 4773
3c2818b0 4774 /* First try important candidates not based on any memory object. Only if
4775 this fails, try the specific ones. Rationale -- in loops with many
4776 variables the best choice often is to use just one generic biv. If we
4777 added here many ivs specific to the uses, the optimization algorithm later
4778 would be likely to get stuck in a local minimum, thus causing us to create
4779 too many ivs. The approach from few ivs to more seems more likely to be
4780 successful -- starting from few ivs, replacing an expensive use by a
4781 specific iv should always be a win. */
dede8dcc 4782 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
dec41e98 4783 {
dede8dcc 4784 cand = iv_cand (data, i);
4785
3c2818b0 4786 if (cand->iv->base_object != NULL_TREE)
4787 continue;
4788
00991688 4789 if (iv_ca_cand_used_p (ivs, cand))
dec41e98 4790 continue;
4791
00991688 4792 cp = get_use_iv_cost (data, use, cand);
4793 if (!cp)
4794 continue;
4795
4796 iv_ca_set_cp (data, ivs, use, cp);
13ea2c1f 4797 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
00991688 4798 iv_ca_set_no_cp (data, ivs, use);
4799 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
dec41e98 4800
3c2818b0 4801 if (compare_costs (act_cost, best_cost) < 0)
dec41e98 4802 {
4803 best_cost = act_cost;
00991688 4804
4805 iv_ca_delta_free (&best_delta);
4806 best_delta = act_delta;
dec41e98 4807 }
00991688 4808 else
4809 iv_ca_delta_free (&act_delta);
dec41e98 4810 }
4811
3c2818b0 4812 if (infinite_cost_p (best_cost))
dede8dcc 4813 {
4814 for (i = 0; i < use->n_map_members; i++)
4815 {
4816 cp = use->cost_map + i;
00991688 4817 cand = cp->cand;
4818 if (!cand)
dede8dcc 4819 continue;
4820
4821 /* Already tried this. */
3c2818b0 4822 if (cand->important && cand->iv->base_object == NULL_TREE)
00991688 4823 continue;
4824
4825 if (iv_ca_cand_used_p (ivs, cand))
dede8dcc 4826 continue;
4827
00991688 4828 act_delta = NULL;
4829 iv_ca_set_cp (data, ivs, use, cp);
13ea2c1f 4830 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
00991688 4831 iv_ca_set_no_cp (data, ivs, use);
4832 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4833 cp, act_delta);
dede8dcc 4834
3c2818b0 4835 if (compare_costs (act_cost, best_cost) < 0)
dede8dcc 4836 {
4837 best_cost = act_cost;
00991688 4838
4839 if (best_delta)
4840 iv_ca_delta_free (&best_delta);
4841 best_delta = act_delta;
dede8dcc 4842 }
00991688 4843 else
4844 iv_ca_delta_free (&act_delta);
dede8dcc 4845 }
4846 }
4847
00991688 4848 iv_ca_delta_commit (data, ivs, best_delta, true);
4849 iv_ca_delta_free (&best_delta);
dec41e98 4850
3c2818b0 4851 return !infinite_cost_p (best_cost);
dec41e98 4852}
4853
00991688 4854/* Finds an initial assignment of candidates to uses. */
dec41e98 4855
00991688 4856static struct iv_ca *
4857get_initial_solution (struct ivopts_data *data)
dec41e98 4858{
00991688 4859 struct iv_ca *ivs = iv_ca_new (data);
dec41e98 4860 unsigned i;
4861
4862 for (i = 0; i < n_iv_uses (data); i++)
00991688 4863 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4864 {
4865 iv_ca_free (&ivs);
4866 return NULL;
4867 }
dec41e98 4868
00991688 4869 return ivs;
dec41e98 4870}
4871
00991688 4872/* Tries to improve set of induction variables IVS. */
dec41e98 4873
4874static bool
00991688 4875try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
dec41e98 4876{
3c2818b0 4877 unsigned i, n_ivs;
4878 comp_cost acost, best_cost = iv_ca_cost (ivs);
13ea2c1f 4879 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
00991688 4880 struct iv_cand *cand;
dec41e98 4881
13ea2c1f 4882 /* Try extending the set of induction variables by one. */
dec41e98 4883 for (i = 0; i < n_iv_cands (data); i++)
4884 {
00991688 4885 cand = iv_cand (data, i);
4886
4887 if (iv_ca_cand_used_p (ivs, cand))
13ea2c1f 4888 continue;
4889
4890 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4891 if (!act_delta)
4892 continue;
4893
4894 /* If we successfully added the candidate and the set is small enough,
4895 try optimizing it by removing other candidates. */
4896 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4897 {
4898 iv_ca_delta_commit (data, ivs, act_delta, true);
4899 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4900 iv_ca_delta_commit (data, ivs, act_delta, false);
4901 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4902 }
dec41e98 4903
3c2818b0 4904 if (compare_costs (acost, best_cost) < 0)
dec41e98 4905 {
00991688 4906 best_cost = acost;
13ea2c1f 4907 iv_ca_delta_free (&best_delta);
00991688 4908 best_delta = act_delta;
dec41e98 4909 }
dec41e98 4910 else
00991688 4911 iv_ca_delta_free (&act_delta);
dec41e98 4912 }
4913
00991688 4914 if (!best_delta)
13ea2c1f 4915 {
4916 /* Try removing the candidates from the set instead. */
4917 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4918
4919 /* Nothing more we can do. */
4920 if (!best_delta)
4921 return false;
4922 }
dec41e98 4923
00991688 4924 iv_ca_delta_commit (data, ivs, best_delta, true);
3c2818b0 4925 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
00991688 4926 iv_ca_delta_free (&best_delta);
dec41e98 4927 return true;
4928}
4929
4930/* Attempts to find the optimal set of induction variables. We do simple
4931 greedy heuristic -- we try to replace at most one candidate in the selected
4932 solution and remove the unused ivs while this improves the cost. */
4933
00991688 4934static struct iv_ca *
dec41e98 4935find_optimal_iv_set (struct ivopts_data *data)
4936{
00991688 4937 unsigned i;
4938 struct iv_ca *set;
dec41e98 4939 struct iv_use *use;
4940
00991688 4941 /* Get the initial solution. */
4942 set = get_initial_solution (data);
4943 if (!set)
dec41e98 4944 {
4945 if (dump_file && (dump_flags & TDF_DETAILS))
4946 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
dec41e98 4947 return NULL;
4948 }
4949
4950 if (dump_file && (dump_flags & TDF_DETAILS))
4951 {
00991688 4952 fprintf (dump_file, "Initial set of candidates:\n");
4953 iv_ca_dump (data, dump_file, set);
dec41e98 4954 }
4955
00991688 4956 while (try_improve_iv_set (data, set))
dec41e98 4957 {
4958 if (dump_file && (dump_flags & TDF_DETAILS))
4959 {
00991688 4960 fprintf (dump_file, "Improved to:\n");
4961 iv_ca_dump (data, dump_file, set);
dec41e98 4962 }
4963 }
4964
4965 if (dump_file && (dump_flags & TDF_DETAILS))
3c2818b0 4966 {
4967 comp_cost cost = iv_ca_cost (set);
4968 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
4969 }
dec41e98 4970
4971 for (i = 0; i < n_iv_uses (data); i++)
4972 {
4973 use = iv_use (data, i);
00991688 4974 use->selected = iv_ca_cand_for_use (set, use)->cand;
dec41e98 4975 }
4976
dec41e98 4977 return set;
4978}
4979
4980/* Creates a new induction variable corresponding to CAND. */
4981
4982static void
4983create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4984{
75a70cf9 4985 gimple_stmt_iterator incr_pos;
dec41e98 4986 tree base;
4987 bool after = false;
4988
4989 if (!cand->iv)
4990 return;
4991
4992 switch (cand->pos)
4993 {
4994 case IP_NORMAL:
75a70cf9 4995 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
dec41e98 4996 break;
4997
4998 case IP_END:
75a70cf9 4999 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
dec41e98 5000 after = true;
5001 break;
5002
5003 case IP_ORIGINAL:
5004 /* Mark that the iv is preserved. */
5005 name_info (data, cand->var_before)->preserve_biv = true;
5006 name_info (data, cand->var_after)->preserve_biv = true;
5007
5008 /* Rewrite the increment so that it uses var_before directly. */
5009 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5010
5011 return;
5012 }
5013
5014 gimple_add_tmp_var (cand->var_before);
987392e5 5015 add_referenced_var (cand->var_before);
dec41e98 5016
5017 base = unshare_expr (cand->iv->base);
5018
651874e1 5019 create_iv (base, unshare_expr (cand->iv->step),
5020 cand->var_before, data->current_loop,
dec41e98 5021 &incr_pos, after, &cand->var_before, &cand->var_after);
5022}
5023
5024/* Creates new induction variables described in SET. */
5025
5026static void
00991688 5027create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
dec41e98 5028{
5029 unsigned i;
5030 struct iv_cand *cand;
0cc4271a 5031 bitmap_iterator bi;
dec41e98 5032
00991688 5033 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
dec41e98 5034 {
5035 cand = iv_cand (data, i);
5036 create_new_iv (data, cand);
0cc4271a 5037 }
dec41e98 5038}
5039
255b6be7 5040/* Returns the phi-node in BB with result RESULT. */
5041
5042static gimple
5043get_phi_with_result (basic_block bb, tree result)
5044{
5045 gimple_stmt_iterator i = gsi_start_phis (bb);
5046
5047 for (; !gsi_end_p (i); gsi_next (&i))
5048 if (gimple_phi_result (gsi_stmt (i)) == result)
5049 return gsi_stmt (i);
5050
5051 gcc_unreachable ();
5052 return NULL;
5053}
5054
5055
dec41e98 5056/* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5057 is true, remove also the ssa name defined by the statement. */
5058
5059static void
75a70cf9 5060remove_statement (gimple stmt, bool including_defined_name)
dec41e98 5061{
75a70cf9 5062 if (gimple_code (stmt) == GIMPLE_PHI)
255b6be7 5063 {
5064 gimple bb_phi = get_phi_with_result (gimple_bb (stmt),
5065 gimple_phi_result (stmt));
5066 gimple_stmt_iterator bsi = gsi_for_stmt (bb_phi);
5067 remove_phi_node (&bsi, including_defined_name);
5068 }
dec41e98 5069 else
5070 {
255b6be7 5071 gimple_stmt_iterator bsi = gsi_for_stmt (stmt);
75a70cf9 5072 gsi_remove (&bsi, true);
8d9f686b 5073 release_defs (stmt);
dec41e98 5074 }
5075}
5076
5077/* Rewrites USE (definition of iv used in a nonlinear expression)
5078 using candidate CAND. */
5079
5080static void
5081rewrite_use_nonlinear_expr (struct ivopts_data *data,
5082 struct iv_use *use, struct iv_cand *cand)
5083{
99e833c3 5084 tree comp;
75a70cf9 5085 tree op, tgt;
5086 gimple ass;
5087 gimple_stmt_iterator bsi;
99e833c3 5088
5089 /* An important special case -- if we are asked to express value of
5090 the original iv by itself, just exit; there is no need to
5091 introduce a new computation (that might also need casting the
5092 variable to unsigned and back). */
5093 if (cand->pos == IP_ORIGINAL
c0b52952 5094 && cand->incremented_at == use->stmt)
99e833c3 5095 {
c0b52952 5096 tree step, ctype, utype;
75a70cf9 5097 enum tree_code incr_code = PLUS_EXPR, old_code;
c0b52952 5098
75a70cf9 5099 gcc_assert (is_gimple_assign (use->stmt));
5100 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
c0b52952 5101
5102 step = cand->iv->step;
5103 ctype = TREE_TYPE (step);
5104 utype = TREE_TYPE (cand->var_after);
5105 if (TREE_CODE (step) == NEGATE_EXPR)
5106 {
5107 incr_code = MINUS_EXPR;
5108 step = TREE_OPERAND (step, 0);
5109 }
5110
5111 /* Check whether we may leave the computation unchanged.
5112 This is the case only if it does not rely on other
5113 computations in the loop -- otherwise, the computation
5114 we rely upon may be removed in remove_unused_ivs,
5115 thus leading to ICE. */
75a70cf9 5116 old_code = gimple_assign_rhs_code (use->stmt);
5117 if (old_code == PLUS_EXPR
5118 || old_code == MINUS_EXPR
5119 || old_code == POINTER_PLUS_EXPR)
c0b52952 5120 {
75a70cf9 5121 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5122 op = gimple_assign_rhs2 (use->stmt);
5123 else if (old_code != MINUS_EXPR
5124 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5125 op = gimple_assign_rhs1 (use->stmt);
c0b52952 5126 else
5127 op = NULL_TREE;
5128 }
5129 else
5130 op = NULL_TREE;
99e833c3 5131
c0b52952 5132 if (op
5133 && (TREE_CODE (op) == INTEGER_CST
5134 || operand_equal_p (op, step, 0)))
99e833c3 5135 return;
c0b52952 5136
5137 /* Otherwise, add the necessary computations to express
5138 the iv. */
5139 op = fold_convert (ctype, cand->var_before);
5140 comp = fold_convert (utype,
5141 build2 (incr_code, ctype, op,
5142 unshare_expr (step)));
99e833c3 5143 }
c0b52952 5144 else
d3610bea 5145 {
5146 comp = get_computation (data->current_loop, use, cand);
5147 gcc_assert (comp != NULL_TREE);
5148 }
99e833c3 5149
75a70cf9 5150 switch (gimple_code (use->stmt))
dec41e98 5151 {
75a70cf9 5152 case GIMPLE_PHI:
dec41e98 5153 tgt = PHI_RESULT (use->stmt);
5154
5155 /* If we should keep the biv, do not replace it. */
5156 if (name_info (data, tgt)->preserve_biv)
5157 return;
5158
75a70cf9 5159 bsi = gsi_after_labels (gimple_bb (use->stmt));
8c0963c4 5160 break;
5161
75a70cf9 5162 case GIMPLE_ASSIGN:
5163 tgt = gimple_assign_lhs (use->stmt);
5164 bsi = gsi_for_stmt (use->stmt);
8c0963c4 5165 break;
5166
5167 default:
5168 gcc_unreachable ();
dec41e98 5169 }
dec41e98 5170
75a70cf9 5171 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5172 true, GSI_SAME_STMT);
dec41e98 5173
75a70cf9 5174 if (gimple_code (use->stmt) == GIMPLE_PHI)
dec41e98 5175 {
75a70cf9 5176 ass = gimple_build_assign (tgt, op);
5177 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
dec41e98 5178 remove_statement (use->stmt, false);
dec41e98 5179 }
5180 else
75a70cf9 5181 {
5182 gimple_assign_set_rhs_from_tree (&bsi, op);
5183 use->stmt = gsi_stmt (bsi);
5184 }
dec41e98 5185}
5186
5187/* Replaces ssa name in index IDX by its basic variable. Callback for
5188 for_each_index. */
5189
5190static bool
a59824bb 5191idx_remove_ssa_names (tree base, tree *idx,
dec41e98 5192 void *data ATTRIBUTE_UNUSED)
5193{
a59824bb 5194 tree *op;
5195
dec41e98 5196 if (TREE_CODE (*idx) == SSA_NAME)
5197 *idx = SSA_NAME_VAR (*idx);
a59824bb 5198
5e19919e 5199 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
a59824bb 5200 {
5201 op = &TREE_OPERAND (base, 2);
5202 if (*op
5203 && TREE_CODE (*op) == SSA_NAME)
5204 *op = SSA_NAME_VAR (*op);
5205 op = &TREE_OPERAND (base, 3);
5206 if (*op
5207 && TREE_CODE (*op) == SSA_NAME)
5208 *op = SSA_NAME_VAR (*op);
5209 }
5210
dec41e98 5211 return true;
5212}
5213
5214/* Unshares REF and replaces ssa names inside it by their basic variables. */
5215
5216static tree
5217unshare_and_remove_ssa_names (tree ref)
5218{
5219 ref = unshare_expr (ref);
5220 for_each_index (&ref, idx_remove_ssa_names, NULL);
5221
5222 return ref;
5223}
5224
aed164c3 5225/* Copies the reference information from OLD_REF to NEW_REF. */
88dbf20f 5226
aed164c3 5227static void
5228copy_ref_info (tree new_ref, tree old_ref)
5229{
5230 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5231 copy_mem_ref_info (new_ref, old_ref);
5232 else
dd277d48 5233 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
dec41e98 5234}
5235
5236/* Rewrites USE (address that is an iv) using candidate CAND. */
5237
5238static void
5239rewrite_use_address (struct ivopts_data *data,
5240 struct iv_use *use, struct iv_cand *cand)
5241{
d3610bea 5242 aff_tree aff;
75a70cf9 5243 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
aed164c3 5244 tree ref;
d3610bea 5245 bool ok;
dec41e98 5246
d3610bea 5247 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5248 gcc_assert (ok);
aed164c3 5249 unshare_aff_combination (&aff);
dec41e98 5250
f529eb25 5251 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, data->speed);
aed164c3 5252 copy_ref_info (ref, *use->op_p);
5253 *use->op_p = ref;
dec41e98 5254}
5255
5256/* Rewrites USE (the condition such that one of the arguments is an iv) using
5257 candidate CAND. */
5258
5259static void
5260rewrite_use_compare (struct ivopts_data *data,
5261 struct iv_use *use, struct iv_cand *cand)
5262{
02f8cd87 5263 tree comp, *var_p, op, bound;
75a70cf9 5264 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
dec41e98 5265 enum tree_code compare;
c4d3b428 5266 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
02f8cd87 5267 bool ok;
5268
c4d3b428 5269 bound = cp->value;
5270 if (bound)
dec41e98 5271 {
34bbbd2e 5272 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5273 tree var_type = TREE_TYPE (var);
b718fe63 5274 gimple_seq stmts;
34bbbd2e 5275
c4d3b428 5276 compare = iv_elimination_compare (data, use);
02f8cd87 5277 bound = unshare_expr (fold_convert (var_type, bound));
b718fe63 5278 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5279 if (stmts)
5280 gsi_insert_seq_on_edge_immediate (
5281 loop_preheader_edge (data->current_loop),
5282 stmts);
dec41e98 5283
75a70cf9 5284 gimple_cond_set_lhs (use->stmt, var);
5285 gimple_cond_set_code (use->stmt, compare);
5286 gimple_cond_set_rhs (use->stmt, op);
dec41e98 5287 return;
5288 }
5289
5290 /* The induction variable elimination failed; just express the original
5291 giv. */
aed164c3 5292 comp = get_computation (data->current_loop, use, cand);
d3610bea 5293 gcc_assert (comp != NULL_TREE);
dec41e98 5294
75a70cf9 5295 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
02f8cd87 5296 gcc_assert (ok);
dec41e98 5297
75a70cf9 5298 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5299 true, GSI_SAME_STMT);
dec41e98 5300}
5301
dec41e98 5302/* Rewrites USE using candidate CAND. */
5303
5304static void
de6ed584 5305rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
dec41e98 5306{
de6ed584 5307 push_stmt_changes (&use->stmt);
5308
dec41e98 5309 switch (use->type)
5310 {
5311 case USE_NONLINEAR_EXPR:
5312 rewrite_use_nonlinear_expr (data, use, cand);
5313 break;
5314
dec41e98 5315 case USE_ADDRESS:
5316 rewrite_use_address (data, use, cand);
5317 break;
5318
5319 case USE_COMPARE:
5320 rewrite_use_compare (data, use, cand);
5321 break;
5322
5323 default:
8c0963c4 5324 gcc_unreachable ();
dec41e98 5325 }
de6ed584 5326
5327 pop_stmt_changes (&use->stmt);
dec41e98 5328}
5329
5330/* Rewrite the uses using the selected induction variables. */
5331
5332static void
5333rewrite_uses (struct ivopts_data *data)
5334{
5335 unsigned i;
5336 struct iv_cand *cand;
5337 struct iv_use *use;
5338
5339 for (i = 0; i < n_iv_uses (data); i++)
5340 {
5341 use = iv_use (data, i);
5342 cand = use->selected;
8c0963c4 5343 gcc_assert (cand);
dec41e98 5344
5345 rewrite_use (data, use, cand);
5346 }
5347}
5348
5349/* Removes the ivs that are not used after rewriting. */
5350
5351static void
5352remove_unused_ivs (struct ivopts_data *data)
5353{
5354 unsigned j;
0cc4271a 5355 bitmap_iterator bi;
dec41e98 5356
0cc4271a 5357 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
dec41e98 5358 {
5359 struct version_info *info;
5360
5361 info = ver_info (data, j);
5362 if (info->iv
7a973feb 5363 && !integer_zerop (info->iv->step)
dec41e98 5364 && !info->inv_id
5365 && !info->iv->have_use_for
5366 && !info->preserve_biv)
5367 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
0cc4271a 5368 }
dec41e98 5369}
5370
5371/* Frees data allocated by the optimization of a single loop. */
5372
5373static void
5374free_loop_data (struct ivopts_data *data)
5375{
5376 unsigned i, j;
0cc4271a 5377 bitmap_iterator bi;
7d18ea07 5378 tree obj;
dec41e98 5379
b30a8715 5380 if (data->niters)
5381 {
5382 pointer_map_destroy (data->niters);
5383 data->niters = NULL;
5384 }
b091dc59 5385
0cc4271a 5386 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
dec41e98 5387 {
5388 struct version_info *info;
5389
5390 info = ver_info (data, i);
5391 if (info->iv)
5392 free (info->iv);
5393 info->iv = NULL;
5394 info->has_nonlin_use = false;
5395 info->preserve_biv = false;
5396 info->inv_id = 0;
0cc4271a 5397 }
dec41e98 5398 bitmap_clear (data->relevant);
00991688 5399 bitmap_clear (data->important_candidates);
dec41e98 5400
5401 for (i = 0; i < n_iv_uses (data); i++)
5402 {
5403 struct iv_use *use = iv_use (data, i);
5404
5405 free (use->iv);
27335ffd 5406 BITMAP_FREE (use->related_cands);
dec41e98 5407 for (j = 0; j < use->n_map_members; j++)
5408 if (use->cost_map[j].depends_on)
27335ffd 5409 BITMAP_FREE (use->cost_map[j].depends_on);
dec41e98 5410 free (use->cost_map);
5411 free (use);
5412 }
7d18ea07 5413 VEC_truncate (iv_use_p, data->iv_uses, 0);
dec41e98 5414
5415 for (i = 0; i < n_iv_cands (data); i++)
5416 {
5417 struct iv_cand *cand = iv_cand (data, i);
5418
5419 if (cand->iv)
5420 free (cand->iv);
651874e1 5421 if (cand->depends_on)
5422 BITMAP_FREE (cand->depends_on);
dec41e98 5423 free (cand);
5424 }
7d18ea07 5425 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
dec41e98 5426
5427 if (data->version_info_size < num_ssa_names)
5428 {
5429 data->version_info_size = 2 * num_ssa_names;
5430 free (data->version_info);
4c36ffe6 5431 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
dec41e98 5432 }
5433
5434 data->max_inv_id = 0;
5435
7d18ea07 5436 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5437 SET_DECL_RTL (obj, NULL_RTX);
dec41e98 5438
7d18ea07 5439 VEC_truncate (tree, decl_rtl_to_reset, 0);
dec41e98 5440}
5441
5442/* Finalizes data structures used by the iv optimization pass. LOOPS is the
5443 loop tree. */
5444
5445static void
fe382241 5446tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
dec41e98 5447{
dec41e98 5448 free_loop_data (data);
5449 free (data->version_info);
27335ffd 5450 BITMAP_FREE (data->relevant);
5451 BITMAP_FREE (data->important_candidates);
dec41e98 5452
7d18ea07 5453 VEC_free (tree, heap, decl_rtl_to_reset);
5454 VEC_free (iv_use_p, heap, data->iv_uses);
5455 VEC_free (iv_cand_p, heap, data->iv_candidates);
dec41e98 5456}
5457
5458/* Optimizes the LOOP. Returns true if anything changed. */
5459
5460static bool
5461tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5462{
5463 bool changed = false;
00991688 5464 struct iv_ca *iv_ca;
dec41e98 5465 edge exit;
5466
b30a8715 5467 gcc_assert (!data->niters);
dec41e98 5468 data->current_loop = loop;
f529eb25 5469 data->speed = optimize_loop_for_speed_p (loop);
dec41e98 5470
5471 if (dump_file && (dump_flags & TDF_DETAILS))
5472 {
5473 fprintf (dump_file, "Processing loop %d\n", loop->num);
5474
5475 exit = single_dom_exit (loop);
5476 if (exit)
5477 {
5478 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5479 exit->src->index, exit->dest->index);
75a70cf9 5480 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
dec41e98 5481 fprintf (dump_file, "\n");
5482 }
5483
5484 fprintf (dump_file, "\n");
5485 }
5486
5487 /* For each ssa name determines whether it behaves as an induction variable
5488 in some loop. */
5489 if (!find_induction_variables (data))
5490 goto finish;
5491
5492 /* Finds interesting uses (item 1). */
5493 find_interesting_uses (data);
5494 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5495 goto finish;
5496
5497 /* Finds candidates for the induction variables (item 2). */
5498 find_iv_candidates (data);
5499
5500 /* Calculates the costs (item 3, part 1). */
5501 determine_use_iv_costs (data);
5502 determine_iv_costs (data);
5503 determine_set_costs (data);
5504
5505 /* Find the optimal set of induction variables (item 3, part 2). */
00991688 5506 iv_ca = find_optimal_iv_set (data);
5507 if (!iv_ca)
dec41e98 5508 goto finish;
5509 changed = true;
5510
5511 /* Create the new induction variables (item 4, part 1). */
00991688 5512 create_new_ivs (data, iv_ca);
5513 iv_ca_free (&iv_ca);
dec41e98 5514
5515 /* Rewrite the uses (item 4, part 2). */
5516 rewrite_uses (data);
5517
5518 /* Remove the ivs that are unused after rewriting. */
5519 remove_unused_ivs (data);
5520
dec41e98 5521 /* We have changed the structure of induction variables; it might happen
5522 that definitions in the scev database refer to some of them that were
5523 eliminated. */
5524 scev_reset ();
5525
5526finish:
5527 free_loop_data (data);
5528
5529 return changed;
5530}
5531
7194de72 5532/* Main entry point. Optimizes induction variables in loops. */
dec41e98 5533
5534void
7194de72 5535tree_ssa_iv_optimize (void)
dec41e98 5536{
5537 struct loop *loop;
5538 struct ivopts_data data;
17519ba0 5539 loop_iterator li;
dec41e98 5540
fe382241 5541 tree_ssa_iv_optimize_init (&data);
dec41e98 5542
5543 /* Optimize the loops starting with the innermost ones. */
17519ba0 5544 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
dec41e98 5545 {
abf7e53a 5546 if (dump_file && (dump_flags & TDF_DETAILS))
5547 flow_loop_dump (loop, dump_file, NULL, 1);
0e269eea 5548
5549 tree_ssa_iv_optimize_loop (&data, loop);
dec41e98 5550 }
5551
fe382241 5552 tree_ssa_iv_optimize_finalize (&data);
dec41e98 5553}