]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
dojump.h: New header file.
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
8b11a64c 1/* Induction variable optimizations.
5624e564 2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
b8698a0f 3
8b11a64c 4This file is part of GCC.
b8698a0f 5
8b11a64c
ZD
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
9dcd6f09 8Free Software Foundation; either version 3, or (at your option) any
8b11a64c 9later version.
b8698a0f 10
8b11a64c
ZD
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
b8698a0f 15
8b11a64c 16You should have received a copy of the GNU General Public License
9dcd6f09
NC
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
8b11a64c
ZD
19
20/* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
b8698a0f 58
8b11a64c
ZD
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
63
64#include "config.h"
65#include "system.h"
66#include "coretypes.h"
67#include "tm.h"
40e23961
MC
68#include "hash-set.h"
69#include "machmode.h"
70#include "vec.h"
71#include "double-int.h"
72#include "input.h"
73#include "alias.h"
74#include "symtab.h"
75#include "wide-int.h"
76#include "inchash.h"
8b11a64c 77#include "tree.h"
40e23961 78#include "fold-const.h"
d8a2d370 79#include "stor-layout.h"
8b11a64c 80#include "tm_p.h"
60393bbc 81#include "predict.h"
60393bbc 82#include "hard-reg-set.h"
60393bbc
AM
83#include "function.h"
84#include "dominance.h"
85#include "cfg.h"
8b11a64c 86#include "basic-block.h"
cf835838 87#include "gimple-pretty-print.h"
b787e7a2 88#include "hash-map.h"
2fb9a547
AM
89#include "hash-table.h"
90#include "tree-ssa-alias.h"
91#include "internal-fn.h"
92#include "tree-eh.h"
93#include "gimple-expr.h"
94#include "is-a.h"
18f429e2 95#include "gimple.h"
45b0be94 96#include "gimplify.h"
5be5c238 97#include "gimple-iterator.h"
18f429e2 98#include "gimplify-me.h"
442b4905 99#include "gimple-ssa.h"
c582198b
AM
100#include "plugin-api.h"
101#include "ipa-ref.h"
442b4905
AM
102#include "cgraph.h"
103#include "tree-cfg.h"
104#include "tree-phinodes.h"
105#include "ssa-iterators.h"
d8a2d370 106#include "stringpool.h"
442b4905 107#include "tree-ssanames.h"
e28030cf
AM
108#include "tree-ssa-loop-ivopts.h"
109#include "tree-ssa-loop-manip.h"
110#include "tree-ssa-loop-niter.h"
442b4905 111#include "tree-ssa-loop.h"
36566b39
PK
112#include "hashtab.h"
113#include "rtl.h"
114#include "flags.h"
115#include "statistics.h"
116#include "real.h"
117#include "fixed-value.h"
118#include "insn-config.h"
119#include "expmed.h"
120#include "dojump.h"
121#include "explow.h"
122#include "calls.h"
123#include "emit-rtl.h"
124#include "varasm.h"
125#include "stmt.h"
d8a2d370 126#include "expr.h"
442b4905 127#include "tree-dfa.h"
7a300452 128#include "tree-ssa.h"
8b11a64c 129#include "cfgloop.h"
8b11a64c 130#include "tree-pass.h"
8b11a64c
ZD
131#include "tree-chrec.h"
132#include "tree-scalar-evolution.h"
8b11a64c 133#include "params.h"
39b4020c 134#include "langhooks.h"
73f30c63 135#include "tree-affine.h"
8318b0d9 136#include "target.h"
18081149 137#include "tree-inline.h"
17fc049f 138#include "tree-ssa-propagate.h"
4484a35a 139#include "tree-ssa-address.h"
9b2b7279 140#include "builtins.h"
7735d6c7 141
2eb79bbb
SB
142/* FIXME: Expressions are expanded to RTL in this pass to determine the
143 cost of different addressing modes. This should be moved to a TBD
144 interface between the GIMPLE and RTL worlds. */
1c1ad7bb 145#include "recog.h"
2eb79bbb 146
8b11a64c
ZD
147/* The infinite cost. */
148#define INFTY 10000000
149
8b11a64c
ZD
150#define AVG_LOOP_NITER(LOOP) 5
151
18081149
XDL
152/* Returns the expected number of loop iterations for LOOP.
153 The average trip count is computed from profile data if it
154 exists. */
155
156static inline HOST_WIDE_INT
157avg_loop_niter (struct loop *loop)
158{
652c4c71 159 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
18081149
XDL
160 if (niter == -1)
161 return AVG_LOOP_NITER (loop);
162
163 return niter;
164}
8b11a64c
ZD
165
166/* Representation of the induction variable. */
167struct iv
168{
169 tree base; /* Initial value of the iv. */
e6845c23 170 tree base_object; /* A memory object to that the induction variable points. */
8b11a64c
ZD
171 tree step; /* Step of the iv (constant only). */
172 tree ssa_name; /* The ssa name with the value. */
173 bool biv_p; /* Is it a biv? */
174 bool have_use_for; /* Do we already have a use for it? */
175 unsigned use_id; /* The identifier in the use if it is the case. */
176};
177
178/* Per-ssa version information (induction variable descriptions, etc.). */
179struct version_info
180{
181 tree name; /* The ssa name. */
182 struct iv *iv; /* Induction variable description. */
183 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
184 an expression that is not an induction variable. */
8b11a64c 185 bool preserve_biv; /* For the original biv, whether to preserve it. */
448f65db 186 unsigned inv_id; /* Id of an invariant. */
8b11a64c
ZD
187};
188
8b11a64c
ZD
189/* Types of uses. */
190enum use_type
191{
192 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
8b11a64c
ZD
193 USE_ADDRESS, /* Use in an address. */
194 USE_COMPARE /* Use is a compare. */
195};
196
6e8c65f6
ZD
197/* Cost of a computation. */
198typedef struct
199{
2c08497a 200 int cost; /* The runtime cost. */
6e8c65f6
ZD
201 unsigned complexity; /* The estimate of the complexity of the code for
202 the computation (in no concrete units --
203 complexity field should be larger for more
204 complex expressions and addressing modes). */
205} comp_cost;
206
7735d6c7 207static const comp_cost no_cost = {0, 0};
6e8c65f6
ZD
208static const comp_cost infinite_cost = {INFTY, INFTY};
209
8b11a64c
ZD
210/* The candidate - cost pair. */
211struct cost_pair
212{
213 struct iv_cand *cand; /* The candidate. */
6e8c65f6 214 comp_cost cost; /* The cost. */
8b11a64c
ZD
215 bitmap depends_on; /* The list of invariants that have to be
216 preserved. */
f5f12961
ZD
217 tree value; /* For final value elimination, the expression for
218 the final value of the iv. For iv elimination,
219 the new bound to compare with. */
d8af4ba3 220 enum tree_code comp; /* For iv elimination, the comparison. */
18081149 221 int inv_expr_id; /* Loop invariant expression id. */
8b11a64c
ZD
222};
223
224/* Use. */
225struct iv_use
226{
227 unsigned id; /* The id of the use. */
228 enum use_type type; /* Type of the use. */
229 struct iv *iv; /* The induction variable it is based on. */
726a989a 230 gimple stmt; /* Statement in that it occurs. */
8b11a64c 231 tree *op_p; /* The place where it occurs. */
b1b02be2
ZD
232 bitmap related_cands; /* The set of "related" iv candidates, plus the common
233 important ones. */
8b11a64c
ZD
234
235 unsigned n_map_members; /* Number of candidates in the cost_map list. */
236 struct cost_pair *cost_map;
237 /* The costs wrto the iv candidates. */
238
239 struct iv_cand *selected;
240 /* The selected candidate. */
241};
242
243/* The position where the iv is computed. */
244enum iv_position
245{
246 IP_NORMAL, /* At the end, just before the exit condition. */
247 IP_END, /* At the end of the latch block. */
2c08497a
BS
248 IP_BEFORE_USE, /* Immediately before a specific use. */
249 IP_AFTER_USE, /* Immediately after a specific use. */
8b11a64c
ZD
250 IP_ORIGINAL /* The original biv. */
251};
252
253/* The induction variable candidate. */
254struct iv_cand
255{
256 unsigned id; /* The number of the candidate. */
257 bool important; /* Whether this is an "important" candidate, i.e. such
258 that it should be considered by all uses. */
448f65db 259 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
726a989a 260 gimple incremented_at;/* For original biv, the statement where it is
8b11a64c
ZD
261 incremented. */
262 tree var_before; /* The variable used for it before increment. */
263 tree var_after; /* The variable used for it after increment. */
264 struct iv *iv; /* The value of the candidate. NULL for
265 "pseudocandidate" used to indicate the possibility
266 to replace the final value of an iv by direct
267 computation of the value. */
268 unsigned cost; /* Cost of the candidate. */
2c08497a
BS
269 unsigned cost_step; /* Cost of the candidate's increment operation. */
270 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
271 where it is incremented. */
9be872b7
ZD
272 bitmap depends_on; /* The list of invariants that are used in step of the
273 biv. */
8b11a64c
ZD
274};
275
18081149
XDL
276/* Loop invariant expression hashtable entry. */
277struct iv_inv_expr_ent
278{
279 tree expr;
280 int id;
281 hashval_t hash;
282};
283
8b11a64c
ZD
284/* The data used by the induction variable optimizations. */
285
69ebd99d 286typedef struct iv_use *iv_use_p;
69ebd99d
KH
287
288typedef struct iv_cand *iv_cand_p;
69ebd99d 289
4a8fb1a1
LC
290/* Hashtable helpers. */
291
292struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
293{
294 typedef iv_inv_expr_ent value_type;
295 typedef iv_inv_expr_ent compare_type;
296 static inline hashval_t hash (const value_type *);
297 static inline bool equal (const value_type *, const compare_type *);
298};
299
300/* Hash function for loop invariant expressions. */
301
302inline hashval_t
303iv_inv_expr_hasher::hash (const value_type *expr)
304{
305 return expr->hash;
306}
307
308/* Hash table equality function for expressions. */
309
310inline bool
311iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
312{
313 return expr1->hash == expr2->hash
314 && operand_equal_p (expr1->expr, expr2->expr, 0);
315}
316
8b11a64c
ZD
317struct ivopts_data
318{
319 /* The currently optimized loop. */
320 struct loop *current_loop;
321
8f5929e1 322 /* Numbers of iterations for all exits of the current loop. */
b787e7a2 323 hash_map<edge, tree_niter_desc *> *niters;
f40751dd 324
9a2ef6b8
ZD
325 /* Number of registers used in it. */
326 unsigned regs_used;
327
8b11a64c
ZD
328 /* The size of version_info array allocated. */
329 unsigned version_info_size;
330
331 /* The array of information for the ssa names. */
332 struct version_info *version_info;
333
18081149
XDL
334 /* The hashtable of loop invariant expressions created
335 by ivopt. */
c203e8a7 336 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
18081149
XDL
337
338 /* Loop invariant expression id. */
339 int inv_expr_id;
340
8b11a64c
ZD
341 /* The bitmap of indices in version_info whose value was changed. */
342 bitmap relevant;
343
8b11a64c 344 /* The uses of induction variables. */
9771b263 345 vec<iv_use_p> iv_uses;
8b11a64c
ZD
346
347 /* The candidates. */
9771b263 348 vec<iv_cand_p> iv_candidates;
8b11a64c 349
80cad5fa
ZD
350 /* A bitmap of important candidates. */
351 bitmap important_candidates;
352
3230c614
BC
353 /* Cache used by tree_to_aff_combination_expand. */
354 hash_map<tree, name_expansion *> *name_expansion_cache;
355
8f5929e1
JJ
356 /* The maximum invariant id. */
357 unsigned max_inv_id;
358
8b11a64c
ZD
359 /* Whether to consider just related and important candidates when replacing a
360 use. */
361 bool consider_all_candidates;
8f5929e1
JJ
362
363 /* Are we optimizing for speed? */
364 bool speed;
bec922f0
SL
365
366 /* Whether the loop body includes any function calls. */
367 bool body_includes_call;
d8af4ba3
ZD
368
369 /* Whether the loop body can only be exited via single exit. */
370 bool loop_single_exit_p;
8b11a64c
ZD
371};
372
b1b02be2
ZD
373/* An assignment of iv candidates to uses. */
374
375struct iv_ca
376{
377 /* The number of uses covered by the assignment. */
378 unsigned upto;
379
380 /* Number of uses that cannot be expressed by the candidates in the set. */
381 unsigned bad_uses;
382
383 /* Candidate assigned to a use, together with the related costs. */
384 struct cost_pair **cand_for_use;
385
386 /* Number of times each candidate is used. */
387 unsigned *n_cand_uses;
388
389 /* The candidates used. */
390 bitmap cands;
391
36f5ada1
ZD
392 /* The number of candidates in the set. */
393 unsigned n_cands;
394
b1b02be2
ZD
395 /* Total number of registers needed. */
396 unsigned n_regs;
397
398 /* Total cost of expressing uses. */
6e8c65f6 399 comp_cost cand_use_cost;
b1b02be2
ZD
400
401 /* Total cost of candidates. */
402 unsigned cand_cost;
403
404 /* Number of times each invariant is used. */
405 unsigned *n_invariant_uses;
406
f06e400f
XDL
407 /* The array holding the number of uses of each loop
408 invariant expressions created by ivopt. */
409 unsigned *used_inv_expr;
410
411 /* The number of created loop invariants. */
412 unsigned num_used_inv_expr;
413
b1b02be2 414 /* Total cost of the assignment. */
6e8c65f6 415 comp_cost cost;
b1b02be2
ZD
416};
417
418/* Difference of two iv candidate assignments. */
419
420struct iv_ca_delta
421{
422 /* Changed use. */
423 struct iv_use *use;
424
425 /* An old assignment (for rollback purposes). */
426 struct cost_pair *old_cp;
427
428 /* A new assignment. */
429 struct cost_pair *new_cp;
430
431 /* Next change in the list. */
432 struct iv_ca_delta *next_change;
433};
434
8b11a64c
ZD
435/* Bound on number of candidates below that all candidates are considered. */
436
437#define CONSIDER_ALL_CANDIDATES_BOUND \
438 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
439
2a7e31df 440/* If there are more iv occurrences, we just give up (it is quite unlikely that
8b11a64c
ZD
441 optimizing such a loop would help, and it would take ages). */
442
443#define MAX_CONSIDERED_USES \
444 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
445
36f5ada1
ZD
446/* If there are at most this number of ivs in the set, try removing unnecessary
447 ivs from the set always. */
448
449#define ALWAYS_PRUNE_CAND_SET_BOUND \
450 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
451
8b11a64c
ZD
452/* The list of trees for that the decl_rtl field must be reset is stored
453 here. */
454
9771b263 455static vec<tree> decl_rtl_to_reset;
8b11a64c 456
e6450c11
TV
457static comp_cost force_expr_to_var_cost (tree, bool);
458
8b11a64c
ZD
459/* Number of uses recorded in DATA. */
460
461static inline unsigned
462n_iv_uses (struct ivopts_data *data)
463{
9771b263 464 return data->iv_uses.length ();
8b11a64c
ZD
465}
466
467/* Ith use recorded in DATA. */
468
469static inline struct iv_use *
470iv_use (struct ivopts_data *data, unsigned i)
471{
9771b263 472 return data->iv_uses[i];
8b11a64c
ZD
473}
474
475/* Number of candidates recorded in DATA. */
476
477static inline unsigned
478n_iv_cands (struct ivopts_data *data)
479{
9771b263 480 return data->iv_candidates.length ();
8b11a64c
ZD
481}
482
483/* Ith candidate recorded in DATA. */
484
485static inline struct iv_cand *
486iv_cand (struct ivopts_data *data, unsigned i)
487{
9771b263 488 return data->iv_candidates[i];
8b11a64c
ZD
489}
490
8b11a64c
ZD
491/* The single loop exit if it dominates the latch, NULL otherwise. */
492
b7eae7b8 493edge
8b11a64c
ZD
494single_dom_exit (struct loop *loop)
495{
ac8f6c69 496 edge exit = single_exit (loop);
8b11a64c
ZD
497
498 if (!exit)
499 return NULL;
500
501 if (!just_once_each_iteration_p (loop, exit->src))
502 return NULL;
503
504 return exit;
505}
506
507/* Dumps information about the induction variable IV to FILE. */
508
8b11a64c
ZD
509void
510dump_iv (FILE *file, struct iv *iv)
511{
e6845c23
ZD
512 if (iv->ssa_name)
513 {
514 fprintf (file, "ssa name ");
515 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
516 fprintf (file, "\n");
517 }
8b11a64c 518
2f4675b4
ZD
519 fprintf (file, " type ");
520 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
521 fprintf (file, "\n");
522
8b11a64c
ZD
523 if (iv->step)
524 {
525 fprintf (file, " base ");
526 print_generic_expr (file, iv->base, TDF_SLIM);
527 fprintf (file, "\n");
528
529 fprintf (file, " step ");
530 print_generic_expr (file, iv->step, TDF_SLIM);
531 fprintf (file, "\n");
532 }
533 else
534 {
535 fprintf (file, " invariant ");
536 print_generic_expr (file, iv->base, TDF_SLIM);
537 fprintf (file, "\n");
538 }
539
e6845c23
ZD
540 if (iv->base_object)
541 {
542 fprintf (file, " base object ");
543 print_generic_expr (file, iv->base_object, TDF_SLIM);
544 fprintf (file, "\n");
545 }
546
8b11a64c
ZD
547 if (iv->biv_p)
548 fprintf (file, " is a biv\n");
549}
550
551/* Dumps information about the USE to FILE. */
552
8b11a64c
ZD
553void
554dump_use (FILE *file, struct iv_use *use)
555{
8b11a64c
ZD
556 fprintf (file, "use %d\n", use->id);
557
558 switch (use->type)
559 {
560 case USE_NONLINEAR_EXPR:
561 fprintf (file, " generic\n");
562 break;
563
8b11a64c
ZD
564 case USE_ADDRESS:
565 fprintf (file, " address\n");
566 break;
567
568 case USE_COMPARE:
569 fprintf (file, " compare\n");
570 break;
571
572 default:
1e128c5f 573 gcc_unreachable ();
8b11a64c
ZD
574 }
575
2f4675b4 576 fprintf (file, " in statement ");
726a989a 577 print_gimple_stmt (file, use->stmt, 0, 0);
2f4675b4
ZD
578 fprintf (file, "\n");
579
580 fprintf (file, " at position ");
581 if (use->op_p)
582 print_generic_expr (file, *use->op_p, TDF_SLIM);
583 fprintf (file, "\n");
584
e6845c23 585 dump_iv (file, use->iv);
2f4675b4 586
eec5fec9
ZD
587 if (use->related_cands)
588 {
589 fprintf (file, " related candidates ");
590 dump_bitmap (file, use->related_cands);
591 }
8b11a64c
ZD
592}
593
594/* Dumps information about the uses to FILE. */
595
8b11a64c
ZD
596void
597dump_uses (FILE *file, struct ivopts_data *data)
598{
599 unsigned i;
600 struct iv_use *use;
601
602 for (i = 0; i < n_iv_uses (data); i++)
603 {
604 use = iv_use (data, i);
605
606 dump_use (file, use);
607 fprintf (file, "\n");
608 }
609}
610
611/* Dumps information about induction variable candidate CAND to FILE. */
612
8b11a64c
ZD
613void
614dump_cand (FILE *file, struct iv_cand *cand)
615{
616 struct iv *iv = cand->iv;
617
618 fprintf (file, "candidate %d%s\n",
619 cand->id, cand->important ? " (important)" : "");
620
9be872b7
ZD
621 if (cand->depends_on)
622 {
623 fprintf (file, " depends on ");
624 dump_bitmap (file, cand->depends_on);
625 }
626
8b11a64c
ZD
627 if (!iv)
628 {
629 fprintf (file, " final value replacement\n");
630 return;
631 }
632
18081149
XDL
633 if (cand->var_before)
634 {
635 fprintf (file, " var_before ");
636 print_generic_expr (file, cand->var_before, TDF_SLIM);
637 fprintf (file, "\n");
638 }
639 if (cand->var_after)
640 {
641 fprintf (file, " var_after ");
642 print_generic_expr (file, cand->var_after, TDF_SLIM);
643 fprintf (file, "\n");
644 }
645
8b11a64c
ZD
646 switch (cand->pos)
647 {
648 case IP_NORMAL:
649 fprintf (file, " incremented before exit test\n");
650 break;
651
2c08497a
BS
652 case IP_BEFORE_USE:
653 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
654 break;
655
656 case IP_AFTER_USE:
657 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
658 break;
659
8b11a64c
ZD
660 case IP_END:
661 fprintf (file, " incremented at end\n");
662 break;
663
664 case IP_ORIGINAL:
665 fprintf (file, " original biv\n");
666 break;
667 }
668
e6845c23 669 dump_iv (file, iv);
8b11a64c
ZD
670}
671
672/* Returns the info for ssa version VER. */
673
674static inline struct version_info *
675ver_info (struct ivopts_data *data, unsigned ver)
676{
677 return data->version_info + ver;
678}
679
680/* Returns the info for ssa name NAME. */
681
682static inline struct version_info *
683name_info (struct ivopts_data *data, tree name)
684{
685 return ver_info (data, SSA_NAME_VERSION (name));
686}
687
8b11a64c
ZD
688/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
689 emitted in LOOP. */
690
691static bool
726a989a 692stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
8b11a64c 693{
726a989a 694 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
8b11a64c 695
1e128c5f 696 gcc_assert (bb);
8b11a64c
ZD
697
698 if (sbb == loop->latch)
699 return true;
700
701 if (sbb != bb)
702 return false;
703
704 return stmt == last_stmt (bb);
705}
706
707/* Returns true if STMT if after the place where the original induction
2c08497a
BS
708 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
709 if the positions are identical. */
8b11a64c
ZD
710
711static bool
2c08497a 712stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
8b11a64c 713{
726a989a
RB
714 basic_block cand_bb = gimple_bb (cand->incremented_at);
715 basic_block stmt_bb = gimple_bb (stmt);
8b11a64c
ZD
716
717 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
718 return false;
719
720 if (stmt_bb != cand_bb)
721 return true;
722
2c08497a
BS
723 if (true_if_equal
724 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
725 return true;
726 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
8b11a64c
ZD
727}
728
729/* Returns true if STMT if after the place where the induction variable
730 CAND is incremented in LOOP. */
731
732static bool
726a989a 733stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
8b11a64c
ZD
734{
735 switch (cand->pos)
736 {
737 case IP_END:
738 return false;
739
740 case IP_NORMAL:
741 return stmt_after_ip_normal_pos (loop, stmt);
742
743 case IP_ORIGINAL:
2c08497a
BS
744 case IP_AFTER_USE:
745 return stmt_after_inc_pos (cand, stmt, false);
746
747 case IP_BEFORE_USE:
748 return stmt_after_inc_pos (cand, stmt, true);
8b11a64c
ZD
749
750 default:
1e128c5f 751 gcc_unreachable ();
8b11a64c
ZD
752 }
753}
754
dcccd88d
ZD
755/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
756
757static bool
758abnormal_ssa_name_p (tree exp)
759{
760 if (!exp)
761 return false;
762
763 if (TREE_CODE (exp) != SSA_NAME)
764 return false;
765
766 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
767}
768
769/* Returns false if BASE or INDEX contains a ssa name that occurs in an
770 abnormal phi node. Callback for for_each_index. */
771
772static bool
773idx_contains_abnormal_ssa_name_p (tree base, tree *index,
774 void *data ATTRIBUTE_UNUSED)
775{
9f7ccf69 776 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dcccd88d
ZD
777 {
778 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
779 return false;
780 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
781 return false;
782 }
783
784 return !abnormal_ssa_name_p (*index);
785}
786
787/* Returns true if EXPR contains a ssa name that occurs in an
788 abnormal phi node. */
789
e5db3515 790bool
dcccd88d
ZD
791contains_abnormal_ssa_name_p (tree expr)
792{
793 enum tree_code code;
c22940cd 794 enum tree_code_class codeclass;
dcccd88d
ZD
795
796 if (!expr)
797 return false;
798
799 code = TREE_CODE (expr);
c22940cd 800 codeclass = TREE_CODE_CLASS (code);
dcccd88d
ZD
801
802 if (code == SSA_NAME)
803 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
804
805 if (code == INTEGER_CST
806 || is_gimple_min_invariant (expr))
807 return false;
808
809 if (code == ADDR_EXPR)
810 return !for_each_index (&TREE_OPERAND (expr, 0),
811 idx_contains_abnormal_ssa_name_p,
812 NULL);
813
0a74c758
SP
814 if (code == COND_EXPR)
815 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
816 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
817 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
818
c22940cd 819 switch (codeclass)
dcccd88d
ZD
820 {
821 case tcc_binary:
822 case tcc_comparison:
823 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
824 return true;
825
826 /* Fallthru. */
827 case tcc_unary:
828 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
829 return true;
830
831 break;
832
833 default:
834 gcc_unreachable ();
835 }
836
837 return false;
838}
839
d8af4ba3 840/* Returns the structure describing number of iterations determined from
ca4c3169
ZD
841 EXIT of DATA->current_loop, or NULL if something goes wrong. */
842
d8af4ba3
ZD
843static struct tree_niter_desc *
844niter_for_exit (struct ivopts_data *data, edge exit)
ca4c3169 845{
d8af4ba3 846 struct tree_niter_desc *desc;
b787e7a2 847 tree_niter_desc **slot;
ca4c3169 848
15814ba0 849 if (!data->niters)
ca4c3169 850 {
b787e7a2 851 data->niters = new hash_map<edge, tree_niter_desc *>;
15814ba0
PB
852 slot = NULL;
853 }
854 else
b787e7a2 855 slot = data->niters->get (exit);
dcccd88d 856
15814ba0
PB
857 if (!slot)
858 {
d8af4ba3
ZD
859 /* Try to determine number of iterations. We cannot safely work with ssa
860 names that appear in phi nodes on abnormal edges, so that we do not
861 create overlapping life ranges for them (PR 27283). */
e2102efc 862 desc = XNEW (struct tree_niter_desc);
d8af4ba3
ZD
863 if (!number_of_iterations_exit (data->current_loop,
864 exit, desc, true)
865 || contains_abnormal_ssa_name_p (desc->niter))
866 {
867 XDELETE (desc);
868 desc = NULL;
869 }
b787e7a2 870 data->niters->put (exit, desc);
ca4c3169
ZD
871 }
872 else
b787e7a2 873 desc = *slot;
ca4c3169 874
d8af4ba3 875 return desc;
ca4c3169
ZD
876}
877
d8af4ba3 878/* Returns the structure describing number of iterations determined from
ca4c3169
ZD
879 single dominating exit of DATA->current_loop, or NULL if something
880 goes wrong. */
881
d8af4ba3 882static struct tree_niter_desc *
ca4c3169
ZD
883niter_for_single_dom_exit (struct ivopts_data *data)
884{
885 edge exit = single_dom_exit (data->current_loop);
886
887 if (!exit)
888 return NULL;
889
d8af4ba3 890 return niter_for_exit (data, exit);
ca4c3169
ZD
891}
892
8b11a64c 893/* Initializes data structures used by the iv optimization pass, stored
9a2ef6b8 894 in DATA. */
8b11a64c
ZD
895
896static void
9a2ef6b8 897tree_ssa_iv_optimize_init (struct ivopts_data *data)
8b11a64c 898{
8b11a64c 899 data->version_info_size = 2 * num_ssa_names;
5ed6ace5 900 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8bdbfff5
NS
901 data->relevant = BITMAP_ALLOC (NULL);
902 data->important_candidates = BITMAP_ALLOC (NULL);
8b11a64c 903 data->max_inv_id = 0;
15814ba0 904 data->niters = NULL;
9771b263
DN
905 data->iv_uses.create (20);
906 data->iv_candidates.create (20);
c203e8a7 907 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
18081149 908 data->inv_expr_id = 0;
3230c614 909 data->name_expansion_cache = NULL;
9771b263 910 decl_rtl_to_reset.create (20);
8b11a64c
ZD
911}
912
e6845c23
ZD
913/* Returns a memory object to that EXPR points. In case we are able to
914 determine that it does not point to any such object, NULL is returned. */
915
916static tree
917determine_base_object (tree expr)
918{
919 enum tree_code code = TREE_CODE (expr);
5be014d5 920 tree base, obj;
e6845c23 921
975626a7
ZD
922 /* If this is a pointer casted to any type, we need to determine
923 the base object for the pointer; so handle conversions before
924 throwing away non-pointer expressions. */
1043771b 925 if (CONVERT_EXPR_P (expr))
975626a7
ZD
926 return determine_base_object (TREE_OPERAND (expr, 0));
927
e6845c23
ZD
928 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
929 return NULL_TREE;
930
931 switch (code)
932 {
933 case INTEGER_CST:
934 return NULL_TREE;
935
936 case ADDR_EXPR:
937 obj = TREE_OPERAND (expr, 0);
938 base = get_base_address (obj);
939
940 if (!base)
f5e2738c 941 return expr;
e6845c23 942
70f34814 943 if (TREE_CODE (base) == MEM_REF)
f5e2738c 944 return determine_base_object (TREE_OPERAND (base, 0));
7299dbfb 945
62b37d91
RG
946 return fold_convert (ptr_type_node,
947 build_fold_addr_expr (base));
e6845c23 948
5be014d5
AP
949 case POINTER_PLUS_EXPR:
950 return determine_base_object (TREE_OPERAND (expr, 0));
951
e6845c23
ZD
952 case PLUS_EXPR:
953 case MINUS_EXPR:
5be014d5
AP
954 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
955 gcc_unreachable ();
e6845c23
ZD
956
957 default:
958 return fold_convert (ptr_type_node, expr);
959 }
960}
961
be9a0da5
BC
962/* Return true if address expression with non-DECL_P operand appears
963 in EXPR. */
964
965static bool
966contain_complex_addr_expr (tree expr)
967{
968 bool res = false;
969
970 STRIP_NOPS (expr);
971 switch (TREE_CODE (expr))
972 {
973 case POINTER_PLUS_EXPR:
974 case PLUS_EXPR:
975 case MINUS_EXPR:
976 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
977 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
978 break;
979
980 case ADDR_EXPR:
981 return (!DECL_P (TREE_OPERAND (expr, 0)));
982
983 default:
984 return false;
985 }
986
987 return res;
988}
989
8b11a64c
ZD
990/* Allocates an induction variable with given initial value BASE and step STEP
991 for loop LOOP. */
992
993static struct iv *
994alloc_iv (tree base, tree step)
995{
be9a0da5 996 tree expr = base;
5ed6ace5 997 struct iv *iv = XCNEW (struct iv);
6e42ce54 998 gcc_assert (step != NULL_TREE);
8b11a64c 999
be9a0da5 1000 /* Lower address expression in base except ones with DECL_P as operand.
be8c1c8c
BC
1001 By doing this:
1002 1) More accurate cost can be computed for address expressions;
1003 2) Duplicate candidates won't be created for bases in different
1004 forms, like &a[0] and &a. */
be9a0da5
BC
1005 STRIP_NOPS (expr);
1006 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1007 || contain_complex_addr_expr (expr))
be8c1c8c
BC
1008 {
1009 aff_tree comb;
be9a0da5 1010 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
be8c1c8c
BC
1011 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1012 }
1013
8b11a64c 1014 iv->base = base;
be9a0da5 1015 iv->base_object = determine_base_object (base);
8b11a64c
ZD
1016 iv->step = step;
1017 iv->biv_p = false;
1018 iv->have_use_for = false;
1019 iv->use_id = 0;
1020 iv->ssa_name = NULL_TREE;
1021
1022 return iv;
1023}
1024
1025/* Sets STEP and BASE for induction variable IV. */
1026
1027static void
1028set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
1029{
1030 struct version_info *info = name_info (data, iv);
1031
1e128c5f 1032 gcc_assert (!info->iv);
8b11a64c
ZD
1033
1034 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1035 info->iv = alloc_iv (base, step);
1036 info->iv->ssa_name = iv;
1037}
1038
1039/* Finds induction variable declaration for VAR. */
1040
1041static struct iv *
1042get_iv (struct ivopts_data *data, tree var)
1043{
1044 basic_block bb;
6e42ce54
ZD
1045 tree type = TREE_TYPE (var);
1046
1047 if (!POINTER_TYPE_P (type)
1048 && !INTEGRAL_TYPE_P (type))
1049 return NULL;
1050
8b11a64c
ZD
1051 if (!name_info (data, var)->iv)
1052 {
726a989a 1053 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
8b11a64c
ZD
1054
1055 if (!bb
1056 || !flow_bb_inside_loop_p (data->current_loop, bb))
6e42ce54 1057 set_iv (data, var, var, build_int_cst (type, 0));
8b11a64c
ZD
1058 }
1059
1060 return name_info (data, var)->iv;
1061}
1062
9be872b7
ZD
1063/* Determines the step of a biv defined in PHI. Returns NULL if PHI does
1064 not define a simple affine biv with nonzero step. */
8b11a64c
ZD
1065
1066static tree
538dd0b7 1067determine_biv_step (gphi *phi)
8b11a64c 1068{
726a989a 1069 struct loop *loop = gimple_bb (phi)->loop_father;
a6f778b2
ZD
1070 tree name = PHI_RESULT (phi);
1071 affine_iv iv;
8b11a64c 1072
ea057359 1073 if (virtual_operand_p (name))
8b11a64c
ZD
1074 return NULL_TREE;
1075
f017bf5e 1076 if (!simple_iv (loop, loop, name, &iv, true))
8b11a64c
ZD
1077 return NULL_TREE;
1078
6e42ce54 1079 return integer_zerop (iv.step) ? NULL_TREE : iv.step;
8b11a64c
ZD
1080}
1081
8b11a64c
ZD
1082/* Finds basic ivs. */
1083
1084static bool
1085find_bivs (struct ivopts_data *data)
1086{
538dd0b7 1087 gphi *phi;
726a989a 1088 tree step, type, base;
8b11a64c
ZD
1089 bool found = false;
1090 struct loop *loop = data->current_loop;
538dd0b7 1091 gphi_iterator psi;
8b11a64c 1092
726a989a 1093 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 1094 {
538dd0b7 1095 phi = psi.phi ();
726a989a 1096
8b11a64c
ZD
1097 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1098 continue;
1099
1100 step = determine_biv_step (phi);
8b11a64c
ZD
1101 if (!step)
1102 continue;
8b11a64c
ZD
1103
1104 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
d7bf3bcf 1105 base = expand_simple_operations (base);
9be872b7
ZD
1106 if (contains_abnormal_ssa_name_p (base)
1107 || contains_abnormal_ssa_name_p (step))
8b11a64c
ZD
1108 continue;
1109
1110 type = TREE_TYPE (PHI_RESULT (phi));
1111 base = fold_convert (type, base);
9be872b7 1112 if (step)
1ffe34d9
AP
1113 {
1114 if (POINTER_TYPE_P (type))
0d82a1c8 1115 step = convert_to_ptrofftype (step);
1ffe34d9
AP
1116 else
1117 step = fold_convert (type, step);
1118 }
8b11a64c
ZD
1119
1120 set_iv (data, PHI_RESULT (phi), base, step);
1121 found = true;
1122 }
1123
1124 return found;
1125}
1126
1127/* Marks basic ivs. */
1128
1129static void
1130mark_bivs (struct ivopts_data *data)
1131{
538dd0b7
DM
1132 gphi *phi;
1133 gimple def;
726a989a 1134 tree var;
8b11a64c
ZD
1135 struct iv *iv, *incr_iv;
1136 struct loop *loop = data->current_loop;
1137 basic_block incr_bb;
538dd0b7 1138 gphi_iterator psi;
8b11a64c 1139
726a989a 1140 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 1141 {
538dd0b7 1142 phi = psi.phi ();
726a989a 1143
8b11a64c
ZD
1144 iv = get_iv (data, PHI_RESULT (phi));
1145 if (!iv)
1146 continue;
1147
1148 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
b83b5507
BC
1149 def = SSA_NAME_DEF_STMT (var);
1150 /* Don't mark iv peeled from other one as biv. */
1151 if (def
1152 && gimple_code (def) == GIMPLE_PHI
1153 && gimple_bb (def) == loop->header)
1154 continue;
1155
8b11a64c
ZD
1156 incr_iv = get_iv (data, var);
1157 if (!incr_iv)
1158 continue;
1159
1160 /* If the increment is in the subloop, ignore it. */
726a989a 1161 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
8b11a64c
ZD
1162 if (incr_bb->loop_father != data->current_loop
1163 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1164 continue;
1165
1166 iv->biv_p = true;
1167 incr_iv->biv_p = true;
1168 }
1169}
1170
1171/* Checks whether STMT defines a linear induction variable and stores its
a6f778b2 1172 parameters to IV. */
8b11a64c
ZD
1173
1174static bool
726a989a 1175find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
8b11a64c
ZD
1176{
1177 tree lhs;
1178 struct loop *loop = data->current_loop;
1179
a6f778b2
ZD
1180 iv->base = NULL_TREE;
1181 iv->step = NULL_TREE;
8b11a64c 1182
726a989a 1183 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8b11a64c
ZD
1184 return false;
1185
726a989a 1186 lhs = gimple_assign_lhs (stmt);
8b11a64c
ZD
1187 if (TREE_CODE (lhs) != SSA_NAME)
1188 return false;
1189
f017bf5e 1190 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
8b11a64c 1191 return false;
a6f778b2 1192 iv->base = expand_simple_operations (iv->base);
8b11a64c 1193
a6f778b2
ZD
1194 if (contains_abnormal_ssa_name_p (iv->base)
1195 || contains_abnormal_ssa_name_p (iv->step))
8b11a64c
ZD
1196 return false;
1197
9f9ca914
JL
1198 /* If STMT could throw, then do not consider STMT as defining a GIV.
1199 While this will suppress optimizations, we can not safely delete this
1200 GIV and associated statements, even if it appears it is not used. */
1201 if (stmt_could_throw_p (stmt))
1202 return false;
1203
8b11a64c
ZD
1204 return true;
1205}
1206
1207/* Finds general ivs in statement STMT. */
1208
1209static void
726a989a 1210find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c 1211{
a6f778b2 1212 affine_iv iv;
8b11a64c 1213
a6f778b2 1214 if (!find_givs_in_stmt_scev (data, stmt, &iv))
8b11a64c
ZD
1215 return;
1216
726a989a 1217 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
8b11a64c
ZD
1218}
1219
1220/* Finds general ivs in basic block BB. */
1221
1222static void
1223find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1224{
726a989a 1225 gimple_stmt_iterator bsi;
8b11a64c 1226
726a989a
RB
1227 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1228 find_givs_in_stmt (data, gsi_stmt (bsi));
8b11a64c
ZD
1229}
1230
1231/* Finds general ivs. */
1232
1233static void
1234find_givs (struct ivopts_data *data)
1235{
1236 struct loop *loop = data->current_loop;
1237 basic_block *body = get_loop_body_in_dom_order (loop);
1238 unsigned i;
1239
1240 for (i = 0; i < loop->num_nodes; i++)
1241 find_givs_in_bb (data, body[i]);
1242 free (body);
1243}
1244
8b11a64c
ZD
1245/* For each ssa name defined in LOOP determines whether it is an induction
1246 variable and if so, its initial value and step. */
1247
1248static bool
1249find_induction_variables (struct ivopts_data *data)
1250{
1251 unsigned i;
87c476a2 1252 bitmap_iterator bi;
8b11a64c
ZD
1253
1254 if (!find_bivs (data))
1255 return false;
1256
1257 find_givs (data);
1258 mark_bivs (data);
8b11a64c
ZD
1259
1260 if (dump_file && (dump_flags & TDF_DETAILS))
1261 {
d8af4ba3 1262 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
ca4c3169
ZD
1263
1264 if (niter)
8b11a64c
ZD
1265 {
1266 fprintf (dump_file, " number of iterations ");
d8af4ba3
ZD
1267 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1268 if (!integer_zerop (niter->may_be_zero))
1269 {
1270 fprintf (dump_file, "; zero if ");
1271 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1272 }
dcccd88d 1273 fprintf (dump_file, "\n\n");
8b11a64c 1274 };
b8698a0f 1275
8b11a64c
ZD
1276 fprintf (dump_file, "Induction variables:\n\n");
1277
87c476a2 1278 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1279 {
1280 if (ver_info (data, i)->iv)
1281 dump_iv (dump_file, ver_info (data, i)->iv);
87c476a2 1282 }
8b11a64c
ZD
1283 }
1284
1285 return true;
1286}
1287
1288/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1289
1290static struct iv_use *
1291record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
726a989a 1292 gimple stmt, enum use_type use_type)
8b11a64c 1293{
5ed6ace5 1294 struct iv_use *use = XCNEW (struct iv_use);
8b11a64c
ZD
1295
1296 use->id = n_iv_uses (data);
1297 use->type = use_type;
1298 use->iv = iv;
1299 use->stmt = stmt;
1300 use->op_p = use_p;
8bdbfff5 1301 use->related_cands = BITMAP_ALLOC (NULL);
8b11a64c 1302
e6845c23
ZD
1303 /* To avoid showing ssa name in the dumps, if it was not reset by the
1304 caller. */
1305 iv->ssa_name = NULL_TREE;
1306
8b11a64c
ZD
1307 if (dump_file && (dump_flags & TDF_DETAILS))
1308 dump_use (dump_file, use);
1309
9771b263 1310 data->iv_uses.safe_push (use);
8b11a64c
ZD
1311
1312 return use;
1313}
1314
1315/* Checks whether OP is a loop-level invariant and if so, records it.
1316 NONLINEAR_USE is true if the invariant is used in a way we do not
1317 handle specially. */
1318
1319static void
1320record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1321{
1322 basic_block bb;
1323 struct version_info *info;
1324
1325 if (TREE_CODE (op) != SSA_NAME
ea057359 1326 || virtual_operand_p (op))
8b11a64c
ZD
1327 return;
1328
726a989a 1329 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
8b11a64c
ZD
1330 if (bb
1331 && flow_bb_inside_loop_p (data->current_loop, bb))
1332 return;
1333
1334 info = name_info (data, op);
1335 info->name = op;
1336 info->has_nonlin_use |= nonlinear_use;
1337 if (!info->inv_id)
1338 info->inv_id = ++data->max_inv_id;
1339 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1340}
1341
50cc9802 1342/* Checks whether the use OP is interesting and if so, records it. */
8b11a64c
ZD
1343
1344static struct iv_use *
50cc9802 1345find_interesting_uses_op (struct ivopts_data *data, tree op)
8b11a64c
ZD
1346{
1347 struct iv *iv;
1348 struct iv *civ;
726a989a 1349 gimple stmt;
8b11a64c
ZD
1350 struct iv_use *use;
1351
1352 if (TREE_CODE (op) != SSA_NAME)
1353 return NULL;
1354
1355 iv = get_iv (data, op);
1356 if (!iv)
1357 return NULL;
b8698a0f 1358
8b11a64c
ZD
1359 if (iv->have_use_for)
1360 {
1361 use = iv_use (data, iv->use_id);
1362
50cc9802 1363 gcc_assert (use->type == USE_NONLINEAR_EXPR);
8b11a64c
ZD
1364 return use;
1365 }
1366
6e42ce54 1367 if (integer_zerop (iv->step))
8b11a64c
ZD
1368 {
1369 record_invariant (data, op, true);
1370 return NULL;
1371 }
1372 iv->have_use_for = true;
1373
5ed6ace5 1374 civ = XNEW (struct iv);
8b11a64c
ZD
1375 *civ = *iv;
1376
1377 stmt = SSA_NAME_DEF_STMT (op);
726a989a
RB
1378 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1379 || is_gimple_assign (stmt));
8b11a64c 1380
50cc9802 1381 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
8b11a64c
ZD
1382 iv->use_id = use->id;
1383
1384 return use;
1385}
1386
726a989a
RB
1387/* Given a condition in statement STMT, checks whether it is a compare
1388 of an induction variable and an invariant. If this is the case,
1389 CONTROL_VAR is set to location of the iv, BOUND to the location of
1390 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1391 induction variable descriptions, and true is returned. If this is not
1392 the case, CONTROL_VAR and BOUND are set to the arguments of the
1393 condition and false is returned. */
8b11a64c 1394
b697aed4 1395static bool
726a989a 1396extract_cond_operands (struct ivopts_data *data, gimple stmt,
b697aed4
ZD
1397 tree **control_var, tree **bound,
1398 struct iv **iv_var, struct iv **iv_bound)
1399{
726a989a 1400 /* The objects returned when COND has constant operands. */
b697aed4
ZD
1401 static struct iv const_iv;
1402 static tree zero;
b697aed4
ZD
1403 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1404 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1405 bool ret = false;
1406
726a989a 1407 if (gimple_code (stmt) == GIMPLE_COND)
8b11a64c 1408 {
538dd0b7
DM
1409 gcond *cond_stmt = as_a <gcond *> (stmt);
1410 op0 = gimple_cond_lhs_ptr (cond_stmt);
1411 op1 = gimple_cond_rhs_ptr (cond_stmt);
8b11a64c 1412 }
726a989a 1413 else
8b11a64c 1414 {
726a989a
RB
1415 op0 = gimple_assign_rhs1_ptr (stmt);
1416 op1 = gimple_assign_rhs2_ptr (stmt);
8b11a64c
ZD
1417 }
1418
726a989a
RB
1419 zero = integer_zero_node;
1420 const_iv.step = integer_zero_node;
1421
b697aed4
ZD
1422 if (TREE_CODE (*op0) == SSA_NAME)
1423 iv0 = get_iv (data, *op0);
1424 if (TREE_CODE (*op1) == SSA_NAME)
1425 iv1 = get_iv (data, *op1);
8b11a64c 1426
b697aed4
ZD
1427 /* Exactly one of the compared values must be an iv, and the other one must
1428 be an invariant. */
1429 if (!iv0 || !iv1)
1430 goto end;
1431
1432 if (integer_zerop (iv0->step))
1433 {
1434 /* Control variable may be on the other side. */
1435 tmp_op = op0; op0 = op1; op1 = tmp_op;
1436 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
8b11a64c 1437 }
b697aed4
ZD
1438 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1439
1440end:
1441 if (control_var)
1442 *control_var = op0;;
1443 if (iv_var)
1444 *iv_var = iv0;;
1445 if (bound)
1446 *bound = op1;
1447 if (iv_bound)
1448 *iv_bound = iv1;
1449
1450 return ret;
1451}
1452
726a989a
RB
1453/* Checks whether the condition in STMT is interesting and if so,
1454 records it. */
b697aed4
ZD
1455
1456static void
726a989a 1457find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
b697aed4
ZD
1458{
1459 tree *var_p, *bound_p;
1460 struct iv *var_iv, *civ;
8b11a64c 1461
726a989a 1462 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
8b11a64c 1463 {
b697aed4
ZD
1464 find_interesting_uses_op (data, *var_p);
1465 find_interesting_uses_op (data, *bound_p);
8b11a64c
ZD
1466 return;
1467 }
1468
5ed6ace5 1469 civ = XNEW (struct iv);
b697aed4 1470 *civ = *var_iv;
726a989a 1471 record_use (data, NULL, civ, stmt, USE_COMPARE);
8b11a64c
ZD
1472}
1473
4ba5ea11
RB
1474/* Returns the outermost loop EXPR is obviously invariant in
1475 relative to the loop LOOP, i.e. if all its operands are defined
1476 outside of the returned loop. Returns NULL if EXPR is not
1477 even obviously invariant in LOOP. */
1478
1479struct loop *
1480outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1481{
1482 basic_block def_bb;
1483 unsigned i, len;
1484
1485 if (is_gimple_min_invariant (expr))
1486 return current_loops->tree_root;
1487
1488 if (TREE_CODE (expr) == SSA_NAME)
1489 {
1490 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1491 if (def_bb)
1492 {
1493 if (flow_bb_inside_loop_p (loop, def_bb))
1494 return NULL;
1495 return superloop_at_depth (loop,
1496 loop_depth (def_bb->loop_father) + 1);
1497 }
1498
1499 return current_loops->tree_root;
1500 }
1501
1502 if (!EXPR_P (expr))
1503 return NULL;
1504
1505 unsigned maxdepth = 0;
1506 len = TREE_OPERAND_LENGTH (expr);
1507 for (i = 0; i < len; i++)
1508 {
1509 struct loop *ivloop;
1510 if (!TREE_OPERAND (expr, i))
1511 continue;
1512
1513 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1514 if (!ivloop)
1515 return NULL;
1516 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1517 }
1518
1519 return superloop_at_depth (loop, maxdepth);
1520}
1521
be35cf60 1522/* Returns true if expression EXPR is obviously invariant in LOOP,
6a732743
SP
1523 i.e. if all its operands are defined outside of the LOOP. LOOP
1524 should not be the function body. */
be35cf60 1525
feb075f4 1526bool
be35cf60
ZD
1527expr_invariant_in_loop_p (struct loop *loop, tree expr)
1528{
1529 basic_block def_bb;
1530 unsigned i, len;
1531
6a732743
SP
1532 gcc_assert (loop_depth (loop) > 0);
1533
be35cf60
ZD
1534 if (is_gimple_min_invariant (expr))
1535 return true;
1536
1537 if (TREE_CODE (expr) == SSA_NAME)
1538 {
726a989a 1539 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
be35cf60
ZD
1540 if (def_bb
1541 && flow_bb_inside_loop_p (loop, def_bb))
1542 return false;
1543
1544 return true;
1545 }
1546
726a989a 1547 if (!EXPR_P (expr))
be35cf60
ZD
1548 return false;
1549
5039610b 1550 len = TREE_OPERAND_LENGTH (expr);
be35cf60 1551 for (i = 0; i < len; i++)
837a549b
JH
1552 if (TREE_OPERAND (expr, i)
1553 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
be35cf60
ZD
1554 return false;
1555
1556 return true;
1557}
1558
8b11a64c
ZD
1559/* Cumulates the steps of indices into DATA and replaces their values with the
1560 initial ones. Returns false when the value of the index cannot be determined.
1561 Callback for for_each_index. */
1562
1563struct ifs_ivopts_data
1564{
1565 struct ivopts_data *ivopts_data;
726a989a 1566 gimple stmt;
6e42ce54 1567 tree step;
8b11a64c
ZD
1568};
1569
1570static bool
1571idx_find_step (tree base, tree *idx, void *data)
1572{
c22940cd 1573 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
8b11a64c 1574 struct iv *iv;
d7f5de76 1575 tree step, iv_base, iv_step, lbound, off;
2f4675b4 1576 struct loop *loop = dta->ivopts_data->current_loop;
be35cf60 1577
be35cf60 1578 /* If base is a component ref, require that the offset of the reference
3a7c155d 1579 be invariant. */
be35cf60
ZD
1580 if (TREE_CODE (base) == COMPONENT_REF)
1581 {
1582 off = component_ref_field_offset (base);
1583 return expr_invariant_in_loop_p (loop, off);
1584 }
1585
1586 /* If base is array, first check whether we will be able to move the
1587 reference out of the loop (in order to take its address in strength
1588 reduction). In order for this to work we need both lower bound
1589 and step to be loop invariants. */
9f7ccf69 1590 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
be35cf60 1591 {
9f7ccf69
EB
1592 /* Moreover, for a range, the size needs to be invariant as well. */
1593 if (TREE_CODE (base) == ARRAY_RANGE_REF
1594 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1595 return false;
1596
be35cf60
ZD
1597 step = array_ref_element_size (base);
1598 lbound = array_ref_low_bound (base);
1599
1600 if (!expr_invariant_in_loop_p (loop, step)
1601 || !expr_invariant_in_loop_p (loop, lbound))
1602 return false;
1603 }
1604
8b11a64c
ZD
1605 if (TREE_CODE (*idx) != SSA_NAME)
1606 return true;
1607
1608 iv = get_iv (dta->ivopts_data, *idx);
1609 if (!iv)
1610 return false;
1611
ea643120
RG
1612 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1613 *&x[0], which is not folded and does not trigger the
1614 ARRAY_REF path below. */
8b11a64c
ZD
1615 *idx = iv->base;
1616
6e42ce54 1617 if (integer_zerop (iv->step))
8b11a64c
ZD
1618 return true;
1619
9f7ccf69 1620 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
1621 {
1622 step = array_ref_element_size (base);
2f4675b4
ZD
1623
1624 /* We only handle addresses whose step is an integer constant. */
1625 if (TREE_CODE (step) != INTEGER_CST)
1626 return false;
2f4675b4 1627 }
8b11a64c 1628 else
5212068f 1629 /* The step for pointer arithmetics already is 1 byte. */
9a9d280e 1630 step = size_one_node;
8b11a64c 1631
d7f5de76
ZD
1632 iv_base = iv->base;
1633 iv_step = iv->step;
1634 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1635 sizetype, &iv_base, &iv_step, dta->stmt,
1636 false))
8b11a64c
ZD
1637 {
1638 /* The index might wrap. */
1639 return false;
1640 }
1641
1c1205fb 1642 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
6e42ce54 1643 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
8b11a64c
ZD
1644
1645 return true;
1646}
1647
1648/* Records use in index IDX. Callback for for_each_index. Ivopts data
1649 object is passed to it in DATA. */
1650
1651static bool
2f4675b4 1652idx_record_use (tree base, tree *idx,
c22940cd 1653 void *vdata)
8b11a64c 1654{
c22940cd 1655 struct ivopts_data *data = (struct ivopts_data *) vdata;
8b11a64c 1656 find_interesting_uses_op (data, *idx);
9f7ccf69 1657 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
1658 {
1659 find_interesting_uses_op (data, array_ref_element_size (base));
1660 find_interesting_uses_op (data, array_ref_low_bound (base));
1661 }
8b11a64c
ZD
1662 return true;
1663}
1664
32159434
CB
1665/* If we can prove that TOP = cst * BOT for some constant cst,
1666 store cst to MUL and return true. Otherwise return false.
1667 The returned value is always sign-extended, regardless of the
1668 signedness of TOP and BOT. */
1669
1670static bool
807e902e 1671constant_multiple_of (tree top, tree bot, widest_int *mul)
32159434
CB
1672{
1673 tree mby;
1674 enum tree_code code;
32159434 1675 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
807e902e 1676 widest_int res, p0, p1;
32159434
CB
1677
1678 STRIP_NOPS (top);
1679 STRIP_NOPS (bot);
1680
1681 if (operand_equal_p (top, bot, 0))
1682 {
807e902e 1683 *mul = 1;
32159434
CB
1684 return true;
1685 }
1686
1687 code = TREE_CODE (top);
1688 switch (code)
1689 {
1690 case MULT_EXPR:
1691 mby = TREE_OPERAND (top, 1);
1692 if (TREE_CODE (mby) != INTEGER_CST)
1693 return false;
1694
1695 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1696 return false;
1697
807e902e 1698 *mul = wi::sext (res * wi::to_widest (mby), precision);
32159434
CB
1699 return true;
1700
1701 case PLUS_EXPR:
1702 case MINUS_EXPR:
1703 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1704 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1705 return false;
1706
1707 if (code == MINUS_EXPR)
27bcd47c 1708 p1 = -p1;
807e902e 1709 *mul = wi::sext (p0 + p1, precision);
32159434
CB
1710 return true;
1711
1712 case INTEGER_CST:
1713 if (TREE_CODE (bot) != INTEGER_CST)
1714 return false;
1715
807e902e
KZ
1716 p0 = widest_int::from (top, SIGNED);
1717 p1 = widest_int::from (bot, SIGNED);
1718 if (p1 == 0)
32159434 1719 return false;
807e902e
KZ
1720 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1721 return res == 0;
32159434
CB
1722
1723 default:
1724 return false;
1725 }
1726}
1727
e75fde1a 1728/* Return true if memory reference REF with step STEP may be unaligned. */
0a915e3d
ZD
1729
1730static bool
32159434 1731may_be_unaligned_p (tree ref, tree step)
0a915e3d 1732{
ac182688 1733 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
607fb860 1734 thus they are not misaligned. */
ac182688
ZD
1735 if (TREE_CODE (ref) == TARGET_MEM_REF)
1736 return false;
1737
e75fde1a 1738 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
abf30454
RB
1739 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1740 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
b8698a0f 1741
e75fde1a
EB
1742 unsigned HOST_WIDE_INT bitpos;
1743 unsigned int ref_align;
1744 get_object_alignment_1 (ref, &ref_align, &bitpos);
1745 if (ref_align < align
1746 || (bitpos % align) != 0
1747 || (bitpos % BITS_PER_UNIT) != 0)
1748 return true;
ce276b61 1749
e75fde1a
EB
1750 unsigned int trailing_zeros = tree_ctz (step);
1751 if (trailing_zeros < HOST_BITS_PER_INT
1752 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1753 return true;
0a915e3d
ZD
1754
1755 return false;
1756}
1757
75715cf6
EB
1758/* Return true if EXPR may be non-addressable. */
1759
bc068a23 1760bool
75715cf6
EB
1761may_be_nonaddressable_p (tree expr)
1762{
1763 switch (TREE_CODE (expr))
1764 {
928bc34f
EB
1765 case TARGET_MEM_REF:
1766 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1767 target, thus they are always addressable. */
1768 return false;
1769
75715cf6
EB
1770 case COMPONENT_REF:
1771 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1772 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1773
75715cf6
EB
1774 case VIEW_CONVERT_EXPR:
1775 /* This kind of view-conversions may wrap non-addressable objects
1776 and make them look addressable. After some processing the
1777 non-addressability may be uncovered again, causing ADDR_EXPRs
1778 of inappropriate objects to be built. */
7a4fbffc
EB
1779 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1780 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1781 return true;
1782
1783 /* ... fall through ... */
928bc34f
EB
1784
1785 case ARRAY_REF:
1786 case ARRAY_RANGE_REF:
7a4fbffc 1787 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
928bc34f 1788
1043771b 1789 CASE_CONVERT:
928bc34f 1790 return true;
75715cf6
EB
1791
1792 default:
1793 break;
1794 }
1795
1796 return false;
1797}
1798
8b11a64c
ZD
1799/* Finds addresses in *OP_P inside STMT. */
1800
1801static void
726a989a 1802find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
8b11a64c 1803{
9a9d280e 1804 tree base = *op_p, step = size_zero_node;
8b11a64c
ZD
1805 struct iv *civ;
1806 struct ifs_ivopts_data ifs_ivopts_data;
1807
e3cc7254
ZD
1808 /* Do not play with volatile memory references. A bit too conservative,
1809 perhaps, but safe. */
726a989a 1810 if (gimple_has_volatile_ops (stmt))
e3cc7254
ZD
1811 goto fail;
1812
8b11a64c
ZD
1813 /* Ignore bitfields for now. Not really something terribly complicated
1814 to handle. TODO. */
75715cf6
EB
1815 if (TREE_CODE (base) == BIT_FIELD_REF)
1816 goto fail;
1817
ac182688
ZD
1818 base = unshare_expr (base);
1819
1820 if (TREE_CODE (base) == TARGET_MEM_REF)
1821 {
1822 tree type = build_pointer_type (TREE_TYPE (base));
1823 tree astep;
1824
1825 if (TMR_BASE (base)
1826 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1827 {
1828 civ = get_iv (data, TMR_BASE (base));
1829 if (!civ)
1830 goto fail;
1831
1832 TMR_BASE (base) = civ->base;
1833 step = civ->step;
1834 }
4d948885
RG
1835 if (TMR_INDEX2 (base)
1836 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1837 {
1838 civ = get_iv (data, TMR_INDEX2 (base));
1839 if (!civ)
1840 goto fail;
1841
1842 TMR_INDEX2 (base) = civ->base;
1843 step = civ->step;
1844 }
ac182688
ZD
1845 if (TMR_INDEX (base)
1846 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1847 {
1848 civ = get_iv (data, TMR_INDEX (base));
1849 if (!civ)
1850 goto fail;
8b11a64c 1851
ac182688
ZD
1852 TMR_INDEX (base) = civ->base;
1853 astep = civ->step;
be35cf60 1854
ac182688
ZD
1855 if (astep)
1856 {
1857 if (TMR_STEP (base))
1858 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1859
6e42ce54 1860 step = fold_build2 (PLUS_EXPR, type, step, astep);
ac182688
ZD
1861 }
1862 }
1863
6e42ce54 1864 if (integer_zerop (step))
ac182688
ZD
1865 goto fail;
1866 base = tree_mem_ref_addr (type, base);
1867 }
1868 else
1869 {
1870 ifs_ivopts_data.ivopts_data = data;
1871 ifs_ivopts_data.stmt = stmt;
9a9d280e 1872 ifs_ivopts_data.step = size_zero_node;
ac182688 1873 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
6e42ce54 1874 || integer_zerop (ifs_ivopts_data.step))
ac182688 1875 goto fail;
6e42ce54 1876 step = ifs_ivopts_data.step;
ac182688 1877
928bc34f
EB
1878 /* Check that the base expression is addressable. This needs
1879 to be done after substituting bases of IVs into it. */
1880 if (may_be_nonaddressable_p (base))
1881 goto fail;
1882
1883 /* Moreover, on strict alignment platforms, check that it is
1884 sufficiently aligned. */
32159434 1885 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
928bc34f
EB
1886 goto fail;
1887
ac182688 1888 base = build_fold_addr_expr (base);
ea643120
RG
1889
1890 /* Substituting bases of IVs into the base expression might
1891 have caused folding opportunities. */
1892 if (TREE_CODE (base) == ADDR_EXPR)
1893 {
1894 tree *ref = &TREE_OPERAND (base, 0);
1895 while (handled_component_p (*ref))
1896 ref = &TREE_OPERAND (*ref, 0);
70f34814 1897 if (TREE_CODE (*ref) == MEM_REF)
cb6b911a 1898 {
bcf71673
RG
1899 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1900 TREE_OPERAND (*ref, 0),
1901 TREE_OPERAND (*ref, 1));
cb6b911a
RG
1902 if (tem)
1903 *ref = tem;
1904 }
ea643120 1905 }
ac182688 1906 }
8b11a64c
ZD
1907
1908 civ = alloc_iv (base, step);
1909 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1910 return;
1911
1912fail:
1913 for_each_index (op_p, idx_record_use, data);
1914}
1915
1916/* Finds and records invariants used in STMT. */
1917
1918static void
726a989a 1919find_invariants_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c 1920{
f47c96aa
AM
1921 ssa_op_iter iter;
1922 use_operand_p use_p;
8b11a64c
ZD
1923 tree op;
1924
f47c96aa 1925 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 1926 {
f47c96aa 1927 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
1928 record_invariant (data, op, false);
1929 }
1930}
1931
1932/* Finds interesting uses of induction variables in the statement STMT. */
1933
1934static void
726a989a 1935find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c
ZD
1936{
1937 struct iv *iv;
726a989a 1938 tree op, *lhs, *rhs;
f47c96aa
AM
1939 ssa_op_iter iter;
1940 use_operand_p use_p;
726a989a 1941 enum tree_code code;
8b11a64c
ZD
1942
1943 find_invariants_stmt (data, stmt);
1944
726a989a 1945 if (gimple_code (stmt) == GIMPLE_COND)
8b11a64c 1946 {
726a989a 1947 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
1948 return;
1949 }
1950
726a989a 1951 if (is_gimple_assign (stmt))
8b11a64c 1952 {
726a989a
RB
1953 lhs = gimple_assign_lhs_ptr (stmt);
1954 rhs = gimple_assign_rhs1_ptr (stmt);
8b11a64c 1955
726a989a 1956 if (TREE_CODE (*lhs) == SSA_NAME)
8b11a64c
ZD
1957 {
1958 /* If the statement defines an induction variable, the uses are not
1959 interesting by themselves. */
1960
726a989a 1961 iv = get_iv (data, *lhs);
8b11a64c 1962
6e42ce54 1963 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
1964 return;
1965 }
1966
726a989a
RB
1967 code = gimple_assign_rhs_code (stmt);
1968 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1969 && (REFERENCE_CLASS_P (*rhs)
1970 || is_gimple_val (*rhs)))
8b11a64c 1971 {
726a989a
RB
1972 if (REFERENCE_CLASS_P (*rhs))
1973 find_interesting_uses_address (data, stmt, rhs);
1974 else
1975 find_interesting_uses_op (data, *rhs);
8b11a64c 1976
726a989a
RB
1977 if (REFERENCE_CLASS_P (*lhs))
1978 find_interesting_uses_address (data, stmt, lhs);
8b11a64c 1979 return;
8b11a64c 1980 }
726a989a 1981 else if (TREE_CODE_CLASS (code) == tcc_comparison)
8b11a64c 1982 {
726a989a 1983 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
1984 return;
1985 }
2f4675b4
ZD
1986
1987 /* TODO -- we should also handle address uses of type
1988
1989 memory = call (whatever);
1990
1991 and
1992
1993 call (memory). */
8b11a64c
ZD
1994 }
1995
726a989a
RB
1996 if (gimple_code (stmt) == GIMPLE_PHI
1997 && gimple_bb (stmt) == data->current_loop->header)
8b11a64c 1998 {
726a989a 1999 iv = get_iv (data, PHI_RESULT (stmt));
8b11a64c 2000
6e42ce54 2001 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
2002 return;
2003 }
2004
f47c96aa 2005 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 2006 {
f47c96aa 2007 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
2008
2009 if (TREE_CODE (op) != SSA_NAME)
2010 continue;
2011
2012 iv = get_iv (data, op);
2013 if (!iv)
2014 continue;
2015
2016 find_interesting_uses_op (data, op);
2017 }
2018}
2019
2020/* Finds interesting uses of induction variables outside of loops
2021 on loop exit edge EXIT. */
2022
2023static void
2024find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2025{
538dd0b7
DM
2026 gphi *phi;
2027 gphi_iterator psi;
726a989a 2028 tree def;
8b11a64c 2029
726a989a 2030 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 2031 {
538dd0b7 2032 phi = psi.phi ();
8b11a64c 2033 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
ea057359 2034 if (!virtual_operand_p (def))
18081149 2035 find_interesting_uses_op (data, def);
8b11a64c
ZD
2036 }
2037}
2038
2039/* Finds uses of the induction variables that are interesting. */
2040
2041static void
2042find_interesting_uses (struct ivopts_data *data)
2043{
2044 basic_block bb;
726a989a 2045 gimple_stmt_iterator bsi;
8b11a64c
ZD
2046 basic_block *body = get_loop_body (data->current_loop);
2047 unsigned i;
2048 struct version_info *info;
2049 edge e;
2050
2051 if (dump_file && (dump_flags & TDF_DETAILS))
2052 fprintf (dump_file, "Uses:\n\n");
2053
2054 for (i = 0; i < data->current_loop->num_nodes; i++)
2055 {
628f6a4e 2056 edge_iterator ei;
8b11a64c
ZD
2057 bb = body[i];
2058
628f6a4e 2059 FOR_EACH_EDGE (e, ei, bb->succs)
fefa31b5 2060 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
8b11a64c
ZD
2061 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2062 find_interesting_uses_outside (data, e);
2063
726a989a
RB
2064 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2065 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2066 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
b5b8b0ac
AO
2067 if (!is_gimple_debug (gsi_stmt (bsi)))
2068 find_interesting_uses_stmt (data, gsi_stmt (bsi));
8b11a64c
ZD
2069 }
2070
2071 if (dump_file && (dump_flags & TDF_DETAILS))
2072 {
87c476a2
ZD
2073 bitmap_iterator bi;
2074
8b11a64c
ZD
2075 fprintf (dump_file, "\n");
2076
87c476a2 2077 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
2078 {
2079 info = ver_info (data, i);
2080 if (info->inv_id)
2081 {
2082 fprintf (dump_file, " ");
2083 print_generic_expr (dump_file, info->name, TDF_SLIM);
2084 fprintf (dump_file, " is invariant (%d)%s\n",
2085 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2086 }
87c476a2 2087 }
8b11a64c
ZD
2088
2089 fprintf (dump_file, "\n");
2090 }
2091
2092 free (body);
2093}
2094
f5e2738c 2095/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
9be872b7
ZD
2096 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2097 we are at the top-level of the processed address. */
f5e2738c
ZD
2098
2099static tree
9be872b7 2100strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
58fe50d5 2101 HOST_WIDE_INT *offset)
f5e2738c 2102{
9be872b7 2103 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
f5e2738c
ZD
2104 enum tree_code code;
2105 tree type, orig_type = TREE_TYPE (expr);
58fe50d5 2106 HOST_WIDE_INT off0, off1, st;
f5e2738c
ZD
2107 tree orig_expr = expr;
2108
2109 STRIP_NOPS (expr);
9be872b7 2110
f5e2738c
ZD
2111 type = TREE_TYPE (expr);
2112 code = TREE_CODE (expr);
2113 *offset = 0;
2114
2115 switch (code)
2116 {
2117 case INTEGER_CST:
2118 if (!cst_and_fits_in_hwi (expr)
6e682d7e 2119 || integer_zerop (expr))
f5e2738c
ZD
2120 return orig_expr;
2121
2122 *offset = int_cst_value (expr);
ff5e9a94 2123 return build_int_cst (orig_type, 0);
f5e2738c 2124
2d1a1007 2125 case POINTER_PLUS_EXPR:
f5e2738c
ZD
2126 case PLUS_EXPR:
2127 case MINUS_EXPR:
2128 op0 = TREE_OPERAND (expr, 0);
2129 op1 = TREE_OPERAND (expr, 1);
2130
9be872b7
ZD
2131 op0 = strip_offset_1 (op0, false, false, &off0);
2132 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 2133
2d1a1007 2134 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
f5e2738c
ZD
2135 if (op0 == TREE_OPERAND (expr, 0)
2136 && op1 == TREE_OPERAND (expr, 1))
2137 return orig_expr;
2138
6e682d7e 2139 if (integer_zerop (op1))
f5e2738c 2140 expr = op0;
6e682d7e 2141 else if (integer_zerop (op0))
f5e2738c 2142 {
2d1a1007 2143 if (code == MINUS_EXPR)
9be872b7 2144 expr = fold_build1 (NEGATE_EXPR, type, op1);
2d1a1007
AP
2145 else
2146 expr = op1;
f5e2738c
ZD
2147 }
2148 else
9be872b7 2149 expr = fold_build2 (code, type, op0, op1);
f5e2738c
ZD
2150
2151 return fold_convert (orig_type, expr);
2152
7a2faca1
EB
2153 case MULT_EXPR:
2154 op1 = TREE_OPERAND (expr, 1);
2155 if (!cst_and_fits_in_hwi (op1))
2156 return orig_expr;
2157
2158 op0 = TREE_OPERAND (expr, 0);
2159 op0 = strip_offset_1 (op0, false, false, &off0);
2160 if (op0 == TREE_OPERAND (expr, 0))
2161 return orig_expr;
2162
2163 *offset = off0 * int_cst_value (op1);
2164 if (integer_zerop (op0))
2165 expr = op0;
2166 else
2167 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2168
2169 return fold_convert (orig_type, expr);
2170
f5e2738c 2171 case ARRAY_REF:
9f7ccf69 2172 case ARRAY_RANGE_REF:
f5e2738c
ZD
2173 if (!inside_addr)
2174 return orig_expr;
2175
2176 step = array_ref_element_size (expr);
2177 if (!cst_and_fits_in_hwi (step))
2178 break;
2179
2180 st = int_cst_value (step);
2181 op1 = TREE_OPERAND (expr, 1);
9be872b7 2182 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 2183 *offset = off1 * st;
9be872b7
ZD
2184
2185 if (top_compref
6e682d7e 2186 && integer_zerop (op1))
9be872b7
ZD
2187 {
2188 /* Strip the component reference completely. */
2189 op0 = TREE_OPERAND (expr, 0);
2190 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2191 *offset += off0;
2192 return op0;
2193 }
f5e2738c
ZD
2194 break;
2195
2196 case COMPONENT_REF:
58fe50d5
BC
2197 {
2198 tree field;
2199
2200 if (!inside_addr)
2201 return orig_expr;
2202
2203 tmp = component_ref_field_offset (expr);
2204 field = TREE_OPERAND (expr, 1);
2205 if (top_compref
2206 && cst_and_fits_in_hwi (tmp)
2207 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2208 {
2209 HOST_WIDE_INT boffset, abs_off;
2210
2211 /* Strip the component reference completely. */
2212 op0 = TREE_OPERAND (expr, 0);
2213 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2214 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2215 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2216 if (boffset < 0)
2217 abs_off = -abs_off;
2218
2219 *offset = off0 + int_cst_value (tmp) + abs_off;
2220 return op0;
2221 }
2222 }
f5e2738c
ZD
2223 break;
2224
2225 case ADDR_EXPR:
9be872b7
ZD
2226 op0 = TREE_OPERAND (expr, 0);
2227 op0 = strip_offset_1 (op0, true, true, &off0);
2228 *offset += off0;
2229
2230 if (op0 == TREE_OPERAND (expr, 0))
2231 return orig_expr;
2232
d5dc1717 2233 expr = build_fold_addr_expr (op0);
9be872b7
ZD
2234 return fold_convert (orig_type, expr);
2235
70f34814
RG
2236 case MEM_REF:
2237 /* ??? Offset operand? */
9be872b7 2238 inside_addr = false;
f5e2738c
ZD
2239 break;
2240
2241 default:
2242 return orig_expr;
2243 }
2244
2245 /* Default handling of expressions for that we want to recurse into
2246 the first operand. */
2247 op0 = TREE_OPERAND (expr, 0);
9be872b7 2248 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
f5e2738c
ZD
2249 *offset += off0;
2250
2251 if (op0 == TREE_OPERAND (expr, 0)
2252 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2253 return orig_expr;
2254
2255 expr = copy_node (expr);
2256 TREE_OPERAND (expr, 0) = op0;
2257 if (op1)
2258 TREE_OPERAND (expr, 1) = op1;
2259
9be872b7 2260 /* Inside address, we might strip the top level component references,
0fa2e4df 2261 thus changing type of the expression. Handling of ADDR_EXPR
9be872b7
ZD
2262 will fix that. */
2263 expr = fold_convert (orig_type, expr);
2264
2265 return expr;
2266}
2267
2268/* Strips constant offsets from EXPR and stores them to OFFSET. */
2269
2270static tree
2271strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2272{
58fe50d5
BC
2273 HOST_WIDE_INT off;
2274 tree core = strip_offset_1 (expr, false, false, &off);
2275 *offset = off;
2276 return core;
f5e2738c
ZD
2277}
2278
d482f417 2279/* Returns variant of TYPE that can be used as base for different uses.
20527215
ZD
2280 We return unsigned type with the same precision, which avoids problems
2281 with overflows. */
d482f417
ZD
2282
2283static tree
2284generic_type_for (tree type)
2285{
2286 if (POINTER_TYPE_P (type))
20527215 2287 return unsigned_type_for (type);
d482f417
ZD
2288
2289 if (TYPE_UNSIGNED (type))
2290 return type;
2291
2292 return unsigned_type_for (type);
2293}
2294
9be872b7
ZD
2295/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2296 the bitmap to that we should store it. */
2297
2298static struct ivopts_data *fd_ivopts_data;
2299static tree
2300find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2301{
c22940cd 2302 bitmap *depends_on = (bitmap *) data;
9be872b7
ZD
2303 struct version_info *info;
2304
2305 if (TREE_CODE (*expr_p) != SSA_NAME)
2306 return NULL_TREE;
2307 info = name_info (fd_ivopts_data, *expr_p);
2308
2309 if (!info->inv_id || info->has_nonlin_use)
2310 return NULL_TREE;
2311
2312 if (!*depends_on)
2313 *depends_on = BITMAP_ALLOC (NULL);
2314 bitmap_set_bit (*depends_on, info->inv_id);
2315
2316 return NULL_TREE;
2317}
2318
8b11a64c
ZD
2319/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2320 position to POS. If USE is not NULL, the candidate is set as related to
2321 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2322 replacement of the final value of the iv by a direct computation. */
2323
2324static struct iv_cand *
2325add_candidate_1 (struct ivopts_data *data,
2326 tree base, tree step, bool important, enum iv_position pos,
726a989a 2327 struct iv_use *use, gimple incremented_at)
8b11a64c
ZD
2328{
2329 unsigned i;
2330 struct iv_cand *cand = NULL;
d482f417 2331 tree type, orig_type;
b8698a0f 2332
d8af4ba3
ZD
2333 /* For non-original variables, make sure their values are computed in a type
2334 that does not invoke undefined behavior on overflows (since in general,
2335 we cannot prove that these induction variables are non-wrapping). */
2336 if (pos != IP_ORIGINAL)
8b11a64c 2337 {
d482f417
ZD
2338 orig_type = TREE_TYPE (base);
2339 type = generic_type_for (orig_type);
71adbef3 2340 if (type != orig_type)
8b11a64c 2341 {
8b11a64c 2342 base = fold_convert (type, base);
6e42ce54 2343 step = fold_convert (type, step);
8b11a64c
ZD
2344 }
2345 }
2346
2347 for (i = 0; i < n_iv_cands (data); i++)
2348 {
2349 cand = iv_cand (data, i);
2350
2351 if (cand->pos != pos)
2352 continue;
2353
2c08497a
BS
2354 if (cand->incremented_at != incremented_at
2355 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2356 && cand->ainc_use != use))
8b11a64c
ZD
2357 continue;
2358
2359 if (!cand->iv)
2360 {
2361 if (!base && !step)
2362 break;
2363
2364 continue;
2365 }
2366
2367 if (!base && !step)
2368 continue;
2369
6e42ce54 2370 if (operand_equal_p (base, cand->iv->base, 0)
18081149
XDL
2371 && operand_equal_p (step, cand->iv->step, 0)
2372 && (TYPE_PRECISION (TREE_TYPE (base))
2373 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
6e42ce54 2374 break;
8b11a64c
ZD
2375 }
2376
2377 if (i == n_iv_cands (data))
2378 {
5ed6ace5 2379 cand = XCNEW (struct iv_cand);
8b11a64c
ZD
2380 cand->id = i;
2381
2382 if (!base && !step)
2383 cand->iv = NULL;
2384 else
2385 cand->iv = alloc_iv (base, step);
2386
2387 cand->pos = pos;
2388 if (pos != IP_ORIGINAL && cand->iv)
2389 {
2390 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2391 cand->var_after = cand->var_before;
2392 }
2393 cand->important = important;
2394 cand->incremented_at = incremented_at;
9771b263 2395 data->iv_candidates.safe_push (cand);
8b11a64c 2396
9be872b7
ZD
2397 if (step
2398 && TREE_CODE (step) != INTEGER_CST)
2399 {
2400 fd_ivopts_data = data;
2401 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2402 }
2403
2c08497a
BS
2404 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2405 cand->ainc_use = use;
2406 else
2407 cand->ainc_use = NULL;
2408
8b11a64c
ZD
2409 if (dump_file && (dump_flags & TDF_DETAILS))
2410 dump_cand (dump_file, cand);
2411 }
2412
2413 if (important && !cand->important)
2414 {
2415 cand->important = true;
2416 if (dump_file && (dump_flags & TDF_DETAILS))
2417 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2418 }
2419
2420 if (use)
2421 {
2422 bitmap_set_bit (use->related_cands, i);
2423 if (dump_file && (dump_flags & TDF_DETAILS))
2424 fprintf (dump_file, "Candidate %d is related to use %d\n",
2425 cand->id, use->id);
2426 }
2427
2428 return cand;
2429}
2430
4366cf6d
ZD
2431/* Returns true if incrementing the induction variable at the end of the LOOP
2432 is allowed.
2433
2434 The purpose is to avoid splitting latch edge with a biv increment, thus
2435 creating a jump, possibly confusing other optimization passes and leaving
2436 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2437 is not available (so we do not have a better alternative), or if the latch
2438 edge is already nonempty. */
2439
2440static bool
2441allow_ip_end_pos_p (struct loop *loop)
2442{
2443 if (!ip_normal_pos (loop))
2444 return true;
2445
2446 if (!empty_block_p (ip_end_pos (loop)))
2447 return true;
2448
2449 return false;
2450}
2451
2c08497a
BS
2452/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2453 Important field is set to IMPORTANT. */
2454
2455static void
2456add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2457 bool important, struct iv_use *use)
2458{
2459 basic_block use_bb = gimple_bb (use->stmt);
ef4bddc2 2460 machine_mode mem_mode;
2c08497a
BS
2461 unsigned HOST_WIDE_INT cstepi;
2462
2463 /* If we insert the increment in any position other than the standard
2464 ones, we must ensure that it is incremented once per iteration.
2465 It must not be in an inner nested loop, or one side of an if
2466 statement. */
2467 if (use_bb->loop_father != data->current_loop
2468 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2469 || stmt_could_throw_p (use->stmt)
2470 || !cst_and_fits_in_hwi (step))
2471 return;
2472
2473 cstepi = int_cst_value (step);
2474
2475 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
8875e939
RR
2476 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2477 || USE_STORE_PRE_INCREMENT (mem_mode))
2478 && GET_MODE_SIZE (mem_mode) == cstepi)
2479 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2480 || USE_STORE_PRE_DECREMENT (mem_mode))
2481 && GET_MODE_SIZE (mem_mode) == -cstepi))
2c08497a
BS
2482 {
2483 enum tree_code code = MINUS_EXPR;
2484 tree new_base;
2485 tree new_step = step;
2486
2487 if (POINTER_TYPE_P (TREE_TYPE (base)))
2488 {
2489 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2490 code = POINTER_PLUS_EXPR;
2491 }
2492 else
2493 new_step = fold_convert (TREE_TYPE (base), new_step);
2494 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2495 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2496 use->stmt);
2497 }
8875e939
RR
2498 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2499 || USE_STORE_POST_INCREMENT (mem_mode))
2500 && GET_MODE_SIZE (mem_mode) == cstepi)
2501 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2502 || USE_STORE_POST_DECREMENT (mem_mode))
2503 && GET_MODE_SIZE (mem_mode) == -cstepi))
2c08497a
BS
2504 {
2505 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2506 use->stmt);
2507 }
2508}
2509
8b11a64c
ZD
2510/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2511 position to POS. If USE is not NULL, the candidate is set as related to
2512 it. The candidate computation is scheduled on all available positions. */
2513
2514static void
b8698a0f 2515add_candidate (struct ivopts_data *data,
8b11a64c
ZD
2516 tree base, tree step, bool important, struct iv_use *use)
2517{
2518 if (ip_normal_pos (data->current_loop))
726a989a 2519 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
4366cf6d
ZD
2520 if (ip_end_pos (data->current_loop)
2521 && allow_ip_end_pos_p (data->current_loop))
726a989a 2522 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2c08497a
BS
2523
2524 if (use != NULL && use->type == USE_ADDRESS)
2525 add_autoinc_candidates (data, base, step, important, use);
8b11a64c
ZD
2526}
2527
2528/* Adds standard iv candidates. */
2529
2530static void
2531add_standard_iv_candidates (struct ivopts_data *data)
2532{
0f250839
RG
2533 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2534
2535 /* The same for a double-integer type if it is still fast enough. */
2536 if (TYPE_PRECISION
2537 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2538 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2539 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2540 build_int_cst (long_integer_type_node, 1), true, NULL);
8b11a64c 2541
39b4020c 2542 /* The same for a double-integer type if it is still fast enough. */
0f250839
RG
2543 if (TYPE_PRECISION
2544 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2545 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2546 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2547 build_int_cst (long_long_integer_type_node, 1), true, NULL);
8b11a64c
ZD
2548}
2549
2550
2551/* Adds candidates bases on the old induction variable IV. */
2552
2553static void
2554add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2555{
726a989a
RB
2556 gimple phi;
2557 tree def;
8b11a64c
ZD
2558 struct iv_cand *cand;
2559
2560 add_candidate (data, iv->base, iv->step, true, NULL);
2561
2562 /* The same, but with initial value zero. */
1a00e5f7
JJ
2563 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2564 add_candidate (data, size_int (0), iv->step, true, NULL);
2565 else
2566 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2567 iv->step, true, NULL);
8b11a64c
ZD
2568
2569 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
726a989a 2570 if (gimple_code (phi) == GIMPLE_PHI)
8b11a64c
ZD
2571 {
2572 /* Additionally record the possibility of leaving the original iv
2573 untouched. */
2574 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
b83b5507
BC
2575 /* Don't add candidate if it's from another PHI node because
2576 it's an affine iv appearing in the form of PEELED_CHREC. */
2577 phi = SSA_NAME_DEF_STMT (def);
2578 if (gimple_code (phi) != GIMPLE_PHI)
2579 {
2580 cand = add_candidate_1 (data,
2581 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2582 SSA_NAME_DEF_STMT (def));
2583 cand->var_before = iv->ssa_name;
2584 cand->var_after = def;
2585 }
2586 else
2587 gcc_assert (gimple_bb (phi) == data->current_loop->header);
8b11a64c
ZD
2588 }
2589}
2590
2591/* Adds candidates based on the old induction variables. */
2592
2593static void
2594add_old_ivs_candidates (struct ivopts_data *data)
2595{
2596 unsigned i;
2597 struct iv *iv;
87c476a2 2598 bitmap_iterator bi;
8b11a64c 2599
87c476a2 2600 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
2601 {
2602 iv = ver_info (data, i)->iv;
6e42ce54 2603 if (iv && iv->biv_p && !integer_zerop (iv->step))
8b11a64c 2604 add_old_iv_candidates (data, iv);
87c476a2 2605 }
8b11a64c
ZD
2606}
2607
2608/* Adds candidates based on the value of the induction variable IV and USE. */
2609
2610static void
2611add_iv_value_candidates (struct ivopts_data *data,
2612 struct iv *iv, struct iv_use *use)
2613{
f5e2738c 2614 unsigned HOST_WIDE_INT offset;
9be872b7 2615 tree base;
1ffe34d9 2616 tree basetype;
8b11a64c 2617
9be872b7 2618 add_candidate (data, iv->base, iv->step, false, use);
be35cf60 2619
9be872b7
ZD
2620 /* The same, but with initial value zero. Make such variable important,
2621 since it is generic enough so that possibly many uses may be based
2622 on it. */
1ffe34d9
AP
2623 basetype = TREE_TYPE (iv->base);
2624 if (POINTER_TYPE_P (basetype))
2625 basetype = sizetype;
2626 add_candidate (data, build_int_cst (basetype, 0),
9be872b7 2627 iv->step, true, use);
8b11a64c 2628
7eeef08e
RG
2629 /* Third, try removing the constant offset. Make sure to even
2630 add a candidate for &a[0] vs. (T *)&a. */
9be872b7 2631 base = strip_offset (iv->base, &offset);
7eeef08e
RG
2632 if (offset
2633 || base != iv->base)
f5e2738c 2634 add_candidate (data, base, iv->step, false, use);
8b11a64c
ZD
2635}
2636
8b11a64c
ZD
2637/* Adds candidates based on the uses. */
2638
2639static void
2640add_derived_ivs_candidates (struct ivopts_data *data)
2641{
2642 unsigned i;
2643
2644 for (i = 0; i < n_iv_uses (data); i++)
2645 {
2646 struct iv_use *use = iv_use (data, i);
2647
2648 if (!use)
2649 continue;
2650
2651 switch (use->type)
2652 {
2653 case USE_NONLINEAR_EXPR:
2654 case USE_COMPARE:
9be872b7 2655 case USE_ADDRESS:
8b11a64c
ZD
2656 /* Just add the ivs based on the value of the iv used here. */
2657 add_iv_value_candidates (data, use->iv, use);
2658 break;
2659
8b11a64c 2660 default:
1e128c5f 2661 gcc_unreachable ();
8b11a64c
ZD
2662 }
2663 }
2664}
2665
b1b02be2
ZD
2666/* Record important candidates and add them to related_cands bitmaps
2667 if needed. */
2668
2669static void
2670record_important_candidates (struct ivopts_data *data)
2671{
2672 unsigned i;
2673 struct iv_use *use;
2674
2675 for (i = 0; i < n_iv_cands (data); i++)
2676 {
2677 struct iv_cand *cand = iv_cand (data, i);
2678
2679 if (cand->important)
2680 bitmap_set_bit (data->important_candidates, i);
2681 }
2682
2683 data->consider_all_candidates = (n_iv_cands (data)
2684 <= CONSIDER_ALL_CANDIDATES_BOUND);
2685
2686 if (data->consider_all_candidates)
2687 {
2688 /* We will not need "related_cands" bitmaps in this case,
2689 so release them to decrease peak memory consumption. */
2690 for (i = 0; i < n_iv_uses (data); i++)
2691 {
2692 use = iv_use (data, i);
8bdbfff5 2693 BITMAP_FREE (use->related_cands);
b1b02be2
ZD
2694 }
2695 }
2696 else
2697 {
2698 /* Add important candidates to the related_cands bitmaps. */
2699 for (i = 0; i < n_iv_uses (data); i++)
2700 bitmap_ior_into (iv_use (data, i)->related_cands,
2701 data->important_candidates);
2702 }
2703}
2704
8b11a64c
ZD
2705/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2706 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2707 we allocate a simple list to every use. */
2708
2709static void
2710alloc_use_cost_map (struct ivopts_data *data)
2711{
79836a12 2712 unsigned i, size, s;
8b11a64c
ZD
2713
2714 for (i = 0; i < n_iv_uses (data); i++)
2715 {
2716 struct iv_use *use = iv_use (data, i);
2717
2718 if (data->consider_all_candidates)
b1b02be2 2719 size = n_iv_cands (data);
8b11a64c
ZD
2720 else
2721 {
79836a12 2722 s = bitmap_count_bits (use->related_cands);
b1b02be2
ZD
2723
2724 /* Round up to the power of two, so that moduling by it is fast. */
79836a12 2725 size = s ? (1 << ceil_log2 (s)) : 1;
8b11a64c
ZD
2726 }
2727
b1b02be2 2728 use->n_map_members = size;
5ed6ace5 2729 use->cost_map = XCNEWVEC (struct cost_pair, size);
8b11a64c
ZD
2730 }
2731}
2732
6e8c65f6
ZD
2733/* Returns description of computation cost of expression whose runtime
2734 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
2735
2736static comp_cost
2737new_cost (unsigned runtime, unsigned complexity)
2738{
2739 comp_cost cost;
2740
2741 cost.cost = runtime;
2742 cost.complexity = complexity;
2743
2744 return cost;
2745}
2746
2747/* Adds costs COST1 and COST2. */
2748
2749static comp_cost
2750add_costs (comp_cost cost1, comp_cost cost2)
2751{
2752 cost1.cost += cost2.cost;
2753 cost1.complexity += cost2.complexity;
2754
2755 return cost1;
2756}
2757/* Subtracts costs COST1 and COST2. */
2758
2759static comp_cost
2760sub_costs (comp_cost cost1, comp_cost cost2)
2761{
2762 cost1.cost -= cost2.cost;
2763 cost1.complexity -= cost2.complexity;
2764
2765 return cost1;
2766}
2767
2768/* Returns a negative number if COST1 < COST2, a positive number if
2769 COST1 > COST2, and 0 if COST1 = COST2. */
2770
2771static int
2772compare_costs (comp_cost cost1, comp_cost cost2)
2773{
2774 if (cost1.cost == cost2.cost)
2775 return cost1.complexity - cost2.complexity;
2776
2777 return cost1.cost - cost2.cost;
2778}
2779
2780/* Returns true if COST is infinite. */
2781
2782static bool
2783infinite_cost_p (comp_cost cost)
2784{
2785 return cost.cost == INFTY;
2786}
2787
8b11a64c 2788/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
f5f12961 2789 on invariants DEPENDS_ON and that the value used in expressing it
d8af4ba3 2790 is VALUE, and in case of iv elimination the comparison operator is COMP. */
8b11a64c
ZD
2791
2792static void
2793set_use_iv_cost (struct ivopts_data *data,
6e8c65f6 2794 struct iv_use *use, struct iv_cand *cand,
18081149 2795 comp_cost cost, bitmap depends_on, tree value,
d8af4ba3 2796 enum tree_code comp, int inv_expr_id)
8b11a64c 2797{
b1b02be2
ZD
2798 unsigned i, s;
2799
6e8c65f6 2800 if (infinite_cost_p (cost))
8b11a64c 2801 {
8bdbfff5 2802 BITMAP_FREE (depends_on);
b1b02be2 2803 return;
8b11a64c
ZD
2804 }
2805
2806 if (data->consider_all_candidates)
2807 {
2808 use->cost_map[cand->id].cand = cand;
2809 use->cost_map[cand->id].cost = cost;
2810 use->cost_map[cand->id].depends_on = depends_on;
f5f12961 2811 use->cost_map[cand->id].value = value;
d8af4ba3 2812 use->cost_map[cand->id].comp = comp;
18081149 2813 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
8b11a64c
ZD
2814 return;
2815 }
2816
b1b02be2
ZD
2817 /* n_map_members is a power of two, so this computes modulo. */
2818 s = cand->id & (use->n_map_members - 1);
2819 for (i = s; i < use->n_map_members; i++)
2820 if (!use->cost_map[i].cand)
2821 goto found;
2822 for (i = 0; i < s; i++)
2823 if (!use->cost_map[i].cand)
2824 goto found;
2825
2826 gcc_unreachable ();
8b11a64c 2827
b1b02be2
ZD
2828found:
2829 use->cost_map[i].cand = cand;
2830 use->cost_map[i].cost = cost;
2831 use->cost_map[i].depends_on = depends_on;
f5f12961 2832 use->cost_map[i].value = value;
d8af4ba3 2833 use->cost_map[i].comp = comp;
18081149 2834 use->cost_map[i].inv_expr_id = inv_expr_id;
8b11a64c
ZD
2835}
2836
b1b02be2 2837/* Gets cost of (USE, CANDIDATE) pair. */
8b11a64c 2838
b1b02be2
ZD
2839static struct cost_pair *
2840get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2841 struct iv_cand *cand)
8b11a64c 2842{
b1b02be2
ZD
2843 unsigned i, s;
2844 struct cost_pair *ret;
8b11a64c
ZD
2845
2846 if (!cand)
b1b02be2 2847 return NULL;
8b11a64c
ZD
2848
2849 if (data->consider_all_candidates)
8b11a64c 2850 {
b1b02be2
ZD
2851 ret = use->cost_map + cand->id;
2852 if (!ret->cand)
2853 return NULL;
8b11a64c 2854
b1b02be2 2855 return ret;
8b11a64c 2856 }
b8698a0f 2857
b1b02be2
ZD
2858 /* n_map_members is a power of two, so this computes modulo. */
2859 s = cand->id & (use->n_map_members - 1);
2860 for (i = s; i < use->n_map_members; i++)
2861 if (use->cost_map[i].cand == cand)
2862 return use->cost_map + i;
79836a12
RB
2863 else if (use->cost_map[i].cand == NULL)
2864 return NULL;
b1b02be2
ZD
2865 for (i = 0; i < s; i++)
2866 if (use->cost_map[i].cand == cand)
2867 return use->cost_map + i;
79836a12
RB
2868 else if (use->cost_map[i].cand == NULL)
2869 return NULL;
b1b02be2
ZD
2870
2871 return NULL;
8b11a64c
ZD
2872}
2873
8679c649
JH
2874/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2875static rtx
2876produce_memory_decl_rtl (tree obj, int *regno)
2877{
09e881c9 2878 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
ef4bddc2 2879 machine_mode address_mode = targetm.addr_space.address_mode (as);
8679c649 2880 rtx x;
b8698a0f 2881
e16acfcd 2882 gcc_assert (obj);
8679c649
JH
2883 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2884 {
2885 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
d4ebfa65 2886 x = gen_rtx_SYMBOL_REF (address_mode, name);
8318b0d9
RH
2887 SET_SYMBOL_REF_DECL (x, obj);
2888 x = gen_rtx_MEM (DECL_MODE (obj), x);
09e881c9 2889 set_mem_addr_space (x, as);
8318b0d9 2890 targetm.encode_section_info (obj, x, true);
8679c649
JH
2891 }
2892 else
8318b0d9 2893 {
d4ebfa65 2894 x = gen_raw_REG (address_mode, (*regno)++);
8318b0d9 2895 x = gen_rtx_MEM (DECL_MODE (obj), x);
09e881c9 2896 set_mem_addr_space (x, as);
8318b0d9 2897 }
8679c649 2898
8318b0d9 2899 return x;
8679c649
JH
2900}
2901
8b11a64c
ZD
2902/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2903 walk_tree. DATA contains the actual fake register number. */
2904
2905static tree
2906prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2907{
2908 tree obj = NULL_TREE;
2909 rtx x = NULL_RTX;
c22940cd 2910 int *regno = (int *) data;
8b11a64c
ZD
2911
2912 switch (TREE_CODE (*expr_p))
2913 {
8679c649
JH
2914 case ADDR_EXPR:
2915 for (expr_p = &TREE_OPERAND (*expr_p, 0);
afe84921
RH
2916 handled_component_p (*expr_p);
2917 expr_p = &TREE_OPERAND (*expr_p, 0))
2918 continue;
8679c649 2919 obj = *expr_p;
c401fb6f 2920 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
8679c649
JH
2921 x = produce_memory_decl_rtl (obj, regno);
2922 break;
2923
8b11a64c
ZD
2924 case SSA_NAME:
2925 *ws = 0;
2926 obj = SSA_NAME_VAR (*expr_p);
70b5e7dc
RG
2927 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2928 if (!obj)
2929 return NULL_TREE;
8b11a64c
ZD
2930 if (!DECL_RTL_SET_P (obj))
2931 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2932 break;
2933
2934 case VAR_DECL:
2935 case PARM_DECL:
2936 case RESULT_DECL:
2937 *ws = 0;
2938 obj = *expr_p;
2939
2940 if (DECL_RTL_SET_P (obj))
2941 break;
2942
2943 if (DECL_MODE (obj) == BLKmode)
8679c649 2944 x = produce_memory_decl_rtl (obj, regno);
8b11a64c
ZD
2945 else
2946 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2947
2948 break;
2949
2950 default:
2951 break;
2952 }
2953
2954 if (x)
2955 {
9771b263 2956 decl_rtl_to_reset.safe_push (obj);
8b11a64c
ZD
2957 SET_DECL_RTL (obj, x);
2958 }
2959
2960 return NULL_TREE;
2961}
2962
2963/* Determines cost of the computation of EXPR. */
2964
2965static unsigned
f40751dd 2966computation_cost (tree expr, bool speed)
8b11a64c 2967{
b32d5189
DM
2968 rtx_insn *seq;
2969 rtx rslt;
8b11a64c
ZD
2970 tree type = TREE_TYPE (expr);
2971 unsigned cost;
1d27fed4
DD
2972 /* Avoid using hard regs in ways which may be unsupported. */
2973 int regno = LAST_VIRTUAL_REGISTER + 1;
d52f5295 2974 struct cgraph_node *node = cgraph_node::get (current_function_decl);
5fefcf92 2975 enum node_frequency real_frequency = node->frequency;
8b11a64c 2976
5fefcf92 2977 node->frequency = NODE_FREQUENCY_NORMAL;
f40751dd 2978 crtl->maybe_hot_insn_p = speed;
8b11a64c
ZD
2979 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2980 start_sequence ();
2981 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2982 seq = get_insns ();
2983 end_sequence ();
f40751dd 2984 default_rtl_profile ();
5fefcf92 2985 node->frequency = real_frequency;
8b11a64c 2986
f40751dd 2987 cost = seq_cost (seq, speed);
2ca202e7 2988 if (MEM_P (rslt))
09e881c9
BE
2989 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2990 TYPE_ADDR_SPACE (type), speed);
b5ee6752 2991 else if (!REG_P (rslt))
5e8f01f4 2992 cost += set_src_cost (rslt, speed);
8b11a64c
ZD
2993
2994 return cost;
2995}
2996
2997/* Returns variable containing the value of candidate CAND at statement AT. */
2998
2999static tree
726a989a 3000var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
8b11a64c
ZD
3001{
3002 if (stmt_after_increment (loop, cand, stmt))
3003 return cand->var_after;
3004 else
3005 return cand->var_before;
3006}
3007
b67102ae
ZD
3008/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3009 same precision that is at least as wide as the precision of TYPE, stores
3010 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3011 type of A and B. */
3012
3013static tree
3014determine_common_wider_type (tree *a, tree *b)
3015{
3016 tree wider_type = NULL;
3017 tree suba, subb;
3018 tree atype = TREE_TYPE (*a);
3019
1043771b 3020 if (CONVERT_EXPR_P (*a))
b67102ae
ZD
3021 {
3022 suba = TREE_OPERAND (*a, 0);
3023 wider_type = TREE_TYPE (suba);
3024 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3025 return atype;
3026 }
3027 else
3028 return atype;
3029
1043771b 3030 if (CONVERT_EXPR_P (*b))
b67102ae
ZD
3031 {
3032 subb = TREE_OPERAND (*b, 0);
3033 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3034 return atype;
3035 }
3036 else
3037 return atype;
3038
3039 *a = suba;
3040 *b = subb;
3041 return wider_type;
3042}
3043
8b11a64c 3044/* Determines the expression by that USE is expressed from induction variable
ac182688
ZD
3045 CAND at statement AT in LOOP. The expression is stored in a decomposed
3046 form into AFF. Returns false if USE cannot be expressed using CAND. */
8b11a64c 3047
ac182688
ZD
3048static bool
3049get_computation_aff (struct loop *loop,
726a989a 3050 struct iv_use *use, struct iv_cand *cand, gimple at,
84562394 3051 struct aff_tree *aff)
8b11a64c 3052{
2f4675b4
ZD
3053 tree ubase = use->iv->base;
3054 tree ustep = use->iv->step;
3055 tree cbase = cand->iv->base;
73f30c63 3056 tree cstep = cand->iv->step, cstep_common;
8b11a64c 3057 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
73f30c63 3058 tree common_type, var;
8b11a64c 3059 tree uutype;
73f30c63 3060 aff_tree cbase_aff, var_aff;
807e902e 3061 widest_int rat;
8b11a64c
ZD
3062
3063 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3064 {
3065 /* We do not have a precision to express the values of use. */
ac182688 3066 return false;
8b11a64c
ZD
3067 }
3068
73f30c63
ZD
3069 var = var_at_stmt (loop, cand, at);
3070 uutype = unsigned_type_for (utype);
8b11a64c 3071
73f30c63
ZD
3072 /* If the conversion is not noop, perform it. */
3073 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
8b11a64c 3074 {
8b11a64c 3075 cstep = fold_convert (uutype, cstep);
73f30c63
ZD
3076 cbase = fold_convert (uutype, cbase);
3077 var = fold_convert (uutype, var);
9be872b7 3078 }
9be872b7 3079
73f30c63
ZD
3080 if (!constant_multiple_of (ustep, cstep, &rat))
3081 return false;
8b11a64c 3082
b67102ae
ZD
3083 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3084 type, we achieve better folding by computing their difference in this
3085 wider type, and cast the result to UUTYPE. We do not need to worry about
3086 overflows, as all the arithmetics will in the end be performed in UUTYPE
3087 anyway. */
3088 common_type = determine_common_wider_type (&ubase, &cbase);
3089
73f30c63
ZD
3090 /* use = ubase - ratio * cbase + ratio * var. */
3091 tree_to_aff_combination (ubase, common_type, aff);
3092 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3093 tree_to_aff_combination (var, uutype, &var_aff);
8b11a64c 3094
73f30c63
ZD
3095 /* We need to shift the value if we are after the increment. */
3096 if (stmt_after_increment (loop, cand, at))
8b11a64c 3097 {
73f30c63 3098 aff_tree cstep_aff;
b8698a0f 3099
73f30c63
ZD
3100 if (common_type != uutype)
3101 cstep_common = fold_convert (common_type, cstep);
9be872b7 3102 else
73f30c63 3103 cstep_common = cstep;
ac182688 3104
73f30c63
ZD
3105 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3106 aff_combination_add (&cbase_aff, &cstep_aff);
8b11a64c 3107 }
8b11a64c 3108
27bcd47c 3109 aff_combination_scale (&cbase_aff, -rat);
ac182688 3110 aff_combination_add (aff, &cbase_aff);
b67102ae 3111 if (common_type != uutype)
73f30c63
ZD
3112 aff_combination_convert (aff, uutype);
3113
3114 aff_combination_scale (&var_aff, rat);
3115 aff_combination_add (aff, &var_aff);
ac182688
ZD
3116
3117 return true;
3118}
3119
0c604a61
TV
3120/* Return the type of USE. */
3121
3122static tree
3123get_use_type (struct iv_use *use)
3124{
3125 tree base_type = TREE_TYPE (use->iv->base);
3126 tree type;
3127
3128 if (use->type == USE_ADDRESS)
3129 {
3130 /* The base_type may be a void pointer. Create a pointer type based on
3131 the mem_ref instead. */
3132 type = build_pointer_type (TREE_TYPE (*use->op_p));
3133 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3134 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3135 }
3136 else
3137 type = base_type;
3138
3139 return type;
3140}
3141
ac182688
ZD
3142/* Determines the expression by that USE is expressed from induction variable
3143 CAND at statement AT in LOOP. The computation is unshared. */
3144
3145static tree
3146get_computation_at (struct loop *loop,
726a989a 3147 struct iv_use *use, struct iv_cand *cand, gimple at)
ac182688 3148{
73f30c63 3149 aff_tree aff;
0c604a61 3150 tree type = get_use_type (use);
ac182688
ZD
3151
3152 if (!get_computation_aff (loop, use, cand, at, &aff))
3153 return NULL_TREE;
3154 unshare_aff_combination (&aff);
3155 return fold_convert (type, aff_combination_to_tree (&aff));
8b11a64c
ZD
3156}
3157
3158/* Determines the expression by that USE is expressed from induction variable
ac182688 3159 CAND in LOOP. The computation is unshared. */
8b11a64c
ZD
3160
3161static tree
3162get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3163{
3164 return get_computation_at (loop, use, cand, use->stmt);
3165}
3166
6521ac85
SL
3167/* Adjust the cost COST for being in loop setup rather than loop body.
3168 If we're optimizing for space, the loop setup overhead is constant;
3169 if we're optimizing for speed, amortize it over the per-iteration cost. */
3170static unsigned
3171adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3172{
3173 if (cost == INFTY)
3174 return cost;
3175 else if (optimize_loop_for_speed_p (data->current_loop))
18081149 3176 return cost / avg_loop_niter (data->current_loop);
6521ac85
SL
3177 else
3178 return cost;
3179}
3180
b1dd92fd 3181/* Returns true if multiplying by RATIO is allowed in an address. Test the
09e881c9
BE
3182 validity for a memory reference accessing memory of mode MODE in
3183 address space AS. */
3184
ac182688
ZD
3185
3186bool
ef4bddc2 3187multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
09e881c9 3188 addr_space_t as)
ac182688
ZD
3189{
3190#define MAX_RATIO 128
09e881c9 3191 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
9771b263 3192 static vec<sbitmap> valid_mult_list;
09e881c9
BE
3193 sbitmap valid_mult;
3194
9771b263
DN
3195 if (data_index >= valid_mult_list.length ())
3196 valid_mult_list.safe_grow_cleared (data_index + 1);
09e881c9 3197
9771b263 3198 valid_mult = valid_mult_list[data_index];
09e881c9 3199 if (!valid_mult)
ac182688 3200 {
ef4bddc2 3201 machine_mode address_mode = targetm.addr_space.address_mode (as);
d4ebfa65 3202 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
f258df2d
IS
3203 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3204 rtx addr, scaled;
ac182688
ZD
3205 HOST_WIDE_INT i;
3206
09e881c9 3207 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
f61e445a 3208 bitmap_clear (valid_mult);
f258df2d
IS
3209 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3210 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
ac182688
ZD
3211 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3212 {
f258df2d
IS
3213 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3214 if (memory_address_addr_space_p (mode, addr, as)
3215 || memory_address_addr_space_p (mode, scaled, as))
d7c028c0 3216 bitmap_set_bit (valid_mult, i + MAX_RATIO);
ac182688
ZD
3217 }
3218
3219 if (dump_file && (dump_flags & TDF_DETAILS))
3220 {
3221 fprintf (dump_file, " allowed multipliers:");
3222 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
d7c028c0 3223 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
ac182688
ZD
3224 fprintf (dump_file, " %d", (int) i);
3225 fprintf (dump_file, "\n");
3226 fprintf (dump_file, "\n");
3227 }
09e881c9 3228
9771b263 3229 valid_mult_list[data_index] = valid_mult;
ac182688
ZD
3230 }
3231
3232 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3233 return false;
3234
d7c028c0 3235 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
ac182688
ZD
3236}
3237
8b11a64c
ZD
3238/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3239 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
b1dd92fd 3240 variable is omitted. Compute the cost for a memory reference that accesses
09e881c9 3241 a memory location of mode MEM_MODE in address space AS.
b1dd92fd 3242
2c08497a
BS
3243 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3244 size of MEM_MODE / RATIO) is available. To make this determination, we
3245 look at the size of the increment to be made, which is given in CSTEP.
3246 CSTEP may be zero if the step is unknown.
3247 STMT_AFTER_INC is true iff the statement we're looking at is after the
3248 increment of the original biv.
3249
8b11a64c
ZD
3250 TODO -- there must be some better way. This all is quite crude. */
3251
cd047059
BC
3252enum ainc_type
3253{
3254 AINC_PRE_INC, /* Pre increment. */
3255 AINC_PRE_DEC, /* Pre decrement. */
3256 AINC_POST_INC, /* Post increment. */
3257 AINC_POST_DEC, /* Post decrement. */
3258 AINC_NONE /* Also the number of auto increment types. */
3259};
3260
0823efed 3261typedef struct address_cost_data_s
09e881c9
BE
3262{
3263 HOST_WIDE_INT min_offset, max_offset;
3264 unsigned costs[2][2][2][2];
cd047059 3265 unsigned ainc_costs[AINC_NONE];
09e881c9
BE
3266} *address_cost_data;
3267
09e881c9 3268
6e8c65f6 3269static comp_cost
8b11a64c 3270get_address_cost (bool symbol_present, bool var_present,
dd5f63f8 3271 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
ef4bddc2 3272 HOST_WIDE_INT cstep, machine_mode mem_mode,
09e881c9 3273 addr_space_t as, bool speed,
2c08497a 3274 bool stmt_after_inc, bool *may_autoinc)
8b11a64c 3275{
ef4bddc2 3276 machine_mode address_mode = targetm.addr_space.address_mode (as);
9771b263 3277 static vec<address_cost_data> address_cost_data_list;
09e881c9
BE
3278 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3279 address_cost_data data;
2c08497a
BS
3280 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3281 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
6e8c65f6 3282 unsigned cost, acost, complexity;
cd047059 3283 enum ainc_type autoinc_type;
2c08497a
BS
3284 bool offset_p, ratio_p, autoinc;
3285 HOST_WIDE_INT s_offset, autoinc_offset, msize;
8b11a64c
ZD
3286 unsigned HOST_WIDE_INT mask;
3287 unsigned bits;
3288
9771b263
DN
3289 if (data_index >= address_cost_data_list.length ())
3290 address_cost_data_list.safe_grow_cleared (data_index + 1);
09e881c9 3291
9771b263 3292 data = address_cost_data_list[data_index];
09e881c9 3293 if (!data)
8b11a64c
ZD
3294 {
3295 HOST_WIDE_INT i;
d6fa2d17
XDL
3296 HOST_WIDE_INT rat, off = 0;
3297 int old_cse_not_expected, width;
975626a7 3298 unsigned sym_p, var_p, off_p, rat_p, add_c;
b32d5189
DM
3299 rtx_insn *seq;
3300 rtx addr, base;
975626a7
ZD
3301 rtx reg0, reg1;
3302
09e881c9 3303 data = (address_cost_data) xcalloc (1, sizeof (*data));
8b11a64c 3304
d4ebfa65 3305 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
8b11a64c 3306
d6fa2d17
XDL
3307 width = GET_MODE_BITSIZE (address_mode) - 1;
3308 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3309 width = HOST_BITS_PER_WIDE_INT - 1;
d4ebfa65 3310 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
d6fa2d17
XDL
3311
3312 for (i = width; i >= 0; i--)
8b11a64c 3313 {
eb87c7c4 3314 off = -((unsigned HOST_WIDE_INT) 1 << i);
d6fa2d17
XDL
3315 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3316 if (memory_address_addr_space_p (mem_mode, addr, as))
8b11a64c
ZD
3317 break;
3318 }
d6fa2d17 3319 data->min_offset = (i == -1? 0 : off);
8b11a64c 3320
d6fa2d17 3321 for (i = width; i >= 0; i--)
8b11a64c 3322 {
eb87c7c4 3323 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
d6fa2d17
XDL
3324 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3325 if (memory_address_addr_space_p (mem_mode, addr, as))
8b11a64c 3326 break;
b88fe5e9
ZC
3327 /* For some TARGET, like ARM THUMB1, the offset should be nature
3328 aligned. Try an aligned offset if address_mode is not QImode. */
3329 off = (address_mode == QImode)
3330 ? 0
3331 : ((unsigned HOST_WIDE_INT) 1 << i)
3332 - GET_MODE_SIZE (address_mode);
3333 if (off > 0)
3334 {
3335 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3336 if (memory_address_addr_space_p (mem_mode, addr, as))
3337 break;
3338 }
8b11a64c 3339 }
d6fa2d17
XDL
3340 if (i == -1)
3341 off = 0;
3342 data->max_offset = off;
8b11a64c
ZD
3343
3344 if (dump_file && (dump_flags & TDF_DETAILS))
3345 {
3346 fprintf (dump_file, "get_address_cost:\n");
d6fa2d17 3347 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
dd5f63f8 3348 GET_MODE_NAME (mem_mode),
d6fa2d17
XDL
3349 data->min_offset);
3350 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
dd5f63f8 3351 GET_MODE_NAME (mem_mode),
d6fa2d17 3352 data->max_offset);
8b11a64c
ZD
3353 }
3354
09e881c9 3355 rat = 1;
ac182688 3356 for (i = 2; i <= MAX_RATIO; i++)
09e881c9 3357 if (multiplier_allowed_in_address_p (i, mem_mode, as))
ac182688 3358 {
09e881c9 3359 rat = i;
ac182688
ZD
3360 break;
3361 }
975626a7
ZD
3362
3363 /* Compute the cost of various addressing modes. */
3364 acost = 0;
d4ebfa65
BE
3365 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3366 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
975626a7 3367
cd047059 3368 if (USE_LOAD_PRE_DECREMENT (mem_mode)
8875e939 3369 || USE_STORE_PRE_DECREMENT (mem_mode))
2c08497a 3370 {
d4ebfa65 3371 addr = gen_rtx_PRE_DEC (address_mode, reg0);
09e881c9
BE
3372 has_predec[mem_mode]
3373 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3374
3375 if (has_predec[mem_mode])
3376 data->ainc_costs[AINC_PRE_DEC]
3377 = address_cost (addr, mem_mode, as, speed);
2c08497a 3378 }
cd047059 3379 if (USE_LOAD_POST_DECREMENT (mem_mode)
8875e939 3380 || USE_STORE_POST_DECREMENT (mem_mode))
2c08497a 3381 {
d4ebfa65 3382 addr = gen_rtx_POST_DEC (address_mode, reg0);
09e881c9
BE
3383 has_postdec[mem_mode]
3384 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3385
3386 if (has_postdec[mem_mode])
3387 data->ainc_costs[AINC_POST_DEC]
3388 = address_cost (addr, mem_mode, as, speed);
2c08497a 3389 }
cd047059 3390 if (USE_LOAD_PRE_INCREMENT (mem_mode)
8875e939 3391 || USE_STORE_PRE_DECREMENT (mem_mode))
2c08497a 3392 {
d4ebfa65 3393 addr = gen_rtx_PRE_INC (address_mode, reg0);
09e881c9
BE
3394 has_preinc[mem_mode]
3395 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3396
3397 if (has_preinc[mem_mode])
3398 data->ainc_costs[AINC_PRE_INC]
3399 = address_cost (addr, mem_mode, as, speed);
2c08497a 3400 }
cd047059 3401 if (USE_LOAD_POST_INCREMENT (mem_mode)
8875e939 3402 || USE_STORE_POST_INCREMENT (mem_mode))
2c08497a 3403 {
d4ebfa65 3404 addr = gen_rtx_POST_INC (address_mode, reg0);
09e881c9
BE
3405 has_postinc[mem_mode]
3406 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3407
3408 if (has_postinc[mem_mode])
3409 data->ainc_costs[AINC_POST_INC]
3410 = address_cost (addr, mem_mode, as, speed);
2c08497a 3411 }
975626a7
ZD
3412 for (i = 0; i < 16; i++)
3413 {
3414 sym_p = i & 1;
3415 var_p = (i >> 1) & 1;
3416 off_p = (i >> 2) & 1;
3417 rat_p = (i >> 3) & 1;
3418
3419 addr = reg0;
3420 if (rat_p)
d4ebfa65
BE
3421 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3422 gen_int_mode (rat, address_mode));
975626a7
ZD
3423
3424 if (var_p)
d4ebfa65 3425 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
975626a7
ZD
3426
3427 if (sym_p)
3428 {
d4ebfa65 3429 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
8318b0d9 3430 /* ??? We can run into trouble with some backends by presenting
fa10beec 3431 it with symbols which haven't been properly passed through
8318b0d9
RH
3432 targetm.encode_section_info. By setting the local bit, we
3433 enhance the probability of things working. */
3434 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3435
975626a7 3436 if (off_p)
d4ebfa65 3437 base = gen_rtx_fmt_e (CONST, address_mode,
09e881c9 3438 gen_rtx_fmt_ee
d4ebfa65
BE
3439 (PLUS, address_mode, base,
3440 gen_int_mode (off, address_mode)));
975626a7
ZD
3441 }
3442 else if (off_p)
d4ebfa65 3443 base = gen_int_mode (off, address_mode);
975626a7
ZD
3444 else
3445 base = NULL_RTX;
b8698a0f 3446
975626a7 3447 if (base)
d4ebfa65 3448 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
2c08497a 3449
975626a7
ZD
3450 start_sequence ();
3451 /* To avoid splitting addressing modes, pretend that no cse will
3452 follow. */
3453 old_cse_not_expected = cse_not_expected;
3454 cse_not_expected = true;
09e881c9 3455 addr = memory_address_addr_space (mem_mode, addr, as);
975626a7
ZD
3456 cse_not_expected = old_cse_not_expected;
3457 seq = get_insns ();
3458 end_sequence ();
3459
f40751dd 3460 acost = seq_cost (seq, speed);
09e881c9 3461 acost += address_cost (addr, mem_mode, as, speed);
975626a7
ZD
3462
3463 if (!acost)
3464 acost = 1;
09e881c9 3465 data->costs[sym_p][var_p][off_p][rat_p] = acost;
975626a7
ZD
3466 }
3467
3468 /* On some targets, it is quite expensive to load symbol to a register,
3469 which makes addresses that contain symbols look much more expensive.
3470 However, the symbol will have to be loaded in any case before the
3471 loop (and quite likely we have it in register already), so it does not
3472 make much sense to penalize them too heavily. So make some final
3473 tweaks for the SYMBOL_PRESENT modes:
3474
3475 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3476 var is cheaper, use this mode with small penalty.
3477 If VAR_PRESENT is true, try whether the mode with
3478 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3479 if this is the case, use it. */
5322d07e 3480 add_c = add_cost (speed, address_mode);
975626a7
ZD
3481 for (i = 0; i < 8; i++)
3482 {
3483 var_p = i & 1;
3484 off_p = (i >> 1) & 1;
3485 rat_p = (i >> 2) & 1;
3486
09e881c9 3487 acost = data->costs[0][1][off_p][rat_p] + 1;
975626a7
ZD
3488 if (var_p)
3489 acost += add_c;
3490
09e881c9
BE
3491 if (acost < data->costs[1][var_p][off_p][rat_p])
3492 data->costs[1][var_p][off_p][rat_p] = acost;
975626a7 3493 }
2c08497a 3494
975626a7
ZD
3495 if (dump_file && (dump_flags & TDF_DETAILS))
3496 {
3497 fprintf (dump_file, "Address costs:\n");
b8698a0f 3498
975626a7
ZD
3499 for (i = 0; i < 16; i++)
3500 {
3501 sym_p = i & 1;
3502 var_p = (i >> 1) & 1;
3503 off_p = (i >> 2) & 1;
3504 rat_p = (i >> 3) & 1;
3505
3506 fprintf (dump_file, " ");
3507 if (sym_p)
3508 fprintf (dump_file, "sym + ");
3509 if (var_p)
3510 fprintf (dump_file, "var + ");
3511 if (off_p)
3512 fprintf (dump_file, "cst + ");
3513 if (rat_p)
3514 fprintf (dump_file, "rat * ");
3515
09e881c9 3516 acost = data->costs[sym_p][var_p][off_p][rat_p];
975626a7
ZD
3517 fprintf (dump_file, "index costs %d\n", acost);
3518 }
2c08497a
BS
3519 if (has_predec[mem_mode] || has_postdec[mem_mode]
3520 || has_preinc[mem_mode] || has_postinc[mem_mode])
3521 fprintf (dump_file, " May include autoinc/dec\n");
975626a7
ZD
3522 fprintf (dump_file, "\n");
3523 }
09e881c9 3524
9771b263 3525 address_cost_data_list[data_index] = data;
8b11a64c
ZD
3526 }
3527
d4ebfa65 3528 bits = GET_MODE_BITSIZE (address_mode);
8b11a64c
ZD
3529 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3530 offset &= mask;
3531 if ((offset >> (bits - 1) & 1))
3532 offset |= ~mask;
3533 s_offset = offset;
3534
2c08497a 3535 autoinc = false;
cd047059 3536 autoinc_type = AINC_NONE;
2c08497a
BS
3537 msize = GET_MODE_SIZE (mem_mode);
3538 autoinc_offset = offset;
3539 if (stmt_after_inc)
3540 autoinc_offset += ratio * cstep;
3541 if (symbol_present || var_present || ratio != 1)
3542 autoinc = false;
cd047059
BC
3543 else
3544 {
3545 if (has_postinc[mem_mode] && autoinc_offset == 0
3546 && msize == cstep)
3547 autoinc_type = AINC_POST_INC;
3548 else if (has_postdec[mem_mode] && autoinc_offset == 0
2c08497a 3549 && msize == -cstep)
cd047059
BC
3550 autoinc_type = AINC_POST_DEC;
3551 else if (has_preinc[mem_mode] && autoinc_offset == msize
2c08497a 3552 && msize == cstep)
cd047059
BC
3553 autoinc_type = AINC_PRE_INC;
3554 else if (has_predec[mem_mode] && autoinc_offset == -msize
3555 && msize == -cstep)
3556 autoinc_type = AINC_PRE_DEC;
3557
3558 if (autoinc_type != AINC_NONE)
3559 autoinc = true;
3560 }
2c08497a 3561
8b11a64c 3562 cost = 0;
a85a9024 3563 offset_p = (s_offset != 0
09e881c9
BE
3564 && data->min_offset <= s_offset
3565 && s_offset <= data->max_offset);
8b11a64c 3566 ratio_p = (ratio != 1
09e881c9 3567 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
8b11a64c
ZD
3568
3569 if (ratio != 1 && !ratio_p)
6dd8f4bb 3570 cost += mult_by_coeff_cost (ratio, address_mode, speed);
8b11a64c
ZD
3571
3572 if (s_offset && !offset_p && !symbol_present)
5322d07e 3573 cost += add_cost (speed, address_mode);
8b11a64c 3574
2c08497a
BS
3575 if (may_autoinc)
3576 *may_autoinc = autoinc;
cd047059
BC
3577 if (autoinc)
3578 acost = data->ainc_costs[autoinc_type];
3579 else
3580 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
6e8c65f6
ZD
3581 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3582 return new_cost (cost + acost, complexity);
8b11a64c 3583}
8b11a64c 3584
e6450c11
TV
3585 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3586 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3587 calculating the operands of EXPR. Returns true if successful, and returns
3588 the cost in COST. */
3589
3590static bool
ef4bddc2 3591get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
e6450c11
TV
3592 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3593{
3594 comp_cost res;
3595 tree op1 = TREE_OPERAND (expr, 1);
3596 tree cst = TREE_OPERAND (mult, 1);
3597 tree multop = TREE_OPERAND (mult, 0);
3598 int m = exact_log2 (int_cst_value (cst));
3599 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3600 int sa_cost;
1a1a5534 3601 bool equal_p = false;
e6450c11
TV
3602
3603 if (!(m >= 0 && m < maxm))
3604 return false;
3605
1a1a5534
BC
3606 if (operand_equal_p (op1, mult, 0))
3607 equal_p = true;
3608
e6450c11 3609 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
5322d07e 3610 ? shiftadd_cost (speed, mode, m)
1a1a5534 3611 : (equal_p
5322d07e
NF
3612 ? shiftsub1_cost (speed, mode, m)
3613 : shiftsub0_cost (speed, mode, m)));
e6450c11 3614 res = new_cost (sa_cost, 0);
1a1a5534 3615 res = add_costs (res, equal_p ? cost0 : cost1);
e6450c11
TV
3616
3617 STRIP_NOPS (multop);
3618 if (!is_gimple_val (multop))
3619 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3620
3621 *cost = res;
3622 return true;
3623}
3624
3ac01fde
ZD
3625/* Estimates cost of forcing expression EXPR into a variable. */
3626
6e8c65f6 3627static comp_cost
f40751dd 3628force_expr_to_var_cost (tree expr, bool speed)
8b11a64c
ZD
3629{
3630 static bool costs_initialized = false;
f40751dd
JH
3631 static unsigned integer_cost [2];
3632 static unsigned symbol_cost [2];
3633 static unsigned address_cost [2];
7299dbfb 3634 tree op0, op1;
6e8c65f6 3635 comp_cost cost0, cost1, cost;
ef4bddc2 3636 machine_mode mode;
8b11a64c
ZD
3637
3638 if (!costs_initialized)
3639 {
8b11a64c 3640 tree type = build_pointer_type (integer_type_node);
8318b0d9
RH
3641 tree var, addr;
3642 rtx x;
f40751dd 3643 int i;
8318b0d9
RH
3644
3645 var = create_tmp_var_raw (integer_type_node, "test_var");
3646 TREE_STATIC (var) = 1;
3647 x = produce_memory_decl_rtl (var, NULL);
3648 SET_DECL_RTL (var, x);
8b11a64c 3649
8b11a64c 3650 addr = build1 (ADDR_EXPR, type, var);
8b11a64c 3651
f40751dd
JH
3652
3653 for (i = 0; i < 2; i++)
8b11a64c 3654 {
f40751dd
JH
3655 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3656 2000), i);
3657
3658 symbol_cost[i] = computation_cost (addr, i) + 1;
3659
3660 address_cost[i]
5d49b6a7 3661 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
f40751dd
JH
3662 if (dump_file && (dump_flags & TDF_DETAILS))
3663 {
3664 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3665 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3666 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3667 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3668 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3669 fprintf (dump_file, "\n");
3670 }
8b11a64c
ZD
3671 }
3672
3673 costs_initialized = true;
3674 }
3675
f5e2738c
ZD
3676 STRIP_NOPS (expr);
3677
8b11a64c 3678 if (SSA_VAR_P (expr))
7735d6c7 3679 return no_cost;
8b11a64c 3680
ad6003f2 3681 if (is_gimple_min_invariant (expr))
8b11a64c
ZD
3682 {
3683 if (TREE_CODE (expr) == INTEGER_CST)
f40751dd 3684 return new_cost (integer_cost [speed], 0);
8b11a64c
ZD
3685
3686 if (TREE_CODE (expr) == ADDR_EXPR)
3687 {
3688 tree obj = TREE_OPERAND (expr, 0);
3689
3690 if (TREE_CODE (obj) == VAR_DECL
3691 || TREE_CODE (obj) == PARM_DECL
3692 || TREE_CODE (obj) == RESULT_DECL)
f40751dd 3693 return new_cost (symbol_cost [speed], 0);
8b11a64c
ZD
3694 }
3695
f40751dd 3696 return new_cost (address_cost [speed], 0);
8b11a64c
ZD
3697 }
3698
7299dbfb
ZD
3699 switch (TREE_CODE (expr))
3700 {
5be014d5 3701 case POINTER_PLUS_EXPR:
7299dbfb
ZD
3702 case PLUS_EXPR:
3703 case MINUS_EXPR:
3704 case MULT_EXPR:
3705 op0 = TREE_OPERAND (expr, 0);
3706 op1 = TREE_OPERAND (expr, 1);
f5e2738c
ZD
3707 STRIP_NOPS (op0);
3708 STRIP_NOPS (op1);
7299dbfb
ZD
3709 break;
3710
801a661c 3711 CASE_CONVERT:
7a2faca1
EB
3712 case NEGATE_EXPR:
3713 op0 = TREE_OPERAND (expr, 0);
3714 STRIP_NOPS (op0);
3715 op1 = NULL_TREE;
7a2faca1
EB
3716 break;
3717
7299dbfb
ZD
3718 default:
3719 /* Just an arbitrary value, FIXME. */
f40751dd 3720 return new_cost (target_spill_cost[speed], 0);
7299dbfb
ZD
3721 }
3722
801a661c
BC
3723 if (op0 == NULL_TREE
3724 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3725 cost0 = no_cost;
3726 else
3727 cost0 = force_expr_to_var_cost (op0, speed);
3728
3729 if (op1 == NULL_TREE
3730 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3731 cost1 = no_cost;
3732 else
3733 cost1 = force_expr_to_var_cost (op1, speed);
3734
7299dbfb
ZD
3735 mode = TYPE_MODE (TREE_TYPE (expr));
3736 switch (TREE_CODE (expr))
3737 {
5be014d5 3738 case POINTER_PLUS_EXPR:
7299dbfb
ZD
3739 case PLUS_EXPR:
3740 case MINUS_EXPR:
7a2faca1 3741 case NEGATE_EXPR:
5322d07e 3742 cost = new_cost (add_cost (speed, mode), 0);
e6450c11
TV
3743 if (TREE_CODE (expr) != NEGATE_EXPR)
3744 {
3745 tree mult = NULL_TREE;
3746 comp_cost sa_cost;
3747 if (TREE_CODE (op1) == MULT_EXPR)
3748 mult = op1;
3749 else if (TREE_CODE (op0) == MULT_EXPR)
3750 mult = op0;
3751
3752 if (mult != NULL_TREE
50ad7db2 3753 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
5322d07e
NF
3754 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3755 speed, &sa_cost))
e6450c11
TV
3756 return sa_cost;
3757 }
7299dbfb
ZD
3758 break;
3759
801a661c
BC
3760 CASE_CONVERT:
3761 {
3762 tree inner_mode, outer_mode;
3763 outer_mode = TREE_TYPE (expr);
3764 inner_mode = TREE_TYPE (op0);
3765 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3766 TYPE_MODE (inner_mode), speed), 0);
3767 }
3768 break;
3769
7299dbfb
ZD
3770 case MULT_EXPR:
3771 if (cst_and_fits_in_hwi (op0))
6dd8f4bb
BS
3772 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3773 mode, speed), 0);
b8698a0f 3774 else if (cst_and_fits_in_hwi (op1))
6dd8f4bb
BS
3775 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3776 mode, speed), 0);
7299dbfb 3777 else
f40751dd 3778 return new_cost (target_spill_cost [speed], 0);
7299dbfb
ZD
3779 break;
3780
3781 default:
3782 gcc_unreachable ();
3783 }
3784
6e8c65f6
ZD
3785 cost = add_costs (cost, cost0);
3786 cost = add_costs (cost, cost1);
7299dbfb
ZD
3787
3788 /* Bound the cost by target_spill_cost. The parts of complicated
3789 computations often are either loop invariant or at least can
3790 be shared between several iv uses, so letting this grow without
3791 limits would not give reasonable results. */
2c08497a 3792 if (cost.cost > (int) target_spill_cost [speed])
f40751dd 3793 cost.cost = target_spill_cost [speed];
6e8c65f6
ZD
3794
3795 return cost;
8b11a64c
ZD
3796}
3797
3ac01fde
ZD
3798/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3799 invariants the computation depends on. */
3800
6e8c65f6 3801static comp_cost
3ac01fde
ZD
3802force_var_cost (struct ivopts_data *data,
3803 tree expr, bitmap *depends_on)
3804{
3805 if (depends_on)
3806 {
3807 fd_ivopts_data = data;
3808 walk_tree (&expr, find_depends, depends_on, NULL);
3809 }
3810
f40751dd 3811 return force_expr_to_var_cost (expr, data->speed);
3ac01fde
ZD
3812}
3813
8b11a64c
ZD
3814/* Estimates cost of expressing address ADDR as var + symbol + offset. The
3815 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3816 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3817 invariants the computation depends on. */
3818
6e8c65f6 3819static comp_cost
8b11a64c
ZD
3820split_address_cost (struct ivopts_data *data,
3821 tree addr, bool *symbol_present, bool *var_present,
3822 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3823{
2f4675b4
ZD
3824 tree core;
3825 HOST_WIDE_INT bitsize;
3826 HOST_WIDE_INT bitpos;
3827 tree toffset;
ef4bddc2 3828 machine_mode mode;
2f4675b4 3829 int unsignedp, volatilep;
b8698a0f 3830
2f4675b4 3831 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
b3ecff82 3832 &unsignedp, &volatilep, false);
8b11a64c 3833
2f4675b4
ZD
3834 if (toffset != 0
3835 || bitpos % BITS_PER_UNIT != 0
3836 || TREE_CODE (core) != VAR_DECL)
8b11a64c
ZD
3837 {
3838 *symbol_present = false;
3839 *var_present = true;
3840 fd_ivopts_data = data;
3841 walk_tree (&addr, find_depends, depends_on, NULL);
f40751dd 3842 return new_cost (target_spill_cost[data->speed], 0);
2f4675b4
ZD
3843 }
3844
3845 *offset += bitpos / BITS_PER_UNIT;
8b11a64c
ZD
3846 if (TREE_STATIC (core)
3847 || DECL_EXTERNAL (core))
3848 {
3849 *symbol_present = true;
3850 *var_present = false;
7735d6c7 3851 return no_cost;
8b11a64c 3852 }
b8698a0f 3853
8b11a64c
ZD
3854 *symbol_present = false;
3855 *var_present = true;
7735d6c7 3856 return no_cost;
8b11a64c
ZD
3857}
3858
3859/* Estimates cost of expressing difference of addresses E1 - E2 as
3860 var + symbol + offset. The value of offset is added to OFFSET,
3861 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3862 part is missing. DEPENDS_ON is a set of the invariants the computation
3863 depends on. */
3864
6e8c65f6 3865static comp_cost
8b11a64c
ZD
3866ptr_difference_cost (struct ivopts_data *data,
3867 tree e1, tree e2, bool *symbol_present, bool *var_present,
3868 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3869{
2f4675b4 3870 HOST_WIDE_INT diff = 0;
7a2faca1
EB
3871 aff_tree aff_e1, aff_e2;
3872 tree type;
8b11a64c 3873
1e128c5f 3874 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
8b11a64c 3875
7299dbfb 3876 if (ptr_difference_const (e1, e2, &diff))
8b11a64c
ZD
3877 {
3878 *offset += diff;
3879 *symbol_present = false;
3880 *var_present = false;
7735d6c7 3881 return no_cost;
8b11a64c
ZD
3882 }
3883
6e8c65f6 3884 if (integer_zerop (e2))
8b11a64c
ZD
3885 return split_address_cost (data, TREE_OPERAND (e1, 0),
3886 symbol_present, var_present, offset, depends_on);
3887
3888 *symbol_present = false;
3889 *var_present = true;
8b11a64c 3890
7a2faca1
EB
3891 type = signed_type_for (TREE_TYPE (e1));
3892 tree_to_aff_combination (e1, type, &aff_e1);
3893 tree_to_aff_combination (e2, type, &aff_e2);
807e902e 3894 aff_combination_scale (&aff_e2, -1);
7a2faca1
EB
3895 aff_combination_add (&aff_e1, &aff_e2);
3896
3897 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
8b11a64c
ZD
3898}
3899
3900/* Estimates cost of expressing difference E1 - E2 as
3901 var + symbol + offset. The value of offset is added to OFFSET,
3902 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3903 part is missing. DEPENDS_ON is a set of the invariants the computation
3904 depends on. */
3905
6e8c65f6 3906static comp_cost
8b11a64c
ZD
3907difference_cost (struct ivopts_data *data,
3908 tree e1, tree e2, bool *symbol_present, bool *var_present,
3909 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3910{
ef4bddc2 3911 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
f5e2738c 3912 unsigned HOST_WIDE_INT off1, off2;
7a2faca1
EB
3913 aff_tree aff_e1, aff_e2;
3914 tree type;
f5e2738c 3915
9be872b7
ZD
3916 e1 = strip_offset (e1, &off1);
3917 e2 = strip_offset (e2, &off2);
f5e2738c 3918 *offset += off1 - off2;
8b11a64c 3919
f5e2738c
ZD
3920 STRIP_NOPS (e1);
3921 STRIP_NOPS (e2);
8b11a64c
ZD
3922
3923 if (TREE_CODE (e1) == ADDR_EXPR)
7a2faca1
EB
3924 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3925 offset, depends_on);
8b11a64c
ZD
3926 *symbol_present = false;
3927
3928 if (operand_equal_p (e1, e2, 0))
3929 {
3930 *var_present = false;
7735d6c7 3931 return no_cost;
8b11a64c 3932 }
7a2faca1 3933
8b11a64c 3934 *var_present = true;
7a2faca1 3935
6e682d7e 3936 if (integer_zerop (e2))
8b11a64c
ZD
3937 return force_var_cost (data, e1, depends_on);
3938
6e682d7e 3939 if (integer_zerop (e1))
8b11a64c 3940 {
7a2faca1 3941 comp_cost cost = force_var_cost (data, e2, depends_on);
6dd8f4bb 3942 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
8b11a64c
ZD
3943 return cost;
3944 }
3945
7a2faca1
EB
3946 type = signed_type_for (TREE_TYPE (e1));
3947 tree_to_aff_combination (e1, type, &aff_e1);
3948 tree_to_aff_combination (e2, type, &aff_e2);
807e902e 3949 aff_combination_scale (&aff_e2, -1);
7a2faca1 3950 aff_combination_add (&aff_e1, &aff_e2);
8b11a64c 3951
7a2faca1 3952 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
8b11a64c
ZD
3953}
3954
18081149
XDL
3955/* Returns true if AFF1 and AFF2 are identical. */
3956
3957static bool
3958compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3959{
3960 unsigned i;
3961
3962 if (aff1->n != aff2->n)
3963 return false;
3964
3965 for (i = 0; i < aff1->n; i++)
3966 {
27bcd47c 3967 if (aff1->elts[i].coef != aff2->elts[i].coef)
18081149
XDL
3968 return false;
3969
3970 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3971 return false;
3972 }
3973 return true;
3974}
3975
bb8d292d
TV
3976/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3977
3978static int
3979get_expr_id (struct ivopts_data *data, tree expr)
3980{
3981 struct iv_inv_expr_ent ent;
3982 struct iv_inv_expr_ent **slot;
3983
3984 ent.expr = expr;
3985 ent.hash = iterative_hash_expr (expr, 0);
c203e8a7 3986 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
bb8d292d
TV
3987 if (*slot)
3988 return (*slot)->id;
3989
3990 *slot = XNEW (struct iv_inv_expr_ent);
3991 (*slot)->expr = expr;
3992 (*slot)->hash = ent.hash;
3993 (*slot)->id = data->inv_expr_id++;
3994 return (*slot)->id;
3995}
3996
18081149
XDL
3997/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3998 requires a new compiler generated temporary. Returns -1 otherwise.
3999 ADDRESS_P is a flag indicating if the expression is for address
4000 computation. */
4001
4002static int
4003get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4004 tree cbase, HOST_WIDE_INT ratio,
4005 bool address_p)
4006{
4007 aff_tree ubase_aff, cbase_aff;
4008 tree expr, ub, cb;
18081149
XDL
4009
4010 STRIP_NOPS (ubase);
4011 STRIP_NOPS (cbase);
4012 ub = ubase;
4013 cb = cbase;
4014
4015 if ((TREE_CODE (ubase) == INTEGER_CST)
4016 && (TREE_CODE (cbase) == INTEGER_CST))
4017 return -1;
4018
4019 /* Strips the constant part. */
4020 if (TREE_CODE (ubase) == PLUS_EXPR
4021 || TREE_CODE (ubase) == MINUS_EXPR
4022 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4023 {
4024 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4025 ubase = TREE_OPERAND (ubase, 0);
4026 }
4027
4028 /* Strips the constant part. */
4029 if (TREE_CODE (cbase) == PLUS_EXPR
4030 || TREE_CODE (cbase) == MINUS_EXPR
4031 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4032 {
4033 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4034 cbase = TREE_OPERAND (cbase, 0);
4035 }
4036
4037 if (address_p)
4038 {
4039 if (((TREE_CODE (ubase) == SSA_NAME)
4040 || (TREE_CODE (ubase) == ADDR_EXPR
4041 && is_gimple_min_invariant (ubase)))
4042 && (TREE_CODE (cbase) == INTEGER_CST))
4043 return -1;
4044
4045 if (((TREE_CODE (cbase) == SSA_NAME)
4046 || (TREE_CODE (cbase) == ADDR_EXPR
4047 && is_gimple_min_invariant (cbase)))
4048 && (TREE_CODE (ubase) == INTEGER_CST))
4049 return -1;
4050 }
4051
4052 if (ratio == 1)
4053 {
c3284718 4054 if (operand_equal_p (ubase, cbase, 0))
18081149
XDL
4055 return -1;
4056
4057 if (TREE_CODE (ubase) == ADDR_EXPR
4058 && TREE_CODE (cbase) == ADDR_EXPR)
4059 {
4060 tree usym, csym;
4061
4062 usym = TREE_OPERAND (ubase, 0);
4063 csym = TREE_OPERAND (cbase, 0);
4064 if (TREE_CODE (usym) == ARRAY_REF)
4065 {
4066 tree ind = TREE_OPERAND (usym, 1);
4067 if (TREE_CODE (ind) == INTEGER_CST
9541ffee 4068 && tree_fits_shwi_p (ind)
eb1ce453 4069 && tree_to_shwi (ind) == 0)
18081149
XDL
4070 usym = TREE_OPERAND (usym, 0);
4071 }
4072 if (TREE_CODE (csym) == ARRAY_REF)
4073 {
4074 tree ind = TREE_OPERAND (csym, 1);
4075 if (TREE_CODE (ind) == INTEGER_CST
9541ffee 4076 && tree_fits_shwi_p (ind)
eb1ce453 4077 && tree_to_shwi (ind) == 0)
18081149
XDL
4078 csym = TREE_OPERAND (csym, 0);
4079 }
4080 if (operand_equal_p (usym, csym, 0))
4081 return -1;
4082 }
4083 /* Now do more complex comparison */
4084 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4085 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4086 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4087 return -1;
4088 }
4089
4090 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4091 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4092
807e902e 4093 aff_combination_scale (&cbase_aff, -1 * ratio);
18081149
XDL
4094 aff_combination_add (&ubase_aff, &cbase_aff);
4095 expr = aff_combination_to_tree (&ubase_aff);
bb8d292d 4096 return get_expr_id (data, expr);
18081149
XDL
4097}
4098
4099
4100
8b11a64c
ZD
4101/* Determines the cost of the computation by that USE is expressed
4102 from induction variable CAND. If ADDRESS_P is true, we just need
4103 to create an address from it, otherwise we want to get it into
4104 register. A set of invariants we depend on is stored in
2c08497a
BS
4105 DEPENDS_ON. AT is the statement at that the value is computed.
4106 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4107 addressing is likely. */
8b11a64c 4108
6e8c65f6 4109static comp_cost
8b11a64c
ZD
4110get_computation_cost_at (struct ivopts_data *data,
4111 struct iv_use *use, struct iv_cand *cand,
2c08497a 4112 bool address_p, bitmap *depends_on, gimple at,
18081149
XDL
4113 bool *can_autoinc,
4114 int *inv_expr_id)
8b11a64c
ZD
4115{
4116 tree ubase = use->iv->base, ustep = use->iv->step;
4117 tree cbase, cstep;
4118 tree utype = TREE_TYPE (ubase), ctype;
73f30c63 4119 unsigned HOST_WIDE_INT cstepi, offset = 0;
8b11a64c 4120 HOST_WIDE_INT ratio, aratio;
2c08497a 4121 bool var_present, symbol_present, stmt_is_after_inc;
6e8c65f6 4122 comp_cost cost;
807e902e 4123 widest_int rat;
f40751dd 4124 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
ef4bddc2 4125 machine_mode mem_mode = (address_p
0c604a61
TV
4126 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4127 : VOIDmode);
8b11a64c
ZD
4128
4129 *depends_on = NULL;
4130
4131 /* Only consider real candidates. */
4132 if (!cand->iv)
6e8c65f6 4133 return infinite_cost;
8b11a64c
ZD
4134
4135 cbase = cand->iv->base;
4136 cstep = cand->iv->step;
4137 ctype = TREE_TYPE (cbase);
4138
4139 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4140 {
4141 /* We do not have a precision to express the values of use. */
6e8c65f6 4142 return infinite_cost;
8b11a64c
ZD
4143 }
4144
1d30a09a
RG
4145 if (address_p
4146 || (use->iv->base_object
4147 && cand->iv->base_object
4148 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4149 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
e6845c23
ZD
4150 {
4151 /* Do not try to express address of an object with computation based
4152 on address of a different object. This may cause problems in rtl
4153 level alias analysis (that does not expect this to be happening,
4154 as this is illegal in C), and would be unlikely to be useful
4155 anyway. */
4156 if (use->iv->base_object
4157 && cand->iv->base_object
4158 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
6e8c65f6 4159 return infinite_cost;
e6845c23
ZD
4160 }
4161
7a2faca1 4162 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
8b11a64c
ZD
4163 {
4164 /* TODO -- add direct handling of this case. */
4165 goto fallback;
4166 }
4167
9be872b7
ZD
4168 /* CSTEPI is removed from the offset in case statement is after the
4169 increment. If the step is not constant, we use zero instead.
1e1def1c 4170 This is a bit imprecise (there is the extra addition), but
9be872b7
ZD
4171 redundancy elimination is likely to transform the code so that
4172 it uses value of the variable before increment anyway,
4173 so it is not that much unrealistic. */
4174 if (cst_and_fits_in_hwi (cstep))
4175 cstepi = int_cst_value (cstep);
4176 else
4177 cstepi = 0;
4178
73f30c63 4179 if (!constant_multiple_of (ustep, cstep, &rat))
6e8c65f6 4180 return infinite_cost;
b8698a0f 4181
807e902e 4182 if (wi::fits_shwi_p (rat))
27bcd47c 4183 ratio = rat.to_shwi ();
73f30c63 4184 else
6e8c65f6 4185 return infinite_cost;
8b11a64c 4186
7a2faca1
EB
4187 STRIP_NOPS (cbase);
4188 ctype = TREE_TYPE (cbase);
4189
2fa692c0
XDL
4190 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4191
8b11a64c
ZD
4192 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4193 or ratio == 1, it is better to handle this like
b8698a0f 4194
8b11a64c 4195 ubase - ratio * cbase + ratio * var
b8698a0f 4196
8b11a64c
ZD
4197 (also holds in the case ratio == -1, TODO. */
4198
9be872b7 4199 if (cst_and_fits_in_hwi (cbase))
8b11a64c 4200 {
d7ca26e4 4201 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
6e8c65f6
ZD
4202 cost = difference_cost (data,
4203 ubase, build_int_cst (utype, 0),
4204 &symbol_present, &var_present, &offset,
4205 depends_on);
18081149 4206 cost.cost /= avg_loop_niter (data->current_loop);
8b11a64c
ZD
4207 }
4208 else if (ratio == 1)
4209 {
2fa692c0
XDL
4210 tree real_cbase = cbase;
4211
4212 /* Check to see if any adjustment is needed. */
4213 if (cstepi == 0 && stmt_is_after_inc)
4214 {
4215 aff_tree real_cbase_aff;
4216 aff_tree cstep_aff;
4217
4218 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4219 &real_cbase_aff);
4220 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4221
4222 aff_combination_add (&real_cbase_aff, &cstep_aff);
4223 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4224 }
4225
6e8c65f6 4226 cost = difference_cost (data,
2fa692c0 4227 ubase, real_cbase,
6e8c65f6
ZD
4228 &symbol_present, &var_present, &offset,
4229 depends_on);
18081149 4230 cost.cost /= avg_loop_niter (data->current_loop);
8b11a64c 4231 }
7a2faca1
EB
4232 else if (address_p
4233 && !POINTER_TYPE_P (ctype)
09e881c9 4234 && multiplier_allowed_in_address_p
0c604a61 4235 (ratio, mem_mode,
09e881c9 4236 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
7a2faca1
EB
4237 {
4238 cbase
4239 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4240 cost = difference_cost (data,
4241 ubase, cbase,
4242 &symbol_present, &var_present, &offset,
4243 depends_on);
18081149 4244 cost.cost /= avg_loop_niter (data->current_loop);
7a2faca1 4245 }
8b11a64c
ZD
4246 else
4247 {
6e8c65f6 4248 cost = force_var_cost (data, cbase, depends_on);
6e8c65f6
ZD
4249 cost = add_costs (cost,
4250 difference_cost (data,
4251 ubase, build_int_cst (utype, 0),
4252 &symbol_present, &var_present,
4253 &offset, depends_on));
18081149 4254 cost.cost /= avg_loop_niter (data->current_loop);
5322d07e 4255 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
18081149
XDL
4256 }
4257
4258 if (inv_expr_id)
4259 {
4260 *inv_expr_id =
4261 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4262 /* Clear depends on. */
4263 if (*inv_expr_id != -1 && depends_on && *depends_on)
4264 bitmap_clear (*depends_on);
8b11a64c
ZD
4265 }
4266
4267 /* If we are after the increment, the value of the candidate is higher by
4268 one iteration. */
2c08497a 4269 if (stmt_is_after_inc)
8b11a64c
ZD
4270 offset -= ratio * cstepi;
4271
4272 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
7a2faca1
EB
4273 (symbol/var1/const parts may be omitted). If we are looking for an
4274 address, find the cost of addressing this. */
8b11a64c 4275 if (address_p)
7a2faca1
EB
4276 return add_costs (cost,
4277 get_address_cost (symbol_present, var_present,
2c08497a 4278 offset, ratio, cstepi,
0c604a61 4279 mem_mode,
09e881c9 4280 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
2c08497a
BS
4281 speed, stmt_is_after_inc,
4282 can_autoinc));
8b11a64c
ZD
4283
4284 /* Otherwise estimate the costs for computing the expression. */
8b11a64c
ZD
4285 if (!symbol_present && !var_present && !offset)
4286 {
4287 if (ratio != 1)
6dd8f4bb 4288 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
8b11a64c
ZD
4289 return cost;
4290 }
4291
7a2faca1
EB
4292 /* Symbol + offset should be compile-time computable so consider that they
4293 are added once to the variable, if present. */
4294 if (var_present && (symbol_present || offset))
6521ac85 4295 cost.cost += adjust_setup_cost (data,
5322d07e 4296 add_cost (speed, TYPE_MODE (ctype)));
8b11a64c 4297
6e8c65f6
ZD
4298 /* Having offset does not affect runtime cost in case it is added to
4299 symbol, but it increases complexity. */
4300 if (offset)
4301 cost.complexity++;
4302
5322d07e 4303 cost.cost += add_cost (speed, TYPE_MODE (ctype));
7a2faca1
EB
4304
4305 aratio = ratio > 0 ? ratio : -ratio;
4306 if (aratio != 1)
6dd8f4bb 4307 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5914a70f 4308 return cost;
8b11a64c
ZD
4309
4310fallback:
2c08497a
BS
4311 if (can_autoinc)
4312 *can_autoinc = false;
4313
8b11a64c
ZD
4314 {
4315 /* Just get the expression, expand it and measure the cost. */
4316 tree comp = get_computation_at (data->current_loop, use, cand, at);
4317
4318 if (!comp)
6e8c65f6 4319 return infinite_cost;
8b11a64c
ZD
4320
4321 if (address_p)
70f34814 4322 comp = build_simple_mem_ref (comp);
8b11a64c 4323
f40751dd 4324 return new_cost (computation_cost (comp, speed), 0);
8b11a64c
ZD
4325 }
4326}
4327
4328/* Determines the cost of the computation by that USE is expressed
4329 from induction variable CAND. If ADDRESS_P is true, we just need
4330 to create an address from it, otherwise we want to get it into
4331 register. A set of invariants we depend on is stored in
2c08497a
BS
4332 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4333 autoinc addressing is likely. */
8b11a64c 4334
6e8c65f6 4335static comp_cost
8b11a64c
ZD
4336get_computation_cost (struct ivopts_data *data,
4337 struct iv_use *use, struct iv_cand *cand,
18081149
XDL
4338 bool address_p, bitmap *depends_on,
4339 bool *can_autoinc, int *inv_expr_id)
8b11a64c
ZD
4340{
4341 return get_computation_cost_at (data,
2c08497a 4342 use, cand, address_p, depends_on, use->stmt,
18081149 4343 can_autoinc, inv_expr_id);
8b11a64c
ZD
4344}
4345
4346/* Determines cost of basing replacement of USE on CAND in a generic
4347 expression. */
4348
b1b02be2 4349static bool
8b11a64c
ZD
4350determine_use_iv_cost_generic (struct ivopts_data *data,
4351 struct iv_use *use, struct iv_cand *cand)
4352{
4353 bitmap depends_on;
6e8c65f6 4354 comp_cost cost;
18081149 4355 int inv_expr_id = -1;
eec5fec9
ZD
4356
4357 /* The simple case first -- if we need to express value of the preserved
4358 original biv, the cost is 0. This also prevents us from counting the
4359 cost of increment twice -- once at this use and once in the cost of
4360 the candidate. */
4361 if (cand->pos == IP_ORIGINAL
4362 && cand->incremented_at == use->stmt)
4363 {
7735d6c7 4364 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
d8af4ba3 4365 ERROR_MARK, -1);
eec5fec9
ZD
4366 return true;
4367 }
8b11a64c 4368
18081149
XDL
4369 cost = get_computation_cost (data, use, cand, false, &depends_on,
4370 NULL, &inv_expr_id);
4371
d8af4ba3 4372 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
18081149 4373 inv_expr_id);
b1b02be2 4374
6e8c65f6 4375 return !infinite_cost_p (cost);
8b11a64c
ZD
4376}
4377
4378/* Determines cost of basing replacement of USE on CAND in an address. */
4379
b1b02be2 4380static bool
8b11a64c
ZD
4381determine_use_iv_cost_address (struct ivopts_data *data,
4382 struct iv_use *use, struct iv_cand *cand)
4383{
4384 bitmap depends_on;
2c08497a 4385 bool can_autoinc;
18081149 4386 int inv_expr_id = -1;
2c08497a 4387 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
18081149 4388 &can_autoinc, &inv_expr_id);
8b11a64c 4389
2c08497a
BS
4390 if (cand->ainc_use == use)
4391 {
4392 if (can_autoinc)
4393 cost.cost -= cand->cost_step;
4394 /* If we generated the candidate solely for exploiting autoincrement
4395 opportunities, and it turns out it can't be used, set the cost to
4396 infinity to make sure we ignore it. */
4397 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4398 cost = infinite_cost;
4399 }
d8af4ba3 4400 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
18081149 4401 inv_expr_id);
b1b02be2 4402
6e8c65f6 4403 return !infinite_cost_p (cost);
8b11a64c
ZD
4404}
4405
7e2ac86c
ZD
4406/* Computes value of candidate CAND at position AT in iteration NITER, and
4407 stores it to VAL. */
8b11a64c 4408
7e2ac86c 4409static void
726a989a 4410cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
7e2ac86c 4411 aff_tree *val)
8b11a64c 4412{
7e2ac86c
ZD
4413 aff_tree step, delta, nit;
4414 struct iv *iv = cand->iv;
8b11a64c 4415 tree type = TREE_TYPE (iv->base);
1ffe34d9
AP
4416 tree steptype = type;
4417 if (POINTER_TYPE_P (type))
4418 steptype = sizetype;
d6adff07 4419 steptype = unsigned_type_for (type);
8b11a64c 4420
d6adff07
RB
4421 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4422 aff_combination_convert (&step, steptype);
7e2ac86c 4423 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
1ffe34d9 4424 aff_combination_convert (&nit, steptype);
7e2ac86c 4425 aff_combination_mult (&nit, &step, &delta);
8b11a64c 4426 if (stmt_after_increment (loop, cand, at))
7e2ac86c 4427 aff_combination_add (&delta, &step);
8b11a64c 4428
7e2ac86c 4429 tree_to_aff_combination (iv->base, type, val);
d6adff07
RB
4430 if (!POINTER_TYPE_P (type))
4431 aff_combination_convert (val, steptype);
7e2ac86c 4432 aff_combination_add (val, &delta);
8b11a64c
ZD
4433}
4434
ca4c3169
ZD
4435/* Returns period of induction variable iv. */
4436
4437static tree
4438iv_period (struct iv *iv)
4439{
4440 tree step = iv->step, period, type;
4441 tree pow2div;
4442
4443 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4444
ca4c3169 4445 type = unsigned_type_for (TREE_TYPE (step));
e2102efc
XDL
4446 /* Period of the iv is lcm (step, type_range)/step -1,
4447 i.e., N*type_range/step - 1. Since type range is power
4448 of two, N == (step >> num_of_ending_zeros_binary (step),
4449 so the final result is
4450
4451 (type_range >> num_of_ending_zeros_binary (step)) - 1
4452
4453 */
4454 pow2div = num_ending_zeros (step);
ca4c3169
ZD
4455
4456 period = build_low_bits_mask (type,
e2102efc 4457 (TYPE_PRECISION (type)
ae7e9ddd 4458 - tree_to_uhwi (pow2div)));
ca4c3169
ZD
4459
4460 return period;
4461}
4462
f5f12961
ZD
4463/* Returns the comparison operator used when eliminating the iv USE. */
4464
4465static enum tree_code
4466iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4467{
4468 struct loop *loop = data->current_loop;
4469 basic_block ex_bb;
4470 edge exit;
4471
726a989a 4472 ex_bb = gimple_bb (use->stmt);
f5f12961
ZD
4473 exit = EDGE_SUCC (ex_bb, 0);
4474 if (flow_bb_inside_loop_p (loop, exit->dest))
4475 exit = EDGE_SUCC (ex_bb, 1);
4476
4477 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4478}
4479
d8af4ba3
ZD
4480/* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4481 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4482 calculation is performed in non-wrapping type.
4483
4484 TODO: More generally, we could test for the situation that
4485 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
3230c614 4486 This would require knowing the sign of OFFSET. */
d8af4ba3
ZD
4487
4488static bool
3230c614 4489difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
d8af4ba3
ZD
4490{
4491 enum tree_code code;
4492 tree e1, e2;
3230c614 4493 aff_tree aff_e1, aff_e2, aff_offset;
d8af4ba3
ZD
4494
4495 if (!nowrap_type_p (TREE_TYPE (base)))
4496 return false;
4497
4498 base = expand_simple_operations (base);
4499
4500 if (TREE_CODE (base) == SSA_NAME)
4501 {
4502 gimple stmt = SSA_NAME_DEF_STMT (base);
4503
4504 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4505 return false;
4506
4507 code = gimple_assign_rhs_code (stmt);
4508 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4509 return false;
4510
4511 e1 = gimple_assign_rhs1 (stmt);
4512 e2 = gimple_assign_rhs2 (stmt);
4513 }
4514 else
4515 {
4516 code = TREE_CODE (base);
4517 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4518 return false;
4519 e1 = TREE_OPERAND (base, 0);
4520 e2 = TREE_OPERAND (base, 1);
4521 }
4522
3230c614
BC
4523 /* Use affine expansion as deeper inspection to prove the equality. */
4524 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4525 &aff_e2, &data->name_expansion_cache);
4526 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4527 &aff_offset, &data->name_expansion_cache);
4528 aff_combination_scale (&aff_offset, -1);
d8af4ba3
ZD
4529 switch (code)
4530 {
4531 case PLUS_EXPR:
3230c614
BC
4532 aff_combination_add (&aff_e2, &aff_offset);
4533 if (aff_combination_zero_p (&aff_e2))
4534 return true;
4535
4536 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4537 &aff_e1, &data->name_expansion_cache);
4538 aff_combination_add (&aff_e1, &aff_offset);
4539 return aff_combination_zero_p (&aff_e1);
4540
d8af4ba3 4541 case POINTER_PLUS_EXPR:
3230c614
BC
4542 aff_combination_add (&aff_e2, &aff_offset);
4543 return aff_combination_zero_p (&aff_e2);
d8af4ba3
ZD
4544
4545 default:
4546 return false;
4547 }
4548}
4549
4550/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4551 comparison with CAND. NITER describes the number of iterations of
4552 the loops. If successful, the comparison in COMP_P is altered accordingly.
4553
4554 We aim to handle the following situation:
4555
4556 sometype *base, *p;
4557 int a, b, i;
4558
4559 i = a;
4560 p = p_0 = base + a;
4561
4562 do
4563 {
4564 bla (*p);
4565 p++;
4566 i++;
4567 }
4568 while (i < b);
4569
4570 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4571 We aim to optimize this to
4572
4573 p = p_0 = base + a;
4574 do
4575 {
4576 bla (*p);
4577 p++;
4578 }
4579 while (p < p_0 - a + b);
4580
4581 This preserves the correctness, since the pointer arithmetics does not
4582 overflow. More precisely:
4583
4584 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4585 overflow in computing it or the values of p.
4586 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4587 overflow. To prove this, we use the fact that p_0 = base + a. */
4588
4589static bool
4590iv_elimination_compare_lt (struct ivopts_data *data,
4591 struct iv_cand *cand, enum tree_code *comp_p,
4592 struct tree_niter_desc *niter)
4593{
4594 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
84562394 4595 struct aff_tree nit, tmpa, tmpb;
d8af4ba3
ZD
4596 enum tree_code comp;
4597 HOST_WIDE_INT step;
4598
4599 /* We need to know that the candidate induction variable does not overflow.
4600 While more complex analysis may be used to prove this, for now just
4601 check that the variable appears in the original program and that it
4602 is computed in a type that guarantees no overflows. */
4603 cand_type = TREE_TYPE (cand->iv->base);
4604 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4605 return false;
4606
4607 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4608 the calculation of the BOUND could overflow, making the comparison
4609 invalid. */
4610 if (!data->loop_single_exit_p)
4611 return false;
4612
4613 /* We need to be able to decide whether candidate is increasing or decreasing
4614 in order to choose the right comparison operator. */
4615 if (!cst_and_fits_in_hwi (cand->iv->step))
4616 return false;
4617 step = int_cst_value (cand->iv->step);
4618
4619 /* Check that the number of iterations matches the expected pattern:
4620 a + 1 > b ? 0 : b - a - 1. */
4621 mbz = niter->may_be_zero;
4622 if (TREE_CODE (mbz) == GT_EXPR)
4623 {
4624 /* Handle a + 1 > b. */
4625 tree op0 = TREE_OPERAND (mbz, 0);
4626 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4627 {
4628 a = TREE_OPERAND (op0, 0);
4629 b = TREE_OPERAND (mbz, 1);
4630 }
4631 else
4632 return false;
4633 }
4634 else if (TREE_CODE (mbz) == LT_EXPR)
4635 {
4636 tree op1 = TREE_OPERAND (mbz, 1);
4637
4638 /* Handle b < a + 1. */
4639 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4640 {
4641 a = TREE_OPERAND (op1, 0);
4642 b = TREE_OPERAND (mbz, 0);
4643 }
4644 else
4645 return false;
4646 }
4647 else
4648 return false;
4649
4650 /* Expected number of iterations is B - A - 1. Check that it matches
4651 the actual number, i.e., that B - A - NITER = 1. */
4652 tree_to_aff_combination (niter->niter, nit_type, &nit);
4653 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4654 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
807e902e
KZ
4655 aff_combination_scale (&nit, -1);
4656 aff_combination_scale (&tmpa, -1);
d8af4ba3
ZD
4657 aff_combination_add (&tmpb, &tmpa);
4658 aff_combination_add (&tmpb, &nit);
807e902e 4659 if (tmpb.n != 0 || tmpb.offset != 1)
d8af4ba3
ZD
4660 return false;
4661
4662 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4663 overflow. */
4664 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4665 cand->iv->step,
4666 fold_convert (TREE_TYPE (cand->iv->step), a));
3230c614 4667 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
d8af4ba3
ZD
4668 return false;
4669
4670 /* Determine the new comparison operator. */
4671 comp = step < 0 ? GT_EXPR : LT_EXPR;
4672 if (*comp_p == NE_EXPR)
4673 *comp_p = comp;
4674 else if (*comp_p == EQ_EXPR)
4675 *comp_p = invert_tree_comparison (comp, false);
4676 else
4677 gcc_unreachable ();
4678
4679 return true;
4680}
4681
8b11a64c 4682/* Check whether it is possible to express the condition in USE by comparison
d8af4ba3
ZD
4683 of candidate CAND. If so, store the value compared with to BOUND, and the
4684 comparison operator to COMP. */
8b11a64c
ZD
4685
4686static bool
ca4c3169 4687may_eliminate_iv (struct ivopts_data *data,
d8af4ba3
ZD
4688 struct iv_use *use, struct iv_cand *cand, tree *bound,
4689 enum tree_code *comp)
8b11a64c 4690{
e6845c23 4691 basic_block ex_bb;
8b11a64c 4692 edge exit;
d8af4ba3 4693 tree period;
ca4c3169 4694 struct loop *loop = data->current_loop;
7e2ac86c 4695 aff_tree bnd;
e2102efc 4696 struct tree_niter_desc *desc = NULL;
cbc012d5 4697
9be872b7
ZD
4698 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4699 return false;
4700
52778e2a
EB
4701 /* For now works only for exits that dominate the loop latch.
4702 TODO: extend to other conditions inside loop body. */
726a989a 4703 ex_bb = gimple_bb (use->stmt);
e6845c23 4704 if (use->stmt != last_stmt (ex_bb)
726a989a
RB
4705 || gimple_code (use->stmt) != GIMPLE_COND
4706 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
8b11a64c
ZD
4707 return false;
4708
e6845c23
ZD
4709 exit = EDGE_SUCC (ex_bb, 0);
4710 if (flow_bb_inside_loop_p (loop, exit->dest))
4711 exit = EDGE_SUCC (ex_bb, 1);
4712 if (flow_bb_inside_loop_p (loop, exit->dest))
4713 return false;
4714
d8af4ba3
ZD
4715 desc = niter_for_exit (data, exit);
4716 if (!desc)
8b11a64c
ZD
4717 return false;
4718
52778e2a
EB
4719 /* Determine whether we can use the variable to test the exit condition.
4720 This is the case iff the period of the induction variable is greater
4721 than the number of iterations for which the exit condition is true. */
ca4c3169 4722 period = iv_period (cand->iv);
8b11a64c 4723
52778e2a 4724 /* If the number of iterations is constant, compare against it directly. */
d8af4ba3 4725 if (TREE_CODE (desc->niter) == INTEGER_CST)
52778e2a 4726 {
e2102efc
XDL
4727 /* See cand_value_at. */
4728 if (stmt_after_increment (loop, cand, use->stmt))
4729 {
d8af4ba3 4730 if (!tree_int_cst_lt (desc->niter, period))
e2102efc
XDL
4731 return false;
4732 }
4733 else
4734 {
d8af4ba3 4735 if (tree_int_cst_lt (period, desc->niter))
e2102efc
XDL
4736 return false;
4737 }
52778e2a
EB
4738 }
4739
4740 /* If not, and if this is the only possible exit of the loop, see whether
4741 we can get a conservative estimate on the number of iterations of the
4742 entire loop and compare against that instead. */
e2102efc 4743 else
52778e2a 4744 {
807e902e 4745 widest_int period_value, max_niter;
e2102efc
XDL
4746
4747 max_niter = desc->max;
4748 if (stmt_after_increment (loop, cand, use->stmt))
807e902e
KZ
4749 max_niter += 1;
4750 period_value = wi::to_widest (period);
4751 if (wi::gtu_p (max_niter, period_value))
e2102efc 4752 {
073a8998 4753 /* See if we can take advantage of inferred loop bound information. */
d8af4ba3 4754 if (data->loop_single_exit_p)
e2102efc 4755 {
652c4c71 4756 if (!max_loop_iterations (loop, &max_niter))
e2102efc
XDL
4757 return false;
4758 /* The loop bound is already adjusted by adding 1. */
807e902e 4759 if (wi::gtu_p (max_niter, period_value))
e2102efc
XDL
4760 return false;
4761 }
4762 else
4763 return false;
4764 }
52778e2a
EB
4765 }
4766
d8af4ba3 4767 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
771f882e 4768
d6adff07
RB
4769 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4770 aff_combination_to_tree (&bnd));
d8af4ba3
ZD
4771 *comp = iv_elimination_compare (data, use);
4772
771f882e
ZD
4773 /* It is unlikely that computing the number of iterations using division
4774 would be more profitable than keeping the original induction variable. */
4775 if (expression_expensive_p (*bound))
4776 return false;
d8af4ba3
ZD
4777
4778 /* Sometimes, it is possible to handle the situation that the number of
4779 iterations may be zero unless additional assumtions by using <
4780 instead of != in the exit condition.
4781
4782 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4783 base the exit condition on it. However, that is often too
4784 expensive. */
4785 if (!integer_zerop (desc->may_be_zero))
4786 return iv_elimination_compare_lt (data, cand, comp, desc);
4787
8b11a64c
ZD
4788 return true;
4789}
4790
bb8d292d
TV
4791 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4792 be copied, if is is used in the loop body and DATA->body_includes_call. */
4793
4794static int
4795parm_decl_cost (struct ivopts_data *data, tree bound)
4796{
4797 tree sbound = bound;
4798 STRIP_NOPS (sbound);
4799
4800 if (TREE_CODE (sbound) == SSA_NAME
67386041 4801 && SSA_NAME_IS_DEFAULT_DEF (sbound)
bb8d292d 4802 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
bb8d292d
TV
4803 && data->body_includes_call)
4804 return COSTS_N_INSNS (1);
4805
4806 return 0;
4807}
e2102efc 4808
8b11a64c
ZD
4809/* Determines cost of basing replacement of USE on CAND in a condition. */
4810
b1b02be2 4811static bool
8b11a64c
ZD
4812determine_use_iv_cost_condition (struct ivopts_data *data,
4813 struct iv_use *use, struct iv_cand *cand)
4814{
b697aed4
ZD
4815 tree bound = NULL_TREE;
4816 struct iv *cmp_iv;
4817 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
bb8d292d 4818 comp_cost elim_cost, express_cost, cost, bound_cost;
b697aed4 4819 bool ok;
bb8d292d 4820 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
04eadb13 4821 tree *control_var, *bound_cst;
76725a03 4822 enum tree_code comp = ERROR_MARK;
8b11a64c
ZD
4823
4824 /* Only consider real candidates. */
4825 if (!cand->iv)
4826 {
d8af4ba3
ZD
4827 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4828 ERROR_MARK, -1);
b1b02be2 4829 return false;
8b11a64c
ZD
4830 }
4831
b697aed4 4832 /* Try iv elimination. */
d8af4ba3 4833 if (may_eliminate_iv (data, use, cand, &bound, &comp))
cbc012d5
ZD
4834 {
4835 elim_cost = force_var_cost (data, bound, &depends_on_elim);
bb8d292d
TV
4836 if (elim_cost.cost == 0)
4837 elim_cost.cost = parm_decl_cost (data, bound);
4838 else if (TREE_CODE (bound) == INTEGER_CST)
4839 elim_cost.cost = 0;
4840 /* If we replace a loop condition 'i < n' with 'p < base + n',
4841 depends_on_elim will have 'base' and 'n' set, which implies
4842 that both 'base' and 'n' will be live during the loop. More likely,
4843 'base + n' will be loop invariant, resulting in only one live value
4844 during the loop. So in that case we clear depends_on_elim and set
4845 elim_inv_expr_id instead. */
4846 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4847 {
4848 elim_inv_expr_id = get_expr_id (data, bound);
4849 bitmap_clear (depends_on_elim);
4850 }
cbc012d5
ZD
4851 /* The bound is a loop invariant, so it will be only computed
4852 once. */
6521ac85 4853 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
cbc012d5 4854 }
b697aed4 4855 else
6e8c65f6 4856 elim_cost = infinite_cost;
8b11a64c 4857
b697aed4
ZD
4858 /* Try expressing the original giv. If it is compared with an invariant,
4859 note that we cannot get rid of it. */
04eadb13
SP
4860 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4861 NULL, &cmp_iv);
b697aed4 4862 gcc_assert (ok);
8b11a64c 4863
04eadb13
SP
4864 /* When the condition is a comparison of the candidate IV against
4865 zero, prefer this IV.
4866
073a8998 4867 TODO: The constant that we're subtracting from the cost should
04eadb13
SP
4868 be target-dependent. This information should be added to the
4869 target costs for each backend. */
572ae476
CF
4870 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4871 && integer_zerop (*bound_cst)
04eadb13
SP
4872 && (operand_equal_p (*control_var, cand->var_after, 0)
4873 || operand_equal_p (*control_var, cand->var_before, 0)))
4874 elim_cost.cost -= 1;
4875
b697aed4 4876 express_cost = get_computation_cost (data, use, cand, false,
18081149 4877 &depends_on_express, NULL,
bb8d292d 4878 &express_inv_expr_id);
b697aed4
ZD
4879 fd_ivopts_data = data;
4880 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
f5f12961 4881
bb8d292d
TV
4882 /* Count the cost of the original bound as well. */
4883 bound_cost = force_var_cost (data, *bound_cst, NULL);
4884 if (bound_cost.cost == 0)
4885 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4886 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4887 bound_cost.cost = 0;
4888 express_cost.cost += bound_cost.cost;
4889
b9ff6079
AP
4890 /* Choose the better approach, preferring the eliminated IV. */
4891 if (compare_costs (elim_cost, express_cost) <= 0)
8b11a64c 4892 {
b697aed4
ZD
4893 cost = elim_cost;
4894 depends_on = depends_on_elim;
4895 depends_on_elim = NULL;
bb8d292d 4896 inv_expr_id = elim_inv_expr_id;
b697aed4
ZD
4897 }
4898 else
4899 {
4900 cost = express_cost;
4901 depends_on = depends_on_express;
4902 depends_on_express = NULL;
4903 bound = NULL_TREE;
d8af4ba3 4904 comp = ERROR_MARK;
bb8d292d 4905 inv_expr_id = express_inv_expr_id;
8b11a64c
ZD
4906 }
4907
d8af4ba3 4908 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
b697aed4
ZD
4909
4910 if (depends_on_elim)
4911 BITMAP_FREE (depends_on_elim);
4912 if (depends_on_express)
4913 BITMAP_FREE (depends_on_express);
4914
6e8c65f6 4915 return !infinite_cost_p (cost);
8b11a64c
ZD
4916}
4917
b1b02be2
ZD
4918/* Determines cost of basing replacement of USE on CAND. Returns false
4919 if USE cannot be based on CAND. */
8b11a64c 4920
b1b02be2 4921static bool
8b11a64c
ZD
4922determine_use_iv_cost (struct ivopts_data *data,
4923 struct iv_use *use, struct iv_cand *cand)
4924{
4925 switch (use->type)
4926 {
4927 case USE_NONLINEAR_EXPR:
b1b02be2 4928 return determine_use_iv_cost_generic (data, use, cand);
8b11a64c 4929
8b11a64c 4930 case USE_ADDRESS:
b1b02be2 4931 return determine_use_iv_cost_address (data, use, cand);
8b11a64c
ZD
4932
4933 case USE_COMPARE:
b1b02be2 4934 return determine_use_iv_cost_condition (data, use, cand);
8b11a64c
ZD
4935
4936 default:
1e128c5f 4937 gcc_unreachable ();
8b11a64c
ZD
4938 }
4939}
4940
2c08497a
BS
4941/* Return true if get_computation_cost indicates that autoincrement is
4942 a possibility for the pair of USE and CAND, false otherwise. */
4943
4944static bool
4945autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4946 struct iv_cand *cand)
4947{
4948 bitmap depends_on;
4949 bool can_autoinc;
4950 comp_cost cost;
4951
4952 if (use->type != USE_ADDRESS)
4953 return false;
4954
4955 cost = get_computation_cost (data, use, cand, true, &depends_on,
18081149 4956 &can_autoinc, NULL);
2c08497a
BS
4957
4958 BITMAP_FREE (depends_on);
4959
4960 return !infinite_cost_p (cost) && can_autoinc;
4961}
4962
4963/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4964 use that allows autoincrement, and set their AINC_USE if possible. */
4965
4966static void
4967set_autoinc_for_original_candidates (struct ivopts_data *data)
4968{
4969 unsigned i, j;
4970
4971 for (i = 0; i < n_iv_cands (data); i++)
4972 {
4973 struct iv_cand *cand = iv_cand (data, i);
85ff4ec6
BC
4974 struct iv_use *closest_before = NULL;
4975 struct iv_use *closest_after = NULL;
2c08497a
BS
4976 if (cand->pos != IP_ORIGINAL)
4977 continue;
85ff4ec6 4978
2c08497a
BS
4979 for (j = 0; j < n_iv_uses (data); j++)
4980 {
4981 struct iv_use *use = iv_use (data, j);
4982 unsigned uid = gimple_uid (use->stmt);
85ff4ec6
BC
4983
4984 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
2c08497a 4985 continue;
85ff4ec6
BC
4986
4987 if (uid < gimple_uid (cand->incremented_at)
4988 && (closest_before == NULL
4989 || uid > gimple_uid (closest_before->stmt)))
4990 closest_before = use;
4991
4992 if (uid > gimple_uid (cand->incremented_at)
4993 && (closest_after == NULL
4994 || uid < gimple_uid (closest_after->stmt)))
4995 closest_after = use;
2c08497a 4996 }
85ff4ec6
BC
4997
4998 if (closest_before != NULL
4999 && autoinc_possible_for_pair (data, closest_before, cand))
5000 cand->ainc_use = closest_before;
5001 else if (closest_after != NULL
5002 && autoinc_possible_for_pair (data, closest_after, cand))
5003 cand->ainc_use = closest_after;
2c08497a
BS
5004 }
5005}
5006
5007/* Finds the candidates for the induction variables. */
5008
5009static void
5010find_iv_candidates (struct ivopts_data *data)
5011{
5012 /* Add commonly used ivs. */
5013 add_standard_iv_candidates (data);
5014
5015 /* Add old induction variables. */
5016 add_old_ivs_candidates (data);
5017
5018 /* Add induction variables derived from uses. */
5019 add_derived_ivs_candidates (data);
5020
5021 set_autoinc_for_original_candidates (data);
5022
5023 /* Record the important candidates. */
5024 record_important_candidates (data);
5025}
5026
8b11a64c
ZD
5027/* Determines costs of basing the use of the iv on an iv candidate. */
5028
5029static void
5030determine_use_iv_costs (struct ivopts_data *data)
5031{
5032 unsigned i, j;
5033 struct iv_use *use;
5034 struct iv_cand *cand;
8bdbfff5 5035 bitmap to_clear = BITMAP_ALLOC (NULL);
8b11a64c
ZD
5036
5037 alloc_use_cost_map (data);
5038
8b11a64c
ZD
5039 for (i = 0; i < n_iv_uses (data); i++)
5040 {
5041 use = iv_use (data, i);
5042
5043 if (data->consider_all_candidates)
5044 {
5045 for (j = 0; j < n_iv_cands (data); j++)
5046 {
5047 cand = iv_cand (data, j);
5048 determine_use_iv_cost (data, use, cand);
5049 }
5050 }
5051 else
5052 {
87c476a2
ZD
5053 bitmap_iterator bi;
5054
5055 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
8b11a64c
ZD
5056 {
5057 cand = iv_cand (data, j);
b1b02be2
ZD
5058 if (!determine_use_iv_cost (data, use, cand))
5059 bitmap_set_bit (to_clear, j);
87c476a2 5060 }
b1b02be2
ZD
5061
5062 /* Remove the candidates for that the cost is infinite from
5063 the list of related candidates. */
5064 bitmap_and_compl_into (use->related_cands, to_clear);
5065 bitmap_clear (to_clear);
8b11a64c
ZD
5066 }
5067 }
5068
8bdbfff5 5069 BITMAP_FREE (to_clear);
b1b02be2 5070
8b11a64c
ZD
5071 if (dump_file && (dump_flags & TDF_DETAILS))
5072 {
5073 fprintf (dump_file, "Use-candidate costs:\n");
5074
5075 for (i = 0; i < n_iv_uses (data); i++)
5076 {
5077 use = iv_use (data, i);
5078
5079 fprintf (dump_file, "Use %d:\n", i);
6e8c65f6 5080 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
8b11a64c
ZD
5081 for (j = 0; j < use->n_map_members; j++)
5082 {
5083 if (!use->cost_map[j].cand
6e8c65f6 5084 || infinite_cost_p (use->cost_map[j].cost))
8b11a64c
ZD
5085 continue;
5086
6e8c65f6 5087 fprintf (dump_file, " %d\t%d\t%d\t",
8b11a64c 5088 use->cost_map[j].cand->id,
6e8c65f6
ZD
5089 use->cost_map[j].cost.cost,
5090 use->cost_map[j].cost.complexity);
8b11a64c
ZD
5091 if (use->cost_map[j].depends_on)
5092 bitmap_print (dump_file,
5093 use->cost_map[j].depends_on, "","");
18081149
XDL
5094 if (use->cost_map[j].inv_expr_id != -1)
5095 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
8b11a64c
ZD
5096 fprintf (dump_file, "\n");
5097 }
5098
5099 fprintf (dump_file, "\n");
5100 }
5101 fprintf (dump_file, "\n");
5102 }
5103}
5104
5105/* Determines cost of the candidate CAND. */
5106
5107static void
5108determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5109{
6e8c65f6
ZD
5110 comp_cost cost_base;
5111 unsigned cost, cost_step;
4366cf6d 5112 tree base;
8b11a64c
ZD
5113
5114 if (!cand->iv)
5115 {
5116 cand->cost = 0;
5117 return;
5118 }
5119
5120 /* There are two costs associated with the candidate -- its increment
5121 and its initialization. The second is almost negligible for any loop
5122 that rolls enough, so we take it just very little into account. */
5123
5124 base = cand->iv->base;
5125 cost_base = force_var_cost (data, base, NULL);
a53c5024
TV
5126 /* It will be exceptional that the iv register happens to be initialized with
5127 the proper value at no cost. In general, there will at least be a regcopy
5128 or a const set. */
5129 if (cost_base.cost == 0)
5130 cost_base.cost = COSTS_N_INSNS (1);
5322d07e 5131 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
8b11a64c 5132
6521ac85 5133 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
8b11a64c 5134
6e8c65f6 5135 /* Prefer the original ivs unless we may gain something by replacing it.
fa10beec 5136 The reason is to make debugging simpler; so this is not relevant for
6e8c65f6
ZD
5137 artificial ivs created by other optimization passes. */
5138 if (cand->pos != IP_ORIGINAL
70b5e7dc 5139 || !SSA_NAME_VAR (cand->var_before)
6e8c65f6
ZD
5140 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5141 cost++;
b8698a0f 5142
8b11a64c
ZD
5143 /* Prefer not to insert statements into latch unless there are some
5144 already (so that we do not create unnecessary jumps). */
4366cf6d
ZD
5145 if (cand->pos == IP_END
5146 && empty_block_p (ip_end_pos (data->current_loop)))
6e8c65f6
ZD
5147 cost++;
5148
5149 cand->cost = cost;
2c08497a 5150 cand->cost_step = cost_step;
8b11a64c
ZD
5151}
5152
5153/* Determines costs of computation of the candidates. */
5154
5155static void
5156determine_iv_costs (struct ivopts_data *data)
5157{
5158 unsigned i;
5159
5160 if (dump_file && (dump_flags & TDF_DETAILS))
5161 {
5162 fprintf (dump_file, "Candidate costs:\n");
5163 fprintf (dump_file, " cand\tcost\n");
5164 }
5165
5166 for (i = 0; i < n_iv_cands (data); i++)
5167 {
5168 struct iv_cand *cand = iv_cand (data, i);
5169
5170 determine_iv_cost (data, cand);
5171
5172 if (dump_file && (dump_flags & TDF_DETAILS))
5173 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5174 }
2c08497a 5175
0f14009a
BS
5176 if (dump_file && (dump_flags & TDF_DETAILS))
5177 fprintf (dump_file, "\n");
8b11a64c
ZD
5178}
5179
5180/* Calculates cost for having SIZE induction variables. */
5181
5182static unsigned
5183ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5184{
a154b43a
ZD
5185 /* We add size to the cost, so that we prefer eliminating ivs
5186 if possible. */
bec922f0
SL
5187 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5188 data->body_includes_call);
8b11a64c
ZD
5189}
5190
5191/* For each size of the induction variable set determine the penalty. */
5192
5193static void
5194determine_set_costs (struct ivopts_data *data)
5195{
5196 unsigned j, n;
538dd0b7
DM
5197 gphi *phi;
5198 gphi_iterator psi;
726a989a 5199 tree op;
8b11a64c 5200 struct loop *loop = data->current_loop;
87c476a2 5201 bitmap_iterator bi;
8b11a64c 5202
8b11a64c
ZD
5203 if (dump_file && (dump_flags & TDF_DETAILS))
5204 {
5205 fprintf (dump_file, "Global costs:\n");
5206 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
bec922f0 5207 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
f40751dd
JH
5208 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5209 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
8b11a64c
ZD
5210 }
5211
5212 n = 0;
726a989a 5213 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 5214 {
538dd0b7 5215 phi = psi.phi ();
8b11a64c
ZD
5216 op = PHI_RESULT (phi);
5217
ea057359 5218 if (virtual_operand_p (op))
8b11a64c
ZD
5219 continue;
5220
5221 if (get_iv (data, op))
5222 continue;
5223
5224 n++;
5225 }
5226
87c476a2 5227 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
5228 {
5229 struct version_info *info = ver_info (data, j);
5230
5231 if (info->inv_id && info->has_nonlin_use)
5232 n++;
87c476a2 5233 }
8b11a64c 5234
9a2ef6b8 5235 data->regs_used = n;
8b11a64c
ZD
5236 if (dump_file && (dump_flags & TDF_DETAILS))
5237 fprintf (dump_file, " regs_used %d\n", n);
5238
5239 if (dump_file && (dump_flags & TDF_DETAILS))
5240 {
5241 fprintf (dump_file, " cost for size:\n");
5242 fprintf (dump_file, " ivs\tcost\n");
5243 for (j = 0; j <= 2 * target_avail_regs; j++)
5244 fprintf (dump_file, " %d\t%d\n", j,
5245 ivopts_global_cost_for_size (data, j));
5246 fprintf (dump_file, "\n");
5247 }
5248}
5249
b1b02be2 5250/* Returns true if A is a cheaper cost pair than B. */
8b11a64c 5251
b1b02be2
ZD
5252static bool
5253cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
8b11a64c 5254{
6e8c65f6
ZD
5255 int cmp;
5256
b1b02be2
ZD
5257 if (!a)
5258 return false;
8b11a64c 5259
b1b02be2
ZD
5260 if (!b)
5261 return true;
5262
6e8c65f6
ZD
5263 cmp = compare_costs (a->cost, b->cost);
5264 if (cmp < 0)
b1b02be2
ZD
5265 return true;
5266
6e8c65f6 5267 if (cmp > 0)
b1b02be2
ZD
5268 return false;
5269
5270 /* In case the costs are the same, prefer the cheaper candidate. */
5271 if (a->cand->cost < b->cand->cost)
5272 return true;
5273
5274 return false;
5275}
5276
18081149
XDL
5277
5278/* Returns candidate by that USE is expressed in IVS. */
5279
5280static struct cost_pair *
5281iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5282{
5283 return ivs->cand_for_use[use->id];
5284}
5285
b1b02be2
ZD
5286/* Computes the cost field of IVS structure. */
5287
5288static void
5289iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5290{
6e8c65f6 5291 comp_cost cost = ivs->cand_use_cost;
18081149 5292
6e8c65f6 5293 cost.cost += ivs->cand_cost;
18081149 5294
18081149 5295 cost.cost += ivopts_global_cost_for_size (data,
f06e400f 5296 ivs->n_regs + ivs->num_used_inv_expr);
b1b02be2
ZD
5297
5298 ivs->cost = cost;
5299}
5300
9be872b7
ZD
5301/* Remove invariants in set INVS to set IVS. */
5302
5303static void
5304iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5305{
5306 bitmap_iterator bi;
5307 unsigned iid;
5308
5309 if (!invs)
5310 return;
5311
5312 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5313 {
5314 ivs->n_invariant_uses[iid]--;
5315 if (ivs->n_invariant_uses[iid] == 0)
18081149 5316 ivs->n_regs--;
9be872b7
ZD
5317 }
5318}
5319
b1b02be2
ZD
5320/* Set USE not to be expressed by any candidate in IVS. */
5321
5322static void
5323iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5324 struct iv_use *use)
5325{
9be872b7 5326 unsigned uid = use->id, cid;
b1b02be2 5327 struct cost_pair *cp;
b1b02be2
ZD
5328
5329 cp = ivs->cand_for_use[uid];
5330 if (!cp)
5331 return;
5332 cid = cp->cand->id;
5333
5334 ivs->bad_uses++;
5335 ivs->cand_for_use[uid] = NULL;
5336 ivs->n_cand_uses[cid]--;
5337
5338 if (ivs->n_cand_uses[cid] == 0)
8b11a64c 5339 {
b1b02be2
ZD
5340 bitmap_clear_bit (ivs->cands, cid);
5341 /* Do not count the pseudocandidates. */
5342 if (cp->cand->iv)
5343 ivs->n_regs--;
36f5ada1 5344 ivs->n_cands--;
b1b02be2 5345 ivs->cand_cost -= cp->cand->cost;
9be872b7
ZD
5346
5347 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
b1b02be2
ZD
5348 }
5349
6e8c65f6 5350 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
b1b02be2 5351
9be872b7 5352 iv_ca_set_remove_invariants (ivs, cp->depends_on);
f06e400f
XDL
5353
5354 if (cp->inv_expr_id != -1)
5355 {
5356 ivs->used_inv_expr[cp->inv_expr_id]--;
5357 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5358 ivs->num_used_inv_expr--;
5359 }
9be872b7
ZD
5360 iv_ca_recount_cost (data, ivs);
5361}
5362
5363/* Add invariants in set INVS to set IVS. */
80cad5fa 5364
9be872b7
ZD
5365static void
5366iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5367{
5368 bitmap_iterator bi;
5369 unsigned iid;
5370
5371 if (!invs)
5372 return;
5373
5374 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
b1b02be2 5375 {
9be872b7
ZD
5376 ivs->n_invariant_uses[iid]++;
5377 if (ivs->n_invariant_uses[iid] == 1)
18081149 5378 ivs->n_regs++;
8b11a64c 5379 }
b1b02be2
ZD
5380}
5381
5382/* Set cost pair for USE in set IVS to CP. */
5383
5384static void
5385iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5386 struct iv_use *use, struct cost_pair *cp)
5387{
9be872b7 5388 unsigned uid = use->id, cid;
b1b02be2
ZD
5389
5390 if (ivs->cand_for_use[uid] == cp)
5391 return;
5392
5393 if (ivs->cand_for_use[uid])
5394 iv_ca_set_no_cp (data, ivs, use);
5395
5396 if (cp)
8b11a64c 5397 {
b1b02be2 5398 cid = cp->cand->id;
8b11a64c 5399
b1b02be2
ZD
5400 ivs->bad_uses--;
5401 ivs->cand_for_use[uid] = cp;
5402 ivs->n_cand_uses[cid]++;
5403 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 5404 {
b1b02be2
ZD
5405 bitmap_set_bit (ivs->cands, cid);
5406 /* Do not count the pseudocandidates. */
5407 if (cp->cand->iv)
5408 ivs->n_regs++;
36f5ada1 5409 ivs->n_cands++;
b1b02be2 5410 ivs->cand_cost += cp->cand->cost;
b1b02be2 5411
9be872b7 5412 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
8b11a64c
ZD
5413 }
5414
6e8c65f6 5415 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
9be872b7 5416 iv_ca_set_add_invariants (ivs, cp->depends_on);
f06e400f
XDL
5417
5418 if (cp->inv_expr_id != -1)
5419 {
5420 ivs->used_inv_expr[cp->inv_expr_id]++;
5421 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5422 ivs->num_used_inv_expr++;
5423 }
b1b02be2 5424 iv_ca_recount_cost (data, ivs);
87c476a2 5425 }
b1b02be2
ZD
5426}
5427
5428/* Extend set IVS by expressing USE by some of the candidates in it
f22ae1ec
BC
5429 if possible. Consider all important candidates if candidates in
5430 set IVS don't give any result. */
b1b02be2
ZD
5431
5432static void
5433iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
f22ae1ec 5434 struct iv_use *use)
b1b02be2
ZD
5435{
5436 struct cost_pair *best_cp = NULL, *cp;
5437 bitmap_iterator bi;
5438 unsigned i;
f22ae1ec 5439 struct iv_cand *cand;
8b11a64c 5440
b1b02be2 5441 gcc_assert (ivs->upto >= use->id);
f22ae1ec
BC
5442 ivs->upto++;
5443 ivs->bad_uses++;
b1b02be2 5444
f22ae1ec 5445 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
b1b02be2 5446 {
f22ae1ec 5447 cand = iv_cand (data, i);
18081149 5448 cp = get_use_iv_cost (data, use, cand);
b1b02be2
ZD
5449 if (cheaper_cost_pair (cp, best_cp))
5450 best_cp = cp;
5451 }
f22ae1ec
BC
5452
5453 if (best_cp == NULL)
5454 {
5455 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5456 {
5457 cand = iv_cand (data, i);
5458 cp = get_use_iv_cost (data, use, cand);
5459 if (cheaper_cost_pair (cp, best_cp))
5460 best_cp = cp;
5461 }
5462 }
8b11a64c 5463
b1b02be2 5464 iv_ca_set_cp (data, ivs, use, best_cp);
8b11a64c
ZD
5465}
5466
b1b02be2 5467/* Get cost for assignment IVS. */
8b11a64c 5468
6e8c65f6 5469static comp_cost
b1b02be2
ZD
5470iv_ca_cost (struct iv_ca *ivs)
5471{
c4e93e28
AH
5472 /* This was a conditional expression but it triggered a bug in
5473 Sun C 5.5. */
cb4ad180
AH
5474 if (ivs->bad_uses)
5475 return infinite_cost;
5476 else
5477 return ivs->cost;
b1b02be2
ZD
5478}
5479
5480/* Returns true if all dependences of CP are among invariants in IVS. */
5481
5482static bool
5483iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
8b11a64c
ZD
5484{
5485 unsigned i;
87c476a2 5486 bitmap_iterator bi;
8b11a64c 5487
b1b02be2
ZD
5488 if (!cp->depends_on)
5489 return true;
5490
5491 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
8b11a64c 5492 {
b1b02be2
ZD
5493 if (ivs->n_invariant_uses[i] == 0)
5494 return false;
5495 }
5496
5497 return true;
5498}
5499
5500/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5501 it before NEXT_CHANGE. */
5502
5503static struct iv_ca_delta *
5504iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5505 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5506{
5ed6ace5 5507 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
b1b02be2
ZD
5508
5509 change->use = use;
5510 change->old_cp = old_cp;
5511 change->new_cp = new_cp;
5512 change->next_change = next_change;
5513
5514 return change;
5515}
5516
36f5ada1 5517/* Joins two lists of changes L1 and L2. Destructive -- old lists
6c6cfbfd 5518 are rewritten. */
36f5ada1
ZD
5519
5520static struct iv_ca_delta *
5521iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5522{
5523 struct iv_ca_delta *last;
5524
5525 if (!l2)
5526 return l1;
5527
5528 if (!l1)
5529 return l2;
5530
5531 for (last = l1; last->next_change; last = last->next_change)
5532 continue;
5533 last->next_change = l2;
5534
5535 return l1;
5536}
5537
36f5ada1
ZD
5538/* Reverse the list of changes DELTA, forming the inverse to it. */
5539
5540static struct iv_ca_delta *
5541iv_ca_delta_reverse (struct iv_ca_delta *delta)
5542{
5543 struct iv_ca_delta *act, *next, *prev = NULL;
5544 struct cost_pair *tmp;
5545
5546 for (act = delta; act; act = next)
5547 {
5548 next = act->next_change;
5549 act->next_change = prev;
5550 prev = act;
5551
5552 tmp = act->old_cp;
5553 act->old_cp = act->new_cp;
5554 act->new_cp = tmp;
5555 }
5556
5557 return prev;
5558}
5559
b1b02be2
ZD
5560/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5561 reverted instead. */
5562
5563static void
5564iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5565 struct iv_ca_delta *delta, bool forward)
5566{
5567 struct cost_pair *from, *to;
36f5ada1 5568 struct iv_ca_delta *act;
b1b02be2 5569
36f5ada1
ZD
5570 if (!forward)
5571 delta = iv_ca_delta_reverse (delta);
b1b02be2 5572
36f5ada1
ZD
5573 for (act = delta; act; act = act->next_change)
5574 {
5575 from = act->old_cp;
5576 to = act->new_cp;
5577 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5578 iv_ca_set_cp (data, ivs, act->use, to);
8b11a64c 5579 }
36f5ada1
ZD
5580
5581 if (!forward)
5582 iv_ca_delta_reverse (delta);
b1b02be2 5583}
8b11a64c 5584
b1b02be2 5585/* Returns true if CAND is used in IVS. */
8b11a64c 5586
b1b02be2
ZD
5587static bool
5588iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5589{
5590 return ivs->n_cand_uses[cand->id] > 0;
5591}
8b11a64c 5592
36f5ada1
ZD
5593/* Returns number of induction variable candidates in the set IVS. */
5594
5595static unsigned
5596iv_ca_n_cands (struct iv_ca *ivs)
5597{
5598 return ivs->n_cands;
5599}
5600
b1b02be2
ZD
5601/* Free the list of changes DELTA. */
5602
5603static void
5604iv_ca_delta_free (struct iv_ca_delta **delta)
5605{
5606 struct iv_ca_delta *act, *next;
5607
5608 for (act = *delta; act; act = next)
87c476a2 5609 {
b1b02be2
ZD
5610 next = act->next_change;
5611 free (act);
87c476a2 5612 }
8b11a64c 5613
b1b02be2
ZD
5614 *delta = NULL;
5615}
5616
5617/* Allocates new iv candidates assignment. */
5618
5619static struct iv_ca *
5620iv_ca_new (struct ivopts_data *data)
5621{
5ed6ace5 5622 struct iv_ca *nw = XNEW (struct iv_ca);
8b11a64c 5623
b1b02be2
ZD
5624 nw->upto = 0;
5625 nw->bad_uses = 0;
5ed6ace5
MD
5626 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5627 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
8bdbfff5 5628 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 5629 nw->n_cands = 0;
b1b02be2 5630 nw->n_regs = 0;
7735d6c7 5631 nw->cand_use_cost = no_cost;
b1b02be2 5632 nw->cand_cost = 0;
5ed6ace5 5633 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
7735d6c7 5634 nw->cost = no_cost;
f06e400f
XDL
5635 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5636 nw->num_used_inv_expr = 0;
b1b02be2
ZD
5637
5638 return nw;
5639}
5640
5641/* Free memory occupied by the set IVS. */
5642
5643static void
5644iv_ca_free (struct iv_ca **ivs)
5645{
5646 free ((*ivs)->cand_for_use);
5647 free ((*ivs)->n_cand_uses);
8bdbfff5 5648 BITMAP_FREE ((*ivs)->cands);
b1b02be2 5649 free ((*ivs)->n_invariant_uses);
f06e400f 5650 free ((*ivs)->used_inv_expr);
b1b02be2
ZD
5651 free (*ivs);
5652 *ivs = NULL;
5653}
5654
5655/* Dumps IVS to FILE. */
5656
5657static void
5658iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5659{
5660 const char *pref = " invariants ";
5661 unsigned i;
6e8c65f6 5662 comp_cost cost = iv_ca_cost (ivs);
b1b02be2 5663
18081149
XDL
5664 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5665 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5666 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5667 bitmap_print (file, ivs->cands, " candidates: ","\n");
5668
5669 for (i = 0; i < ivs->upto; i++)
5670 {
5671 struct iv_use *use = iv_use (data, i);
5672 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5673 if (cp)
5674 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5675 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5676 else
5677 fprintf (file, " use:%d --> ??\n", use->id);
5678 }
b1b02be2
ZD
5679
5680 for (i = 1; i <= data->max_inv_id; i++)
5681 if (ivs->n_invariant_uses[i])
5682 {
5683 fprintf (file, "%s%d", pref, i);
5684 pref = ", ";
5685 }
18081149 5686 fprintf (file, "\n\n");
b1b02be2
ZD
5687}
5688
5689/* Try changing candidate in IVS to CAND for each use. Return cost of the
36f5ada1 5690 new set, and store differences in DELTA. Number of induction variables
18081149
XDL
5691 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5692 the function will try to find a solution with mimimal iv candidates. */
b1b02be2 5693
6e8c65f6 5694static comp_cost
b1b02be2 5695iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
36f5ada1 5696 struct iv_cand *cand, struct iv_ca_delta **delta,
18081149 5697 unsigned *n_ivs, bool min_ncand)
b1b02be2 5698{
6e8c65f6
ZD
5699 unsigned i;
5700 comp_cost cost;
b1b02be2
ZD
5701 struct iv_use *use;
5702 struct cost_pair *old_cp, *new_cp;
5703
5704 *delta = NULL;
5705 for (i = 0; i < ivs->upto; i++)
5706 {
5707 use = iv_use (data, i);
5708 old_cp = iv_ca_cand_for_use (ivs, use);
5709
5710 if (old_cp
5711 && old_cp->cand == cand)
5712 continue;
5713
5714 new_cp = get_use_iv_cost (data, use, cand);
5715 if (!new_cp)
5716 continue;
5717
18081149 5718 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
b1b02be2 5719 continue;
b8698a0f 5720
18081149
XDL
5721 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5722 continue;
b1b02be2
ZD
5723
5724 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5725 }
5726
5727 iv_ca_delta_commit (data, ivs, *delta, true);
5728 cost = iv_ca_cost (ivs);
36f5ada1
ZD
5729 if (n_ivs)
5730 *n_ivs = iv_ca_n_cands (ivs);
b1b02be2 5731 iv_ca_delta_commit (data, ivs, *delta, false);
8b11a64c
ZD
5732
5733 return cost;
5734}
5735
a0eca485 5736/* Try narrowing set IVS by removing CAND. Return the cost of
2c407426
BC
5737 the new set and store the differences in DELTA. START is
5738 the candidate with which we start narrowing. */
8b11a64c 5739
6e8c65f6 5740static comp_cost
b1b02be2 5741iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
2c407426
BC
5742 struct iv_cand *cand, struct iv_cand *start,
5743 struct iv_ca_delta **delta)
8b11a64c 5744{
b1b02be2
ZD
5745 unsigned i, ci;
5746 struct iv_use *use;
5747 struct cost_pair *old_cp, *new_cp, *cp;
5748 bitmap_iterator bi;
5749 struct iv_cand *cnd;
2c407426 5750 comp_cost cost, best_cost, acost;
b1b02be2
ZD
5751
5752 *delta = NULL;
5753 for (i = 0; i < n_iv_uses (data); i++)
5754 {
5755 use = iv_use (data, i);
5756
5757 old_cp = iv_ca_cand_for_use (ivs, use);
5758 if (old_cp->cand != cand)
5759 continue;
5760
2c407426
BC
5761 best_cost = iv_ca_cost (ivs);
5762 /* Start narrowing with START. */
5763 new_cp = get_use_iv_cost (data, use, start);
b1b02be2
ZD
5764
5765 if (data->consider_all_candidates)
5766 {
5767 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5768 {
2c407426 5769 if (ci == cand->id || (start && ci == start->id))
b1b02be2
ZD
5770 continue;
5771
5772 cnd = iv_cand (data, ci);
5773
5774 cp = get_use_iv_cost (data, use, cnd);
5775 if (!cp)
5776 continue;
18081149 5777
2c407426
BC
5778 iv_ca_set_cp (data, ivs, use, cp);
5779 acost = iv_ca_cost (ivs);
b1b02be2 5780
2c407426
BC
5781 if (compare_costs (acost, best_cost) < 0)
5782 {
5783 best_cost = acost;
5784 new_cp = cp;
5785 }
b1b02be2
ZD
5786 }
5787 }
5788 else
5789 {
5790 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5791 {
2c407426 5792 if (ci == cand->id || (start && ci == start->id))
b1b02be2
ZD
5793 continue;
5794
5795 cnd = iv_cand (data, ci);
5796
5797 cp = get_use_iv_cost (data, use, cnd);
5798 if (!cp)
5799 continue;
b8698a0f 5800
2c407426
BC
5801 iv_ca_set_cp (data, ivs, use, cp);
5802 acost = iv_ca_cost (ivs);
b1b02be2 5803
2c407426
BC
5804 if (compare_costs (acost, best_cost) < 0)
5805 {
5806 best_cost = acost;
5807 new_cp = cp;
5808 }
b1b02be2
ZD
5809 }
5810 }
2c407426
BC
5811 /* Restore to old cp for use. */
5812 iv_ca_set_cp (data, ivs, use, old_cp);
b1b02be2
ZD
5813
5814 if (!new_cp)
5815 {
5816 iv_ca_delta_free (delta);
6e8c65f6 5817 return infinite_cost;
b1b02be2
ZD
5818 }
5819
5820 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5821 }
5822
5823 iv_ca_delta_commit (data, ivs, *delta, true);
5824 cost = iv_ca_cost (ivs);
5825 iv_ca_delta_commit (data, ivs, *delta, false);
5826
5827 return cost;
8b11a64c
ZD
5828}
5829
36f5ada1
ZD
5830/* Try optimizing the set of candidates IVS by removing candidates different
5831 from to EXCEPT_CAND from it. Return cost of the new set, and store
5832 differences in DELTA. */
5833
6e8c65f6 5834static comp_cost
36f5ada1
ZD
5835iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5836 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5837{
5838 bitmap_iterator bi;
5839 struct iv_ca_delta *act_delta, *best_delta;
6e8c65f6
ZD
5840 unsigned i;
5841 comp_cost best_cost, acost;
36f5ada1
ZD
5842 struct iv_cand *cand;
5843
5844 best_delta = NULL;
5845 best_cost = iv_ca_cost (ivs);
5846
5847 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5848 {
5849 cand = iv_cand (data, i);
5850
5851 if (cand == except_cand)
5852 continue;
5853
2c407426 5854 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
36f5ada1 5855
6e8c65f6 5856 if (compare_costs (acost, best_cost) < 0)
36f5ada1
ZD
5857 {
5858 best_cost = acost;
5859 iv_ca_delta_free (&best_delta);
5860 best_delta = act_delta;
5861 }
5862 else
5863 iv_ca_delta_free (&act_delta);
5864 }
5865
5866 if (!best_delta)
5867 {
5868 *delta = NULL;
5869 return best_cost;
5870 }
5871
5872 /* Recurse to possibly remove other unnecessary ivs. */
5873 iv_ca_delta_commit (data, ivs, best_delta, true);
5874 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5875 iv_ca_delta_commit (data, ivs, best_delta, false);
5876 *delta = iv_ca_delta_join (best_delta, *delta);
5877 return best_cost;
5878}
5879
6326a5f5
BC
5880/* Check if CAND_IDX is a candidate other than OLD_CAND and has
5881 cheaper local cost for USE than BEST_CP. Return pointer to
5882 the corresponding cost_pair, otherwise just return BEST_CP. */
5883
5884static struct cost_pair*
5885cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
5886 unsigned int cand_idx, struct iv_cand *old_cand,
5887 struct cost_pair *best_cp)
5888{
5889 struct iv_cand *cand;
5890 struct cost_pair *cp;
5891
5892 gcc_assert (old_cand != NULL && best_cp != NULL);
5893 if (cand_idx == old_cand->id)
5894 return best_cp;
5895
5896 cand = iv_cand (data, cand_idx);
5897 cp = get_use_iv_cost (data, use, cand);
5898 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
5899 return cp;
5900
5901 return best_cp;
5902}
5903
5904/* Try breaking local optimal fixed-point for IVS by replacing candidates
5905 which are used by more than one iv uses. For each of those candidates,
5906 this function tries to represent iv uses under that candidate using
5907 other ones with lower local cost, then tries to prune the new set.
5908 If the new set has lower cost, It returns the new cost after recording
5909 candidate replacement in list DELTA. */
5910
5911static comp_cost
5912iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
5913 struct iv_ca_delta **delta)
5914{
5915 bitmap_iterator bi, bj;
5916 unsigned int i, j, k;
5917 struct iv_use *use;
5918 struct iv_cand *cand;
5919 comp_cost orig_cost, acost;
5920 struct iv_ca_delta *act_delta, *tmp_delta;
5921 struct cost_pair *old_cp, *best_cp = NULL;
5922
5923 *delta = NULL;
5924 orig_cost = iv_ca_cost (ivs);
5925
5926 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5927 {
5928 if (ivs->n_cand_uses[i] == 1
5929 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
5930 continue;
5931
5932 cand = iv_cand (data, i);
5933
5934 act_delta = NULL;
5935 /* Represent uses under current candidate using other ones with
5936 lower local cost. */
5937 for (j = 0; j < ivs->upto; j++)
5938 {
5939 use = iv_use (data, j);
5940 old_cp = iv_ca_cand_for_use (ivs, use);
5941
5942 if (old_cp->cand != cand)
5943 continue;
5944
5945 best_cp = old_cp;
5946 if (data->consider_all_candidates)
5947 for (k = 0; k < n_iv_cands (data); k++)
5948 best_cp = cheaper_cost_with_cand (data, use, k,
5949 old_cp->cand, best_cp);
5950 else
5951 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
5952 best_cp = cheaper_cost_with_cand (data, use, k,
5953 old_cp->cand, best_cp);
5954
5955 if (best_cp == old_cp)
5956 continue;
5957
5958 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
5959 }
5960 /* No need for further prune. */
5961 if (!act_delta)
5962 continue;
5963
5964 /* Prune the new candidate set. */
5965 iv_ca_delta_commit (data, ivs, act_delta, true);
5966 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
5967 iv_ca_delta_commit (data, ivs, act_delta, false);
5968 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5969
5970 if (compare_costs (acost, orig_cost) < 0)
5971 {
5972 *delta = act_delta;
5973 return acost;
5974 }
5975 else
5976 iv_ca_delta_free (&act_delta);
5977 }
5978
5979 return orig_cost;
5980}
5981
b1b02be2 5982/* Tries to extend the sets IVS in the best possible way in order
16ad8025
SL
5983 to express the USE. If ORIGINALP is true, prefer candidates from
5984 the original set of IVs, otherwise favor important candidates not
5985 based on any memory object. */
8b11a64c
ZD
5986
5987static bool
b1b02be2 5988try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
16ad8025 5989 struct iv_use *use, bool originalp)
8b11a64c 5990{
6e8c65f6 5991 comp_cost best_cost, act_cost;
8b11a64c 5992 unsigned i;
38b0dcb8
ZD
5993 bitmap_iterator bi;
5994 struct iv_cand *cand;
b1b02be2
ZD
5995 struct iv_ca_delta *best_delta = NULL, *act_delta;
5996 struct cost_pair *cp;
5997
f22ae1ec 5998 iv_ca_add_use (data, ivs, use);
b1b02be2 5999 best_cost = iv_ca_cost (ivs);
b1b02be2
ZD
6000 cp = iv_ca_cand_for_use (ivs, use);
6001 if (cp)
6002 {
6003 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6004 iv_ca_set_no_cp (data, ivs, use);
6005 }
8b11a64c 6006
16ad8025
SL
6007 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6008 first try important candidates not based on any memory object. Only if
6e8c65f6
ZD
6009 this fails, try the specific ones. Rationale -- in loops with many
6010 variables the best choice often is to use just one generic biv. If we
6011 added here many ivs specific to the uses, the optimization algorithm later
6012 would be likely to get stuck in a local minimum, thus causing us to create
6013 too many ivs. The approach from few ivs to more seems more likely to be
6014 successful -- starting from few ivs, replacing an expensive use by a
6015 specific iv should always be a win. */
38b0dcb8 6016 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
8b11a64c 6017 {
38b0dcb8
ZD
6018 cand = iv_cand (data, i);
6019
16ad8025
SL
6020 if (originalp && cand->pos !=IP_ORIGINAL)
6021 continue;
6022
6023 if (!originalp && cand->iv->base_object != NULL_TREE)
6e8c65f6
ZD
6024 continue;
6025
b1b02be2 6026 if (iv_ca_cand_used_p (ivs, cand))
18081149 6027 continue;
8b11a64c 6028
b1b02be2
ZD
6029 cp = get_use_iv_cost (data, use, cand);
6030 if (!cp)
6031 continue;
6032
6033 iv_ca_set_cp (data, ivs, use, cp);
18081149
XDL
6034 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6035 true);
b1b02be2
ZD
6036 iv_ca_set_no_cp (data, ivs, use);
6037 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
8b11a64c 6038
6e8c65f6 6039 if (compare_costs (act_cost, best_cost) < 0)
8b11a64c
ZD
6040 {
6041 best_cost = act_cost;
b1b02be2
ZD
6042
6043 iv_ca_delta_free (&best_delta);
6044 best_delta = act_delta;
8b11a64c 6045 }
b1b02be2
ZD
6046 else
6047 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
6048 }
6049
6e8c65f6 6050 if (infinite_cost_p (best_cost))
38b0dcb8
ZD
6051 {
6052 for (i = 0; i < use->n_map_members; i++)
6053 {
6054 cp = use->cost_map + i;
b1b02be2
ZD
6055 cand = cp->cand;
6056 if (!cand)
38b0dcb8
ZD
6057 continue;
6058
6059 /* Already tried this. */
16ad8025
SL
6060 if (cand->important)
6061 {
6062 if (originalp && cand->pos == IP_ORIGINAL)
6063 continue;
6064 if (!originalp && cand->iv->base_object == NULL_TREE)
6065 continue;
6066 }
b8698a0f 6067
b1b02be2 6068 if (iv_ca_cand_used_p (ivs, cand))
38b0dcb8
ZD
6069 continue;
6070
b1b02be2
ZD
6071 act_delta = NULL;
6072 iv_ca_set_cp (data, ivs, use, cp);
18081149 6073 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
b1b02be2
ZD
6074 iv_ca_set_no_cp (data, ivs, use);
6075 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6076 cp, act_delta);
38b0dcb8 6077
6e8c65f6 6078 if (compare_costs (act_cost, best_cost) < 0)
38b0dcb8
ZD
6079 {
6080 best_cost = act_cost;
b1b02be2
ZD
6081
6082 if (best_delta)
6083 iv_ca_delta_free (&best_delta);
6084 best_delta = act_delta;
38b0dcb8 6085 }
b1b02be2
ZD
6086 else
6087 iv_ca_delta_free (&act_delta);
38b0dcb8
ZD
6088 }
6089 }
6090
b1b02be2
ZD
6091 iv_ca_delta_commit (data, ivs, best_delta, true);
6092 iv_ca_delta_free (&best_delta);
8b11a64c 6093
6e8c65f6 6094 return !infinite_cost_p (best_cost);
8b11a64c
ZD
6095}
6096
b1b02be2 6097/* Finds an initial assignment of candidates to uses. */
8b11a64c 6098
b1b02be2 6099static struct iv_ca *
16ad8025 6100get_initial_solution (struct ivopts_data *data, bool originalp)
8b11a64c 6101{
b1b02be2 6102 struct iv_ca *ivs = iv_ca_new (data);
8b11a64c
ZD
6103 unsigned i;
6104
6105 for (i = 0; i < n_iv_uses (data); i++)
16ad8025 6106 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
b1b02be2
ZD
6107 {
6108 iv_ca_free (&ivs);
6109 return NULL;
6110 }
8b11a64c 6111
b1b02be2 6112 return ivs;
8b11a64c
ZD
6113}
6114
6326a5f5
BC
6115/* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6116 points to a bool variable, this function tries to break local
6117 optimal fixed-point by replacing candidates in IVS if it's true. */
8b11a64c
ZD
6118
6119static bool
6326a5f5
BC
6120try_improve_iv_set (struct ivopts_data *data,
6121 struct iv_ca *ivs, bool *try_replace_p)
8b11a64c 6122{
6e8c65f6
ZD
6123 unsigned i, n_ivs;
6124 comp_cost acost, best_cost = iv_ca_cost (ivs);
36f5ada1 6125 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
b1b02be2 6126 struct iv_cand *cand;
8b11a64c 6127
36f5ada1 6128 /* Try extending the set of induction variables by one. */
8b11a64c
ZD
6129 for (i = 0; i < n_iv_cands (data); i++)
6130 {
b1b02be2 6131 cand = iv_cand (data, i);
b8698a0f 6132
b1b02be2 6133 if (iv_ca_cand_used_p (ivs, cand))
36f5ada1
ZD
6134 continue;
6135
18081149 6136 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
36f5ada1
ZD
6137 if (!act_delta)
6138 continue;
6139
6140 /* If we successfully added the candidate and the set is small enough,
6141 try optimizing it by removing other candidates. */
6142 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6143 {
6144 iv_ca_delta_commit (data, ivs, act_delta, true);
6145 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6146 iv_ca_delta_commit (data, ivs, act_delta, false);
6147 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6148 }
8b11a64c 6149
6e8c65f6 6150 if (compare_costs (acost, best_cost) < 0)
8b11a64c 6151 {
b1b02be2 6152 best_cost = acost;
36f5ada1 6153 iv_ca_delta_free (&best_delta);
b1b02be2 6154 best_delta = act_delta;
8b11a64c 6155 }
8b11a64c 6156 else
b1b02be2 6157 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
6158 }
6159
b1b02be2 6160 if (!best_delta)
36f5ada1
ZD
6161 {
6162 /* Try removing the candidates from the set instead. */
6163 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6164
6326a5f5
BC
6165 if (!best_delta && *try_replace_p)
6166 {
6167 *try_replace_p = false;
6168 /* So far candidate selecting algorithm tends to choose fewer IVs
6169 so that it can handle cases in which loops have many variables
6170 but the best choice is often to use only one general biv. One
6171 weakness is it can't handle opposite cases, in which different
6172 candidates should be chosen with respect to each use. To solve
6173 the problem, we replace candidates in a manner described by the
6174 comments of iv_ca_replace, thus give general algorithm a chance
6175 to break local optimal fixed-point in these cases. */
6176 best_cost = iv_ca_replace (data, ivs, &best_delta);
6177 }
6178
36f5ada1
ZD
6179 if (!best_delta)
6180 return false;
6181 }
8b11a64c 6182
b1b02be2 6183 iv_ca_delta_commit (data, ivs, best_delta, true);
6e8c65f6 6184 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
b1b02be2 6185 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
6186 return true;
6187}
6188
6189/* Attempts to find the optimal set of induction variables. We do simple
6190 greedy heuristic -- we try to replace at most one candidate in the selected
6191 solution and remove the unused ivs while this improves the cost. */
6192
b1b02be2 6193static struct iv_ca *
16ad8025 6194find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
8b11a64c 6195{
b1b02be2 6196 struct iv_ca *set;
6326a5f5 6197 bool try_replace_p = true;
8b11a64c 6198
b1b02be2 6199 /* Get the initial solution. */
16ad8025 6200 set = get_initial_solution (data, originalp);
b1b02be2 6201 if (!set)
8b11a64c
ZD
6202 {
6203 if (dump_file && (dump_flags & TDF_DETAILS))
6204 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
8b11a64c
ZD
6205 return NULL;
6206 }
6207
6208 if (dump_file && (dump_flags & TDF_DETAILS))
6209 {
b1b02be2
ZD
6210 fprintf (dump_file, "Initial set of candidates:\n");
6211 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
6212 }
6213
6326a5f5 6214 while (try_improve_iv_set (data, set, &try_replace_p))
8b11a64c
ZD
6215 {
6216 if (dump_file && (dump_flags & TDF_DETAILS))
6217 {
b1b02be2
ZD
6218 fprintf (dump_file, "Improved to:\n");
6219 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
6220 }
6221 }
6222
16ad8025
SL
6223 return set;
6224}
6225
6226static struct iv_ca *
6227find_optimal_iv_set (struct ivopts_data *data)
6228{
6229 unsigned i;
6230 struct iv_ca *set, *origset;
6231 struct iv_use *use;
6232 comp_cost cost, origcost;
6233
6234 /* Determine the cost based on a strategy that starts with original IVs,
6235 and try again using a strategy that prefers candidates not based
6236 on any IVs. */
6237 origset = find_optimal_iv_set_1 (data, true);
6238 set = find_optimal_iv_set_1 (data, false);
6239
6240 if (!origset && !set)
6241 return NULL;
6242
6243 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6244 cost = set ? iv_ca_cost (set) : infinite_cost;
6245
8b11a64c 6246 if (dump_file && (dump_flags & TDF_DETAILS))
6e8c65f6 6247 {
16ad8025
SL
6248 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6249 origcost.cost, origcost.complexity);
6250 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6251 cost.cost, cost.complexity);
6252 }
6253
6254 /* Choose the one with the best cost. */
6255 if (compare_costs (origcost, cost) <= 0)
6256 {
6257 if (set)
6258 iv_ca_free (&set);
6259 set = origset;
6e8c65f6 6260 }
16ad8025
SL
6261 else if (origset)
6262 iv_ca_free (&origset);
8b11a64c
ZD
6263
6264 for (i = 0; i < n_iv_uses (data); i++)
6265 {
6266 use = iv_use (data, i);
b1b02be2 6267 use->selected = iv_ca_cand_for_use (set, use)->cand;
8b11a64c
ZD
6268 }
6269
8b11a64c
ZD
6270 return set;
6271}
6272
6273/* Creates a new induction variable corresponding to CAND. */
6274
6275static void
6276create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6277{
726a989a 6278 gimple_stmt_iterator incr_pos;
8b11a64c
ZD
6279 tree base;
6280 bool after = false;
6281
6282 if (!cand->iv)
6283 return;
6284
6285 switch (cand->pos)
6286 {
6287 case IP_NORMAL:
726a989a 6288 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
8b11a64c
ZD
6289 break;
6290
6291 case IP_END:
726a989a 6292 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
8b11a64c
ZD
6293 after = true;
6294 break;
6295
2c08497a
BS
6296 case IP_AFTER_USE:
6297 after = true;
6298 /* fall through */
6299 case IP_BEFORE_USE:
6300 incr_pos = gsi_for_stmt (cand->incremented_at);
6301 break;
6302
8b11a64c
ZD
6303 case IP_ORIGINAL:
6304 /* Mark that the iv is preserved. */
6305 name_info (data, cand->var_before)->preserve_biv = true;
6306 name_info (data, cand->var_after)->preserve_biv = true;
6307
6308 /* Rewrite the increment so that it uses var_before directly. */
6309 find_interesting_uses_op (data, cand->var_after)->selected = cand;
8b11a64c
ZD
6310 return;
6311 }
b8698a0f 6312
8b11a64c 6313 gimple_add_tmp_var (cand->var_before);
8b11a64c
ZD
6314
6315 base = unshare_expr (cand->iv->base);
6316
9be872b7
ZD
6317 create_iv (base, unshare_expr (cand->iv->step),
6318 cand->var_before, data->current_loop,
8b11a64c
ZD
6319 &incr_pos, after, &cand->var_before, &cand->var_after);
6320}
6321
6322/* Creates new induction variables described in SET. */
6323
6324static void
b1b02be2 6325create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
8b11a64c
ZD
6326{
6327 unsigned i;
6328 struct iv_cand *cand;
87c476a2 6329 bitmap_iterator bi;
8b11a64c 6330
b1b02be2 6331 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
8b11a64c
ZD
6332 {
6333 cand = iv_cand (data, i);
6334 create_new_iv (data, cand);
87c476a2 6335 }
8b11a64c 6336
18081149
XDL
6337 if (dump_file && (dump_flags & TDF_DETAILS))
6338 {
6339 fprintf (dump_file, "\nSelected IV set: \n");
6340 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6341 {
6342 cand = iv_cand (data, i);
6343 dump_cand (dump_file, cand);
6344 }
6345 fprintf (dump_file, "\n");
6346 }
6347}
8b11a64c
ZD
6348
6349/* Rewrites USE (definition of iv used in a nonlinear expression)
6350 using candidate CAND. */
6351
6352static void
6353rewrite_use_nonlinear_expr (struct ivopts_data *data,
6354 struct iv_use *use, struct iv_cand *cand)
6355{
3520b745 6356 tree comp;
726a989a 6357 tree op, tgt;
538dd0b7 6358 gassign *ass;
726a989a 6359 gimple_stmt_iterator bsi;
3520b745
ZD
6360
6361 /* An important special case -- if we are asked to express value of
6362 the original iv by itself, just exit; there is no need to
6363 introduce a new computation (that might also need casting the
6364 variable to unsigned and back). */
6365 if (cand->pos == IP_ORIGINAL
7b9d4f70 6366 && cand->incremented_at == use->stmt)
3520b745 6367 {
d06a01bf 6368 enum tree_code stmt_code;
7b9d4f70 6369
726a989a
RB
6370 gcc_assert (is_gimple_assign (use->stmt));
6371 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7b9d4f70 6372
7b9d4f70
ZD
6373 /* Check whether we may leave the computation unchanged.
6374 This is the case only if it does not rely on other
6375 computations in the loop -- otherwise, the computation
6376 we rely upon may be removed in remove_unused_ivs,
6377 thus leading to ICE. */
d06a01bf
ZD
6378 stmt_code = gimple_assign_rhs_code (use->stmt);
6379 if (stmt_code == PLUS_EXPR
6380 || stmt_code == MINUS_EXPR
6381 || stmt_code == POINTER_PLUS_EXPR)
7b9d4f70 6382 {
726a989a
RB
6383 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6384 op = gimple_assign_rhs2 (use->stmt);
d06a01bf 6385 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
726a989a 6386 op = gimple_assign_rhs1 (use->stmt);
7b9d4f70
ZD
6387 else
6388 op = NULL_TREE;
6389 }
6390 else
6391 op = NULL_TREE;
3520b745 6392
d06a01bf 6393 if (op && expr_invariant_in_loop_p (data->current_loop, op))
3520b745 6394 return;
73f30c63 6395 }
3520b745 6396
d06a01bf
ZD
6397 comp = get_computation (data->current_loop, use, cand);
6398 gcc_assert (comp != NULL_TREE);
6399
726a989a 6400 switch (gimple_code (use->stmt))
8b11a64c 6401 {
726a989a 6402 case GIMPLE_PHI:
8b11a64c
ZD
6403 tgt = PHI_RESULT (use->stmt);
6404
6405 /* If we should keep the biv, do not replace it. */
6406 if (name_info (data, tgt)->preserve_biv)
6407 return;
6408
726a989a 6409 bsi = gsi_after_labels (gimple_bb (use->stmt));
1e128c5f
GB
6410 break;
6411
726a989a
RB
6412 case GIMPLE_ASSIGN:
6413 tgt = gimple_assign_lhs (use->stmt);
6414 bsi = gsi_for_stmt (use->stmt);
1e128c5f
GB
6415 break;
6416
6417 default:
6418 gcc_unreachable ();
8b11a64c 6419 }
8b11a64c 6420
17fc049f
RG
6421 if (!valid_gimple_rhs_p (comp)
6422 || (gimple_code (use->stmt) != GIMPLE_PHI
6423 /* We can't allow re-allocating the stmt as it might be pointed
6424 to still. */
6425 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6426 >= gimple_num_ops (gsi_stmt (bsi)))))
bdf0f819
RG
6427 {
6428 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6429 true, GSI_SAME_STMT);
6430 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
b5c878a5
RG
6431 {
6432 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6433 /* As this isn't a plain copy we have to reset alignment
6434 information. */
6435 if (SSA_NAME_PTR_INFO (comp))
644ffefd 6436 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
b5c878a5 6437 }
bdf0f819 6438 }
8b11a64c 6439
726a989a 6440 if (gimple_code (use->stmt) == GIMPLE_PHI)
8b11a64c 6441 {
17fc049f 6442 ass = gimple_build_assign (tgt, comp);
726a989a 6443 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
ae0a4449
AO
6444
6445 bsi = gsi_for_stmt (use->stmt);
6446 remove_phi_node (&bsi, false);
8b11a64c
ZD
6447 }
6448 else
726a989a 6449 {
17fc049f 6450 gimple_assign_set_rhs_from_tree (&bsi, comp);
726a989a
RB
6451 use->stmt = gsi_stmt (bsi);
6452 }
8b11a64c
ZD
6453}
6454
55791fcd
XDL
6455/* Performs a peephole optimization to reorder the iv update statement with
6456 a mem ref to enable instruction combining in later phases. The mem ref uses
6457 the iv value before the update, so the reordering transformation requires
6458 adjustment of the offset. CAND is the selected IV_CAND.
6459
6460 Example:
6461
6462 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6463 iv2 = iv1 + 1;
6464
6465 if (t < val) (1)
6466 goto L;
6467 goto Head;
6468
6469
6470 directly propagating t over to (1) will introduce overlapping live range
6471 thus increase register pressure. This peephole transform it into:
6472
6473
6474 iv2 = iv1 + 1;
6475 t = MEM_REF (base, iv2, 8, 8);
6476 if (t < val)
6477 goto L;
6478 goto Head;
6479*/
6480
6481static void
6482adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6483{
6484 tree var_after;
6485 gimple iv_update, stmt;
6486 basic_block bb;
6487 gimple_stmt_iterator gsi, gsi_iv;
6488
6489 if (cand->pos != IP_NORMAL)
6490 return;
6491
6492 var_after = cand->var_after;
6493 iv_update = SSA_NAME_DEF_STMT (var_after);
6494
6495 bb = gimple_bb (iv_update);
6496 gsi = gsi_last_nondebug_bb (bb);
6497 stmt = gsi_stmt (gsi);
6498
6499 /* Only handle conditional statement for now. */
6500 if (gimple_code (stmt) != GIMPLE_COND)
6501 return;
6502
6503 gsi_prev_nondebug (&gsi);
6504 stmt = gsi_stmt (gsi);
6505 if (stmt != iv_update)
6506 return;
6507
6508 gsi_prev_nondebug (&gsi);
6509 if (gsi_end_p (gsi))
6510 return;
6511
6512 stmt = gsi_stmt (gsi);
6513 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6514 return;
6515
6516 if (stmt != use->stmt)
6517 return;
6518
6519 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6520 return;
6521
6522 if (dump_file && (dump_flags & TDF_DETAILS))
6523 {
6524 fprintf (dump_file, "Reordering \n");
6525 print_gimple_stmt (dump_file, iv_update, 0, 0);
6526 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6527 fprintf (dump_file, "\n");
6528 }
6529
6530 gsi = gsi_for_stmt (use->stmt);
6531 gsi_iv = gsi_for_stmt (iv_update);
6532 gsi_move_before (&gsi_iv, &gsi);
6533
6534 cand->pos = IP_BEFORE_USE;
6535 cand->incremented_at = use->stmt;
6536}
6537
8b11a64c
ZD
6538/* Rewrites USE (address that is an iv) using candidate CAND. */
6539
6540static void
6541rewrite_use_address (struct ivopts_data *data,
6542 struct iv_use *use, struct iv_cand *cand)
6543{
73f30c63 6544 aff_tree aff;
726a989a 6545 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
d7c0c068 6546 tree base_hint = NULL_TREE;
880a1451 6547 tree ref, iv;
73f30c63 6548 bool ok;
8b11a64c 6549
55791fcd 6550 adjust_iv_update_pos (cand, use);
73f30c63
ZD
6551 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6552 gcc_assert (ok);
ac182688 6553 unshare_aff_combination (&aff);
8b11a64c 6554
d7c0c068
UW
6555 /* To avoid undefined overflow problems, all IV candidates use unsigned
6556 integer types. The drawback is that this makes it impossible for
6557 create_mem_ref to distinguish an IV that is based on a memory object
6558 from one that represents simply an offset.
6559
6560 To work around this problem, we pass a hint to create_mem_ref that
6561 indicates which variable (if any) in aff is an IV based on a memory
6562 object. Note that we only consider the candidate. If this is not
6563 based on an object, the base of the reference is in some subexpression
6564 of the use -- but these will use pointer types, so they are recognized
6565 by the create_mem_ref heuristics anyway. */
6566 if (cand->iv->base_object)
6567 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6568
880a1451
XDL
6569 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6570 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
a41e5e86 6571 reference_alias_ptr_type (*use->op_p),
880a1451 6572 iv, base_hint, data->speed);
ac182688
ZD
6573 copy_ref_info (ref, *use->op_p);
6574 *use->op_p = ref;
8b11a64c
ZD
6575}
6576
6577/* Rewrites USE (the condition such that one of the arguments is an iv) using
6578 candidate CAND. */
6579
6580static void
6581rewrite_use_compare (struct ivopts_data *data,
6582 struct iv_use *use, struct iv_cand *cand)
6583{
b697aed4 6584 tree comp, *var_p, op, bound;
726a989a 6585 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
8b11a64c 6586 enum tree_code compare;
f5f12961 6587 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
b697aed4
ZD
6588 bool ok;
6589
f5f12961
ZD
6590 bound = cp->value;
6591 if (bound)
8b11a64c 6592 {
9e7376e5
ZD
6593 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6594 tree var_type = TREE_TYPE (var);
dc5b3407 6595 gimple_seq stmts;
9e7376e5 6596
18081149
XDL
6597 if (dump_file && (dump_flags & TDF_DETAILS))
6598 {
6599 fprintf (dump_file, "Replacing exit test: ");
6600 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6601 }
d8af4ba3 6602 compare = cp->comp;
b697aed4 6603 bound = unshare_expr (fold_convert (var_type, bound));
dc5b3407
ZD
6604 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6605 if (stmts)
6606 gsi_insert_seq_on_edge_immediate (
6607 loop_preheader_edge (data->current_loop),
6608 stmts);
8b11a64c 6609
538dd0b7
DM
6610 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6611 gimple_cond_set_lhs (cond_stmt, var);
6612 gimple_cond_set_code (cond_stmt, compare);
6613 gimple_cond_set_rhs (cond_stmt, op);
8b11a64c
ZD
6614 return;
6615 }
6616
6617 /* The induction variable elimination failed; just express the original
6618 giv. */
ac182688 6619 comp = get_computation (data->current_loop, use, cand);
73f30c63 6620 gcc_assert (comp != NULL_TREE);
8b11a64c 6621
726a989a 6622 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
b697aed4 6623 gcc_assert (ok);
8b11a64c 6624
726a989a
RB
6625 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6626 true, GSI_SAME_STMT);
8b11a64c
ZD
6627}
6628
8b11a64c
ZD
6629/* Rewrites USE using candidate CAND. */
6630
6631static void
cfaab3a9 6632rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
8b11a64c
ZD
6633{
6634 switch (use->type)
6635 {
6636 case USE_NONLINEAR_EXPR:
6637 rewrite_use_nonlinear_expr (data, use, cand);
6638 break;
6639
8b11a64c
ZD
6640 case USE_ADDRESS:
6641 rewrite_use_address (data, use, cand);
6642 break;
6643
6644 case USE_COMPARE:
6645 rewrite_use_compare (data, use, cand);
6646 break;
6647
6648 default:
1e128c5f 6649 gcc_unreachable ();
8b11a64c 6650 }
b8698a0f 6651
cff4e50d 6652 update_stmt (use->stmt);
8b11a64c
ZD
6653}
6654
6655/* Rewrite the uses using the selected induction variables. */
6656
6657static void
6658rewrite_uses (struct ivopts_data *data)
6659{
6660 unsigned i;
6661 struct iv_cand *cand;
6662 struct iv_use *use;
6663
6664 for (i = 0; i < n_iv_uses (data); i++)
6665 {
6666 use = iv_use (data, i);
6667 cand = use->selected;
1e128c5f 6668 gcc_assert (cand);
8b11a64c
ZD
6669
6670 rewrite_use (data, use, cand);
6671 }
6672}
6673
6674/* Removes the ivs that are not used after rewriting. */
6675
6676static void
6677remove_unused_ivs (struct ivopts_data *data)
6678{
6679 unsigned j;
87c476a2 6680 bitmap_iterator bi;
ae0a4449 6681 bitmap toremove = BITMAP_ALLOC (NULL);
8b11a64c 6682
ae0a4449
AO
6683 /* Figure out an order in which to release SSA DEFs so that we don't
6684 release something that we'd have to propagate into a debug stmt
6685 afterwards. */
87c476a2 6686 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
6687 {
6688 struct version_info *info;
6689
6690 info = ver_info (data, j);
6691 if (info->iv
6e42ce54 6692 && !integer_zerop (info->iv->step)
8b11a64c
ZD
6693 && !info->inv_id
6694 && !info->iv->have_use_for
6695 && !info->preserve_biv)
e1066560
AO
6696 {
6697 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6698
6699 tree def = info->iv->ssa_name;
6700
6701 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6702 {
6703 imm_use_iterator imm_iter;
6704 use_operand_p use_p;
6705 gimple stmt;
6706 int count = 0;
6707
6708 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6709 {
6710 if (!gimple_debug_bind_p (stmt))
6711 continue;
6712
6713 /* We just want to determine whether to do nothing
6714 (count == 0), to substitute the computed
6715 expression into a single use of the SSA DEF by
6716 itself (count == 1), or to use a debug temp
6717 because the SSA DEF is used multiple times or as
6718 part of a larger expression (count > 1). */
6719 count++;
6720 if (gimple_debug_bind_get_value (stmt) != def)
6721 count++;
6722
6723 if (count > 1)
6724 BREAK_FROM_IMM_USE_STMT (imm_iter);
6725 }
6726
6727 if (!count)
6728 continue;
6729
6730 struct iv_use dummy_use;
6731 struct iv_cand *best_cand = NULL, *cand;
6732 unsigned i, best_pref = 0, cand_pref;
6733
6734 memset (&dummy_use, 0, sizeof (dummy_use));
6735 dummy_use.iv = info->iv;
6736 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6737 {
6738 cand = iv_use (data, i)->selected;
6739 if (cand == best_cand)
6740 continue;
6741 cand_pref = operand_equal_p (cand->iv->step,
6742 info->iv->step, 0)
6743 ? 4 : 0;
6744 cand_pref
6745 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6746 == TYPE_MODE (TREE_TYPE (info->iv->base))
6747 ? 2 : 0;
6748 cand_pref
6749 += TREE_CODE (cand->iv->base) == INTEGER_CST
6750 ? 1 : 0;
6751 if (best_cand == NULL || best_pref < cand_pref)
6752 {
6753 best_cand = cand;
6754 best_pref = cand_pref;
6755 }
6756 }
6757
6758 if (!best_cand)
6759 continue;
6760
6761 tree comp = get_computation_at (data->current_loop,
6762 &dummy_use, best_cand,
6763 SSA_NAME_DEF_STMT (def));
6764 if (!comp)
6765 continue;
6766
6767 if (count > 1)
6768 {
6769 tree vexpr = make_node (DEBUG_EXPR_DECL);
6770 DECL_ARTIFICIAL (vexpr) = 1;
6771 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6772 if (SSA_NAME_VAR (def))
6773 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6774 else
6775 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
538dd0b7
DM
6776 gdebug *def_temp
6777 = gimple_build_debug_bind (vexpr, comp, NULL);
e1066560
AO
6778 gimple_stmt_iterator gsi;
6779
6780 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6781 gsi = gsi_after_labels (gimple_bb
6782 (SSA_NAME_DEF_STMT (def)));
6783 else
6784 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6785
6786 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6787 comp = vexpr;
6788 }
6789
6790 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6791 {
6792 if (!gimple_debug_bind_p (stmt))
6793 continue;
6794
6795 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6796 SET_USE (use_p, comp);
6797
6798 update_stmt (stmt);
6799 }
6800 }
6801 }
87c476a2 6802 }
ae0a4449
AO
6803
6804 release_defs_bitset (toremove);
6805
6806 BITMAP_FREE (toremove);
8b11a64c
ZD
6807}
6808
e2102efc 6809/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
b787e7a2 6810 for hash_map::traverse. */
e2102efc 6811
b787e7a2
TS
6812bool
6813free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
e2102efc 6814{
b787e7a2 6815 free (value);
e2102efc
XDL
6816 return true;
6817}
6818
8b11a64c
ZD
6819/* Frees data allocated by the optimization of a single loop. */
6820
6821static void
6822free_loop_data (struct ivopts_data *data)
6823{
6824 unsigned i, j;
87c476a2 6825 bitmap_iterator bi;
69ebd99d 6826 tree obj;
8b11a64c 6827
15814ba0
PB
6828 if (data->niters)
6829 {
b787e7a2
TS
6830 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6831 delete data->niters;
15814ba0
PB
6832 data->niters = NULL;
6833 }
ca4c3169 6834
87c476a2 6835 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
6836 {
6837 struct version_info *info;
6838
6839 info = ver_info (data, i);
04695783 6840 free (info->iv);
8b11a64c
ZD
6841 info->iv = NULL;
6842 info->has_nonlin_use = false;
6843 info->preserve_biv = false;
6844 info->inv_id = 0;
87c476a2 6845 }
8b11a64c 6846 bitmap_clear (data->relevant);
b1b02be2 6847 bitmap_clear (data->important_candidates);
8b11a64c
ZD
6848
6849 for (i = 0; i < n_iv_uses (data); i++)
6850 {
6851 struct iv_use *use = iv_use (data, i);
6852
6853 free (use->iv);
8bdbfff5 6854 BITMAP_FREE (use->related_cands);
8b11a64c
ZD
6855 for (j = 0; j < use->n_map_members; j++)
6856 if (use->cost_map[j].depends_on)
8bdbfff5 6857 BITMAP_FREE (use->cost_map[j].depends_on);
8b11a64c
ZD
6858 free (use->cost_map);
6859 free (use);
6860 }
9771b263 6861 data->iv_uses.truncate (0);
8b11a64c
ZD
6862
6863 for (i = 0; i < n_iv_cands (data); i++)
6864 {
6865 struct iv_cand *cand = iv_cand (data, i);
6866
04695783 6867 free (cand->iv);
9be872b7
ZD
6868 if (cand->depends_on)
6869 BITMAP_FREE (cand->depends_on);
8b11a64c
ZD
6870 free (cand);
6871 }
9771b263 6872 data->iv_candidates.truncate (0);
8b11a64c
ZD
6873
6874 if (data->version_info_size < num_ssa_names)
6875 {
6876 data->version_info_size = 2 * num_ssa_names;
6877 free (data->version_info);
5ed6ace5 6878 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8b11a64c
ZD
6879 }
6880
6881 data->max_inv_id = 0;
6882
9771b263 6883 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
69ebd99d 6884 SET_DECL_RTL (obj, NULL_RTX);
8b11a64c 6885
9771b263 6886 decl_rtl_to_reset.truncate (0);
18081149 6887
c203e8a7 6888 data->inv_expr_tab->empty ();
18081149 6889 data->inv_expr_id = 0;
8b11a64c
ZD
6890}
6891
6892/* Finalizes data structures used by the iv optimization pass. LOOPS is the
6893 loop tree. */
6894
6895static void
9a2ef6b8 6896tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
8b11a64c 6897{
8b11a64c
ZD
6898 free_loop_data (data);
6899 free (data->version_info);
8bdbfff5
NS
6900 BITMAP_FREE (data->relevant);
6901 BITMAP_FREE (data->important_candidates);
8b11a64c 6902
9771b263
DN
6903 decl_rtl_to_reset.release ();
6904 data->iv_uses.release ();
6905 data->iv_candidates.release ();
c203e8a7
TS
6906 delete data->inv_expr_tab;
6907 data->inv_expr_tab = NULL;
3230c614 6908 free_affine_expand_cache (&data->name_expansion_cache);
8b11a64c
ZD
6909}
6910
bec922f0
SL
6911/* Returns true if the loop body BODY includes any function calls. */
6912
6913static bool
6914loop_body_includes_call (basic_block *body, unsigned num_nodes)
6915{
6916 gimple_stmt_iterator gsi;
6917 unsigned i;
6918
6919 for (i = 0; i < num_nodes; i++)
6920 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6921 {
6922 gimple stmt = gsi_stmt (gsi);
6923 if (is_gimple_call (stmt)
6924 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6925 return true;
6926 }
6927 return false;
6928}
6929
8b11a64c
ZD
6930/* Optimizes the LOOP. Returns true if anything changed. */
6931
6932static bool
6933tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6934{
6935 bool changed = false;
b1b02be2 6936 struct iv_ca *iv_ca;
d8af4ba3 6937 edge exit = single_dom_exit (loop);
2c08497a 6938 basic_block *body;
8b11a64c 6939
15814ba0 6940 gcc_assert (!data->niters);
8b11a64c 6941 data->current_loop = loop;
f40751dd 6942 data->speed = optimize_loop_for_speed_p (loop);
8b11a64c
ZD
6943
6944 if (dump_file && (dump_flags & TDF_DETAILS))
6945 {
6946 fprintf (dump_file, "Processing loop %d\n", loop->num);
b8698a0f 6947
8b11a64c
ZD
6948 if (exit)
6949 {
6950 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6951 exit->src->index, exit->dest->index);
726a989a 6952 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8b11a64c
ZD
6953 fprintf (dump_file, "\n");
6954 }
6955
6956 fprintf (dump_file, "\n");
6957 }
6958
2c08497a 6959 body = get_loop_body (loop);
bec922f0 6960 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
2c08497a
BS
6961 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6962 free (body);
6963
d8af4ba3
ZD
6964 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6965
8b11a64c
ZD
6966 /* For each ssa name determines whether it behaves as an induction variable
6967 in some loop. */
6968 if (!find_induction_variables (data))
6969 goto finish;
6970
6971 /* Finds interesting uses (item 1). */
6972 find_interesting_uses (data);
6973 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6974 goto finish;
6975
6976 /* Finds candidates for the induction variables (item 2). */
6977 find_iv_candidates (data);
6978
6979 /* Calculates the costs (item 3, part 1). */
8b11a64c 6980 determine_iv_costs (data);
2c08497a 6981 determine_use_iv_costs (data);
8b11a64c
ZD
6982 determine_set_costs (data);
6983
6984 /* Find the optimal set of induction variables (item 3, part 2). */
b1b02be2
ZD
6985 iv_ca = find_optimal_iv_set (data);
6986 if (!iv_ca)
8b11a64c
ZD
6987 goto finish;
6988 changed = true;
6989
6990 /* Create the new induction variables (item 4, part 1). */
b1b02be2
ZD
6991 create_new_ivs (data, iv_ca);
6992 iv_ca_free (&iv_ca);
b8698a0f 6993
8b11a64c
ZD
6994 /* Rewrite the uses (item 4, part 2). */
6995 rewrite_uses (data);
6996
6997 /* Remove the ivs that are unused after rewriting. */
6998 remove_unused_ivs (data);
6999
8b11a64c
ZD
7000 /* We have changed the structure of induction variables; it might happen
7001 that definitions in the scev database refer to some of them that were
7002 eliminated. */
7003 scev_reset ();
7004
7005finish:
7006 free_loop_data (data);
7007
7008 return changed;
7009}
7010
d73be268 7011/* Main entry point. Optimizes induction variables in loops. */
8b11a64c
ZD
7012
7013void
d73be268 7014tree_ssa_iv_optimize (void)
8b11a64c
ZD
7015{
7016 struct loop *loop;
7017 struct ivopts_data data;
7018
9a2ef6b8 7019 tree_ssa_iv_optimize_init (&data);
8b11a64c
ZD
7020
7021 /* Optimize the loops starting with the innermost ones. */
f0bd40b1 7022 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8b11a64c 7023 {
8679c649
JH
7024 if (dump_file && (dump_flags & TDF_DETAILS))
7025 flow_loop_dump (loop, dump_file, NULL, 1);
e9472263
ZD
7026
7027 tree_ssa_iv_optimize_loop (&data, loop);
8b11a64c
ZD
7028 }
7029
9a2ef6b8 7030 tree_ssa_iv_optimize_finalize (&data);
8b11a64c 7031}