]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
target.def (rtx_costs): Remove "code" param, add "mode".
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
8b11a64c 1/* Induction variable optimizations.
5624e564 2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
b8698a0f 3
8b11a64c 4This file is part of GCC.
b8698a0f 5
8b11a64c
ZD
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
9dcd6f09 8Free Software Foundation; either version 3, or (at your option) any
8b11a64c 9later version.
b8698a0f 10
8b11a64c
ZD
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
b8698a0f 15
8b11a64c 16You should have received a copy of the GNU General Public License
9dcd6f09
NC
17along with GCC; see the file COPYING3. If not see
18<http://www.gnu.org/licenses/>. */
8b11a64c
ZD
19
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction variables
      and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
63
64#include "config.h"
65#include "system.h"
66#include "coretypes.h"
c7131fb2 67#include "backend.h"
8b11a64c 68#include "tree.h"
c7131fb2
AM
69#include "gimple.h"
70#include "rtl.h"
71#include "ssa.h"
72#include "alias.h"
40e23961 73#include "fold-const.h"
d8a2d370 74#include "stor-layout.h"
8b11a64c 75#include "tm_p.h"
cf835838 76#include "gimple-pretty-print.h"
2fb9a547
AM
77#include "internal-fn.h"
78#include "tree-eh.h"
45b0be94 79#include "gimplify.h"
5be5c238 80#include "gimple-iterator.h"
18f429e2 81#include "gimplify-me.h"
442b4905
AM
82#include "cgraph.h"
83#include "tree-cfg.h"
e28030cf
AM
84#include "tree-ssa-loop-ivopts.h"
85#include "tree-ssa-loop-manip.h"
86#include "tree-ssa-loop-niter.h"
442b4905 87#include "tree-ssa-loop.h"
36566b39 88#include "flags.h"
36566b39
PK
89#include "insn-config.h"
90#include "expmed.h"
91#include "dojump.h"
92#include "explow.h"
93#include "calls.h"
94#include "emit-rtl.h"
95#include "varasm.h"
96#include "stmt.h"
d8a2d370 97#include "expr.h"
442b4905 98#include "tree-dfa.h"
7a300452 99#include "tree-ssa.h"
8b11a64c 100#include "cfgloop.h"
8b11a64c 101#include "tree-pass.h"
8b11a64c
ZD
102#include "tree-chrec.h"
103#include "tree-scalar-evolution.h"
8b11a64c 104#include "params.h"
39b4020c 105#include "langhooks.h"
73f30c63 106#include "tree-affine.h"
8318b0d9 107#include "target.h"
18081149 108#include "tree-inline.h"
17fc049f 109#include "tree-ssa-propagate.h"
4484a35a 110#include "tree-ssa-address.h"
9b2b7279 111#include "builtins.h"
28002f1a 112#include "tree-vectorizer.h"
7735d6c7 113
2eb79bbb
SB
114/* FIXME: Expressions are expanded to RTL in this pass to determine the
115 cost of different addressing modes. This should be moved to a TBD
116 interface between the GIMPLE and RTL worlds. */
1c1ad7bb 117#include "recog.h"
2eb79bbb 118
8b11a64c
ZD
/* The value used to represent an infinite cost.  */
#define INFTY 10000000

/* Fallback iteration count used by avg_loop_niter when no estimate is
   available.  The LOOP argument is accepted but not used.  */
#define AVG_LOOP_NITER(LOOP) 5
123
18081149
XDL
124/* Returns the expected number of loop iterations for LOOP.
125 The average trip count is computed from profile data if it
126 exists. */
127
128static inline HOST_WIDE_INT
129avg_loop_niter (struct loop *loop)
130{
652c4c71 131 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
18081149
XDL
132 if (niter == -1)
133 return AVG_LOOP_NITER (loop);
134
135 return niter;
136}
8b11a64c
ZD
137
138/* Representation of the induction variable. */
139struct iv
140{
141 tree base; /* Initial value of the iv. */
e6845c23 142 tree base_object; /* A memory object to that the induction variable points. */
8b11a64c
ZD
143 tree step; /* Step of the iv (constant only). */
144 tree ssa_name; /* The ssa name with the value. */
c70ed622 145 unsigned use_id; /* The identifier in the use if it is the case. */
8b11a64c
ZD
146 bool biv_p; /* Is it a biv? */
147 bool have_use_for; /* Do we already have a use for it? */
c70ed622 148 bool no_overflow; /* True if the iv doesn't overflow. */
8b11a64c
ZD
149};
150
151/* Per-ssa version information (induction variable descriptions, etc.). */
152struct version_info
153{
154 tree name; /* The ssa name. */
155 struct iv *iv; /* Induction variable description. */
156 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
157 an expression that is not an induction variable. */
8b11a64c 158 bool preserve_biv; /* For the original biv, whether to preserve it. */
448f65db 159 unsigned inv_id; /* Id of an invariant. */
8b11a64c
ZD
160};
161
8b11a64c
ZD
/* Kinds of induction variable uses.  */
enum use_type
{
  /* Use in a nonlinear expression.  */
  USE_NONLINEAR_EXPR,

  /* Use in an address.  */
  USE_ADDRESS,

  /* Use is a compare.  */
  USE_COMPARE
};
169
6e8c65f6
ZD
/* Cost of a computation.  */
typedef struct
{
  /* The runtime cost.  */
  int cost;

  /* The estimate of the complexity of the code for the computation
     (in no concrete units -- the complexity field should be larger
     for more complex expressions and addressing modes).  */
  unsigned complexity;
} comp_cost;
179
7735d6c7 180static const comp_cost no_cost = {0, 0};
6e8c65f6
ZD
181static const comp_cost infinite_cost = {INFTY, INFTY};
182
8b11a64c
ZD
183/* The candidate - cost pair. */
184struct cost_pair
185{
186 struct iv_cand *cand; /* The candidate. */
6e8c65f6 187 comp_cost cost; /* The cost. */
8b11a64c
ZD
188 bitmap depends_on; /* The list of invariants that have to be
189 preserved. */
f5f12961
ZD
190 tree value; /* For final value elimination, the expression for
191 the final value of the iv. For iv elimination,
192 the new bound to compare with. */
d8af4ba3 193 enum tree_code comp; /* For iv elimination, the comparison. */
18081149 194 int inv_expr_id; /* Loop invariant expression id. */
8b11a64c
ZD
195};
196
197/* Use. */
198struct iv_use
199{
200 unsigned id; /* The id of the use. */
a7e43c57 201 unsigned sub_id; /* The id of the sub use. */
8b11a64c
ZD
202 enum use_type type; /* Type of the use. */
203 struct iv *iv; /* The induction variable it is based on. */
726a989a 204 gimple stmt; /* Statement in that it occurs. */
8b11a64c 205 tree *op_p; /* The place where it occurs. */
b1b02be2
ZD
206 bitmap related_cands; /* The set of "related" iv candidates, plus the common
207 important ones. */
8b11a64c
ZD
208
209 unsigned n_map_members; /* Number of candidates in the cost_map list. */
210 struct cost_pair *cost_map;
211 /* The costs wrto the iv candidates. */
212
213 struct iv_cand *selected;
214 /* The selected candidate. */
a7e43c57
BC
215
216 struct iv_use *next; /* The next sub use. */
217 tree addr_base; /* Base address with const offset stripped. */
218 unsigned HOST_WIDE_INT addr_offset;
219 /* Const offset stripped from base address. */
8b11a64c
ZD
220};
221
/* The position where the iv is computed.  */
enum iv_position
{
  /* At the end, just before the exit condition.  */
  IP_NORMAL,

  /* At the end of the latch block.  */
  IP_END,

  /* Immediately before a specific use.  */
  IP_BEFORE_USE,

  /* Immediately after a specific use.  */
  IP_AFTER_USE,

  /* The original biv.  */
  IP_ORIGINAL
};
231
232/* The induction variable candidate. */
233struct iv_cand
234{
235 unsigned id; /* The number of the candidate. */
236 bool important; /* Whether this is an "important" candidate, i.e. such
237 that it should be considered by all uses. */
448f65db 238 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
726a989a 239 gimple incremented_at;/* For original biv, the statement where it is
8b11a64c
ZD
240 incremented. */
241 tree var_before; /* The variable used for it before increment. */
242 tree var_after; /* The variable used for it after increment. */
243 struct iv *iv; /* The value of the candidate. NULL for
244 "pseudocandidate" used to indicate the possibility
245 to replace the final value of an iv by direct
246 computation of the value. */
247 unsigned cost; /* Cost of the candidate. */
2c08497a
BS
248 unsigned cost_step; /* Cost of the candidate's increment operation. */
249 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
250 where it is incremented. */
9be872b7
ZD
251 bitmap depends_on; /* The list of invariants that are used in step of the
252 biv. */
8b11a64c
ZD
253};
254
18081149
XDL
255/* Loop invariant expression hashtable entry. */
256struct iv_inv_expr_ent
257{
258 tree expr;
259 int id;
260 hashval_t hash;
261};
262
8b11a64c
ZD
263/* The data used by the induction variable optimizations. */
264
/* Pointer to a use; the element type of ivopts_data::iv_uses.  */
typedef struct iv_use *iv_use_p;

/* Pointer to a candidate; the element type of
   ivopts_data::iv_candidates.  */
typedef struct iv_cand *iv_cand_p;
69ebd99d 268
4a8fb1a1
LC
269/* Hashtable helpers. */
270
95fbe13e 271struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
4a8fb1a1 272{
67f58944
TS
273 static inline hashval_t hash (const iv_inv_expr_ent *);
274 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
4a8fb1a1
LC
275};
276
277/* Hash function for loop invariant expressions. */
278
279inline hashval_t
67f58944 280iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
4a8fb1a1
LC
281{
282 return expr->hash;
283}
284
285/* Hash table equality function for expressions. */
286
287inline bool
67f58944
TS
288iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
289 const iv_inv_expr_ent *expr2)
4a8fb1a1
LC
290{
291 return expr1->hash == expr2->hash
292 && operand_equal_p (expr1->expr, expr2->expr, 0);
293}
294
8b11a64c
ZD
295struct ivopts_data
296{
297 /* The currently optimized loop. */
298 struct loop *current_loop;
28002f1a 299 source_location loop_loc;
8b11a64c 300
8f5929e1 301 /* Numbers of iterations for all exits of the current loop. */
b787e7a2 302 hash_map<edge, tree_niter_desc *> *niters;
f40751dd 303
9a2ef6b8
ZD
304 /* Number of registers used in it. */
305 unsigned regs_used;
306
8b11a64c
ZD
307 /* The size of version_info array allocated. */
308 unsigned version_info_size;
309
310 /* The array of information for the ssa names. */
311 struct version_info *version_info;
312
18081149
XDL
313 /* The hashtable of loop invariant expressions created
314 by ivopt. */
c203e8a7 315 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
18081149
XDL
316
317 /* Loop invariant expression id. */
318 int inv_expr_id;
319
8b11a64c
ZD
320 /* The bitmap of indices in version_info whose value was changed. */
321 bitmap relevant;
322
8b11a64c 323 /* The uses of induction variables. */
9771b263 324 vec<iv_use_p> iv_uses;
8b11a64c
ZD
325
326 /* The candidates. */
9771b263 327 vec<iv_cand_p> iv_candidates;
8b11a64c 328
80cad5fa
ZD
329 /* A bitmap of important candidates. */
330 bitmap important_candidates;
331
3230c614
BC
332 /* Cache used by tree_to_aff_combination_expand. */
333 hash_map<tree, name_expansion *> *name_expansion_cache;
334
8f5929e1
JJ
335 /* The maximum invariant id. */
336 unsigned max_inv_id;
337
6f929985
BC
338 /* Obstack for iv structure. */
339 struct obstack iv_obstack;
340
8b11a64c
ZD
341 /* Whether to consider just related and important candidates when replacing a
342 use. */
343 bool consider_all_candidates;
8f5929e1
JJ
344
345 /* Are we optimizing for speed? */
346 bool speed;
bec922f0
SL
347
348 /* Whether the loop body includes any function calls. */
349 bool body_includes_call;
d8af4ba3
ZD
350
351 /* Whether the loop body can only be exited via single exit. */
352 bool loop_single_exit_p;
8b11a64c
ZD
353};
354
b1b02be2
ZD
355/* An assignment of iv candidates to uses. */
356
357struct iv_ca
358{
359 /* The number of uses covered by the assignment. */
360 unsigned upto;
361
362 /* Number of uses that cannot be expressed by the candidates in the set. */
363 unsigned bad_uses;
364
365 /* Candidate assigned to a use, together with the related costs. */
366 struct cost_pair **cand_for_use;
367
368 /* Number of times each candidate is used. */
369 unsigned *n_cand_uses;
370
371 /* The candidates used. */
372 bitmap cands;
373
36f5ada1
ZD
374 /* The number of candidates in the set. */
375 unsigned n_cands;
376
b1b02be2
ZD
377 /* Total number of registers needed. */
378 unsigned n_regs;
379
380 /* Total cost of expressing uses. */
6e8c65f6 381 comp_cost cand_use_cost;
b1b02be2
ZD
382
383 /* Total cost of candidates. */
384 unsigned cand_cost;
385
386 /* Number of times each invariant is used. */
387 unsigned *n_invariant_uses;
388
f06e400f
XDL
389 /* The array holding the number of uses of each loop
390 invariant expressions created by ivopt. */
391 unsigned *used_inv_expr;
392
393 /* The number of created loop invariants. */
394 unsigned num_used_inv_expr;
395
b1b02be2 396 /* Total cost of the assignment. */
6e8c65f6 397 comp_cost cost;
b1b02be2
ZD
398};
399
/* Difference of two iv candidate assignments, kept as a singly linked
   list with one node per changed use.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};
416
8b11a64c
ZD
/* Bound on the number of candidates below which all candidates are
   considered.  Read from PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences than this, we just give up (it is
   quite unlikely that optimizing such a loop would help, and it would
   take ages).  Read from PARAM_IV_MAX_CONSIDERED_USES.  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try
   removing unnecessary ivs from the set.  Read from
   PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
433
8b11a64c
ZD
434/* The list of trees for that the decl_rtl field must be reset is stored
435 here. */
436
9771b263 437static vec<tree> decl_rtl_to_reset;
8b11a64c 438
e6450c11
TV
439static comp_cost force_expr_to_var_cost (tree, bool);
440
8b11a64c
ZD
441/* Number of uses recorded in DATA. */
442
443static inline unsigned
444n_iv_uses (struct ivopts_data *data)
445{
9771b263 446 return data->iv_uses.length ();
8b11a64c
ZD
447}
448
449/* Ith use recorded in DATA. */
450
451static inline struct iv_use *
452iv_use (struct ivopts_data *data, unsigned i)
453{
9771b263 454 return data->iv_uses[i];
8b11a64c
ZD
455}
456
457/* Number of candidates recorded in DATA. */
458
459static inline unsigned
460n_iv_cands (struct ivopts_data *data)
461{
9771b263 462 return data->iv_candidates.length ();
8b11a64c
ZD
463}
464
465/* Ith candidate recorded in DATA. */
466
467static inline struct iv_cand *
468iv_cand (struct ivopts_data *data, unsigned i)
469{
9771b263 470 return data->iv_candidates[i];
8b11a64c
ZD
471}
472
8b11a64c
ZD
473/* The single loop exit if it dominates the latch, NULL otherwise. */
474
b7eae7b8 475edge
8b11a64c
ZD
476single_dom_exit (struct loop *loop)
477{
ac8f6c69 478 edge exit = single_exit (loop);
8b11a64c
ZD
479
480 if (!exit)
481 return NULL;
482
483 if (!just_once_each_iteration_p (loop, exit->src))
484 return NULL;
485
486 return exit;
487}
488
489/* Dumps information about the induction variable IV to FILE. */
490
8b11a64c 491void
e185f450 492dump_iv (FILE *file, struct iv *iv, bool dump_name)
8b11a64c 493{
e185f450 494 if (iv->ssa_name && dump_name)
e6845c23
ZD
495 {
496 fprintf (file, "ssa name ");
497 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
498 fprintf (file, "\n");
499 }
8b11a64c 500
2f4675b4
ZD
501 fprintf (file, " type ");
502 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
503 fprintf (file, "\n");
504
8b11a64c
ZD
505 if (iv->step)
506 {
507 fprintf (file, " base ");
508 print_generic_expr (file, iv->base, TDF_SLIM);
509 fprintf (file, "\n");
510
511 fprintf (file, " step ");
512 print_generic_expr (file, iv->step, TDF_SLIM);
513 fprintf (file, "\n");
514 }
515 else
516 {
517 fprintf (file, " invariant ");
518 print_generic_expr (file, iv->base, TDF_SLIM);
519 fprintf (file, "\n");
520 }
521
e6845c23
ZD
522 if (iv->base_object)
523 {
524 fprintf (file, " base object ");
525 print_generic_expr (file, iv->base_object, TDF_SLIM);
526 fprintf (file, "\n");
527 }
528
8b11a64c
ZD
529 if (iv->biv_p)
530 fprintf (file, " is a biv\n");
531}
532
533/* Dumps information about the USE to FILE. */
534
8b11a64c
ZD
535void
536dump_use (FILE *file, struct iv_use *use)
537{
a7e43c57
BC
538 fprintf (file, "use %d", use->id);
539 if (use->sub_id)
540 fprintf (file, ".%d", use->sub_id);
541
542 fprintf (file, "\n");
8b11a64c
ZD
543
544 switch (use->type)
545 {
546 case USE_NONLINEAR_EXPR:
547 fprintf (file, " generic\n");
548 break;
549
8b11a64c
ZD
550 case USE_ADDRESS:
551 fprintf (file, " address\n");
552 break;
553
554 case USE_COMPARE:
555 fprintf (file, " compare\n");
556 break;
557
558 default:
1e128c5f 559 gcc_unreachable ();
8b11a64c
ZD
560 }
561
2f4675b4 562 fprintf (file, " in statement ");
726a989a 563 print_gimple_stmt (file, use->stmt, 0, 0);
2f4675b4
ZD
564 fprintf (file, "\n");
565
566 fprintf (file, " at position ");
567 if (use->op_p)
568 print_generic_expr (file, *use->op_p, TDF_SLIM);
569 fprintf (file, "\n");
570
e185f450 571 dump_iv (file, use->iv, false);
2f4675b4 572
eec5fec9
ZD
573 if (use->related_cands)
574 {
575 fprintf (file, " related candidates ");
576 dump_bitmap (file, use->related_cands);
577 }
8b11a64c
ZD
578}
579
580/* Dumps information about the uses to FILE. */
581
8b11a64c
ZD
582void
583dump_uses (FILE *file, struct ivopts_data *data)
584{
585 unsigned i;
586 struct iv_use *use;
587
588 for (i = 0; i < n_iv_uses (data); i++)
589 {
590 use = iv_use (data, i);
a7e43c57
BC
591 do
592 {
593 dump_use (file, use);
594 use = use->next;
595 }
596 while (use);
8b11a64c
ZD
597 fprintf (file, "\n");
598 }
599}
600
601/* Dumps information about induction variable candidate CAND to FILE. */
602
8b11a64c
ZD
603void
604dump_cand (FILE *file, struct iv_cand *cand)
605{
606 struct iv *iv = cand->iv;
607
608 fprintf (file, "candidate %d%s\n",
609 cand->id, cand->important ? " (important)" : "");
610
9be872b7
ZD
611 if (cand->depends_on)
612 {
613 fprintf (file, " depends on ");
614 dump_bitmap (file, cand->depends_on);
615 }
616
8b11a64c
ZD
617 if (!iv)
618 {
619 fprintf (file, " final value replacement\n");
620 return;
621 }
622
18081149
XDL
623 if (cand->var_before)
624 {
625 fprintf (file, " var_before ");
626 print_generic_expr (file, cand->var_before, TDF_SLIM);
627 fprintf (file, "\n");
628 }
629 if (cand->var_after)
630 {
631 fprintf (file, " var_after ");
632 print_generic_expr (file, cand->var_after, TDF_SLIM);
633 fprintf (file, "\n");
634 }
635
8b11a64c
ZD
636 switch (cand->pos)
637 {
638 case IP_NORMAL:
639 fprintf (file, " incremented before exit test\n");
640 break;
641
2c08497a
BS
642 case IP_BEFORE_USE:
643 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
644 break;
645
646 case IP_AFTER_USE:
647 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
648 break;
649
8b11a64c
ZD
650 case IP_END:
651 fprintf (file, " incremented at end\n");
652 break;
653
654 case IP_ORIGINAL:
655 fprintf (file, " original biv\n");
656 break;
657 }
658
e185f450 659 dump_iv (file, iv, false);
8b11a64c
ZD
660}
661
662/* Returns the info for ssa version VER. */
663
664static inline struct version_info *
665ver_info (struct ivopts_data *data, unsigned ver)
666{
667 return data->version_info + ver;
668}
669
670/* Returns the info for ssa name NAME. */
671
672static inline struct version_info *
673name_info (struct ivopts_data *data, tree name)
674{
675 return ver_info (data, SSA_NAME_VERSION (name));
676}
677
8b11a64c
ZD
678/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
679 emitted in LOOP. */
680
681static bool
726a989a 682stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
8b11a64c 683{
726a989a 684 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
8b11a64c 685
1e128c5f 686 gcc_assert (bb);
8b11a64c
ZD
687
688 if (sbb == loop->latch)
689 return true;
690
691 if (sbb != bb)
692 return false;
693
694 return stmt == last_stmt (bb);
695}
696
697/* Returns true if STMT if after the place where the original induction
2c08497a
BS
698 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
699 if the positions are identical. */
8b11a64c
ZD
700
701static bool
2c08497a 702stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
8b11a64c 703{
726a989a
RB
704 basic_block cand_bb = gimple_bb (cand->incremented_at);
705 basic_block stmt_bb = gimple_bb (stmt);
8b11a64c
ZD
706
707 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
708 return false;
709
710 if (stmt_bb != cand_bb)
711 return true;
712
2c08497a
BS
713 if (true_if_equal
714 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
715 return true;
716 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
8b11a64c
ZD
717}
718
719/* Returns true if STMT if after the place where the induction variable
720 CAND is incremented in LOOP. */
721
722static bool
726a989a 723stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
8b11a64c
ZD
724{
725 switch (cand->pos)
726 {
727 case IP_END:
728 return false;
729
730 case IP_NORMAL:
731 return stmt_after_ip_normal_pos (loop, stmt);
732
733 case IP_ORIGINAL:
2c08497a
BS
734 case IP_AFTER_USE:
735 return stmt_after_inc_pos (cand, stmt, false);
736
737 case IP_BEFORE_USE:
738 return stmt_after_inc_pos (cand, stmt, true);
8b11a64c
ZD
739
740 default:
1e128c5f 741 gcc_unreachable ();
8b11a64c
ZD
742 }
743}
744
dcccd88d
ZD
745/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
746
747static bool
748abnormal_ssa_name_p (tree exp)
749{
750 if (!exp)
751 return false;
752
753 if (TREE_CODE (exp) != SSA_NAME)
754 return false;
755
756 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
757}
758
759/* Returns false if BASE or INDEX contains a ssa name that occurs in an
760 abnormal phi node. Callback for for_each_index. */
761
762static bool
763idx_contains_abnormal_ssa_name_p (tree base, tree *index,
764 void *data ATTRIBUTE_UNUSED)
765{
9f7ccf69 766 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
dcccd88d
ZD
767 {
768 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
769 return false;
770 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
771 return false;
772 }
773
774 return !abnormal_ssa_name_p (*index);
775}
776
777/* Returns true if EXPR contains a ssa name that occurs in an
778 abnormal phi node. */
779
e5db3515 780bool
dcccd88d
ZD
781contains_abnormal_ssa_name_p (tree expr)
782{
783 enum tree_code code;
c22940cd 784 enum tree_code_class codeclass;
dcccd88d
ZD
785
786 if (!expr)
787 return false;
788
789 code = TREE_CODE (expr);
c22940cd 790 codeclass = TREE_CODE_CLASS (code);
dcccd88d
ZD
791
792 if (code == SSA_NAME)
793 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
794
795 if (code == INTEGER_CST
796 || is_gimple_min_invariant (expr))
797 return false;
798
799 if (code == ADDR_EXPR)
800 return !for_each_index (&TREE_OPERAND (expr, 0),
801 idx_contains_abnormal_ssa_name_p,
802 NULL);
803
0a74c758
SP
804 if (code == COND_EXPR)
805 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
806 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
807 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
808
c22940cd 809 switch (codeclass)
dcccd88d
ZD
810 {
811 case tcc_binary:
812 case tcc_comparison:
813 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
814 return true;
815
816 /* Fallthru. */
817 case tcc_unary:
818 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
819 return true;
820
821 break;
822
823 default:
824 gcc_unreachable ();
825 }
826
827 return false;
828}
829
d8af4ba3 830/* Returns the structure describing number of iterations determined from
ca4c3169
ZD
831 EXIT of DATA->current_loop, or NULL if something goes wrong. */
832
d8af4ba3
ZD
833static struct tree_niter_desc *
834niter_for_exit (struct ivopts_data *data, edge exit)
ca4c3169 835{
d8af4ba3 836 struct tree_niter_desc *desc;
b787e7a2 837 tree_niter_desc **slot;
ca4c3169 838
15814ba0 839 if (!data->niters)
ca4c3169 840 {
b787e7a2 841 data->niters = new hash_map<edge, tree_niter_desc *>;
15814ba0
PB
842 slot = NULL;
843 }
844 else
b787e7a2 845 slot = data->niters->get (exit);
dcccd88d 846
15814ba0
PB
847 if (!slot)
848 {
d8af4ba3
ZD
849 /* Try to determine number of iterations. We cannot safely work with ssa
850 names that appear in phi nodes on abnormal edges, so that we do not
851 create overlapping life ranges for them (PR 27283). */
e2102efc 852 desc = XNEW (struct tree_niter_desc);
d8af4ba3
ZD
853 if (!number_of_iterations_exit (data->current_loop,
854 exit, desc, true)
855 || contains_abnormal_ssa_name_p (desc->niter))
856 {
857 XDELETE (desc);
858 desc = NULL;
859 }
b787e7a2 860 data->niters->put (exit, desc);
ca4c3169
ZD
861 }
862 else
b787e7a2 863 desc = *slot;
ca4c3169 864
d8af4ba3 865 return desc;
ca4c3169
ZD
866}
867
d8af4ba3 868/* Returns the structure describing number of iterations determined from
ca4c3169
ZD
869 single dominating exit of DATA->current_loop, or NULL if something
870 goes wrong. */
871
d8af4ba3 872static struct tree_niter_desc *
ca4c3169
ZD
873niter_for_single_dom_exit (struct ivopts_data *data)
874{
875 edge exit = single_dom_exit (data->current_loop);
876
877 if (!exit)
878 return NULL;
879
d8af4ba3 880 return niter_for_exit (data, exit);
ca4c3169
ZD
881}
882
8b11a64c 883/* Initializes data structures used by the iv optimization pass, stored
9a2ef6b8 884 in DATA. */
8b11a64c
ZD
885
886static void
9a2ef6b8 887tree_ssa_iv_optimize_init (struct ivopts_data *data)
8b11a64c 888{
8b11a64c 889 data->version_info_size = 2 * num_ssa_names;
5ed6ace5 890 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8bdbfff5
NS
891 data->relevant = BITMAP_ALLOC (NULL);
892 data->important_candidates = BITMAP_ALLOC (NULL);
8b11a64c 893 data->max_inv_id = 0;
15814ba0 894 data->niters = NULL;
9771b263
DN
895 data->iv_uses.create (20);
896 data->iv_candidates.create (20);
c203e8a7 897 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
18081149 898 data->inv_expr_id = 0;
3230c614 899 data->name_expansion_cache = NULL;
9771b263 900 decl_rtl_to_reset.create (20);
6f929985 901 gcc_obstack_init (&data->iv_obstack);
8b11a64c
ZD
902}
903
e6845c23
ZD
904/* Returns a memory object to that EXPR points. In case we are able to
905 determine that it does not point to any such object, NULL is returned. */
906
907static tree
908determine_base_object (tree expr)
909{
910 enum tree_code code = TREE_CODE (expr);
5be014d5 911 tree base, obj;
e6845c23 912
975626a7
ZD
913 /* If this is a pointer casted to any type, we need to determine
914 the base object for the pointer; so handle conversions before
915 throwing away non-pointer expressions. */
1043771b 916 if (CONVERT_EXPR_P (expr))
975626a7
ZD
917 return determine_base_object (TREE_OPERAND (expr, 0));
918
e6845c23
ZD
919 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
920 return NULL_TREE;
921
922 switch (code)
923 {
924 case INTEGER_CST:
925 return NULL_TREE;
926
927 case ADDR_EXPR:
928 obj = TREE_OPERAND (expr, 0);
929 base = get_base_address (obj);
930
931 if (!base)
f5e2738c 932 return expr;
e6845c23 933
70f34814 934 if (TREE_CODE (base) == MEM_REF)
f5e2738c 935 return determine_base_object (TREE_OPERAND (base, 0));
7299dbfb 936
62b37d91
RG
937 return fold_convert (ptr_type_node,
938 build_fold_addr_expr (base));
e6845c23 939
5be014d5
AP
940 case POINTER_PLUS_EXPR:
941 return determine_base_object (TREE_OPERAND (expr, 0));
942
e6845c23
ZD
943 case PLUS_EXPR:
944 case MINUS_EXPR:
5be014d5
AP
945 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
946 gcc_unreachable ();
e6845c23
ZD
947
948 default:
949 return fold_convert (ptr_type_node, expr);
950 }
951}
952
be9a0da5
BC
953/* Return true if address expression with non-DECL_P operand appears
954 in EXPR. */
955
956static bool
957contain_complex_addr_expr (tree expr)
958{
959 bool res = false;
960
961 STRIP_NOPS (expr);
962 switch (TREE_CODE (expr))
963 {
964 case POINTER_PLUS_EXPR:
965 case PLUS_EXPR:
966 case MINUS_EXPR:
967 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
968 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
969 break;
970
971 case ADDR_EXPR:
972 return (!DECL_P (TREE_OPERAND (expr, 0)));
973
974 default:
975 return false;
976 }
977
978 return res;
979}
980
8b11a64c 981/* Allocates an induction variable with given initial value BASE and step STEP
c70ed622 982 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
8b11a64c
ZD
983
984static struct iv *
6f929985
BC
985alloc_iv (struct ivopts_data *data, tree base, tree step,
986 bool no_overflow = false)
8b11a64c 987{
be9a0da5 988 tree expr = base;
6f929985
BC
989 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
990 sizeof (struct iv));
6e42ce54 991 gcc_assert (step != NULL_TREE);
8b11a64c 992
be9a0da5 993 /* Lower address expression in base except ones with DECL_P as operand.
be8c1c8c
BC
994 By doing this:
995 1) More accurate cost can be computed for address expressions;
996 2) Duplicate candidates won't be created for bases in different
997 forms, like &a[0] and &a. */
be9a0da5
BC
998 STRIP_NOPS (expr);
999 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1000 || contain_complex_addr_expr (expr))
be8c1c8c
BC
1001 {
1002 aff_tree comb;
be9a0da5 1003 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
be8c1c8c
BC
1004 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1005 }
1006
8b11a64c 1007 iv->base = base;
be9a0da5 1008 iv->base_object = determine_base_object (base);
8b11a64c
ZD
1009 iv->step = step;
1010 iv->biv_p = false;
1011 iv->have_use_for = false;
1012 iv->use_id = 0;
1013 iv->ssa_name = NULL_TREE;
c70ed622 1014 iv->no_overflow = no_overflow;
8b11a64c
ZD
1015
1016 return iv;
1017}
1018
c70ed622
BC
1019/* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1020 doesn't overflow. */
8b11a64c
ZD
1021
1022static void
c70ed622
BC
1023set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1024 bool no_overflow)
8b11a64c
ZD
1025{
1026 struct version_info *info = name_info (data, iv);
1027
1e128c5f 1028 gcc_assert (!info->iv);
8b11a64c
ZD
1029
1030 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
6f929985 1031 info->iv = alloc_iv (data, base, step, no_overflow);
8b11a64c
ZD
1032 info->iv->ssa_name = iv;
1033}
1034
1035/* Finds induction variable declaration for VAR. */
1036
1037static struct iv *
1038get_iv (struct ivopts_data *data, tree var)
1039{
1040 basic_block bb;
6e42ce54
ZD
1041 tree type = TREE_TYPE (var);
1042
1043 if (!POINTER_TYPE_P (type)
1044 && !INTEGRAL_TYPE_P (type))
1045 return NULL;
1046
8b11a64c
ZD
1047 if (!name_info (data, var)->iv)
1048 {
726a989a 1049 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
8b11a64c
ZD
1050
1051 if (!bb
1052 || !flow_bb_inside_loop_p (data->current_loop, bb))
c70ed622 1053 set_iv (data, var, var, build_int_cst (type, 0), true);
8b11a64c
ZD
1054 }
1055
1056 return name_info (data, var)->iv;
1057}
1058
fc06280e
BC
1059/* Return the first non-invariant ssa var found in EXPR. */
1060
1061static tree
1062extract_single_var_from_expr (tree expr)
1063{
1064 int i, n;
1065 tree tmp;
1066 enum tree_code code;
1067
1068 if (!expr || is_gimple_min_invariant (expr))
1069 return NULL;
1070
1071 code = TREE_CODE (expr);
1072 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1073 {
1074 n = TREE_OPERAND_LENGTH (expr);
1075 for (i = 0; i < n; i++)
1076 {
1077 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1078
1079 if (tmp)
1080 return tmp;
1081 }
1082 }
1083 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1084}
1085
8b11a64c
ZD
1086/* Finds basic ivs. */
1087
1088static bool
1089find_bivs (struct ivopts_data *data)
1090{
538dd0b7 1091 gphi *phi;
c70ed622 1092 affine_iv iv;
fc06280e 1093 tree step, type, base, stop;
8b11a64c
ZD
1094 bool found = false;
1095 struct loop *loop = data->current_loop;
538dd0b7 1096 gphi_iterator psi;
8b11a64c 1097
726a989a 1098 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 1099 {
538dd0b7 1100 phi = psi.phi ();
726a989a 1101
8b11a64c
ZD
1102 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1103 continue;
1104
c70ed622 1105 if (virtual_operand_p (PHI_RESULT (phi)))
8b11a64c 1106 continue;
8b11a64c 1107
c70ed622
BC
1108 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1109 continue;
1110
1111 if (integer_zerop (iv.step))
1112 continue;
1113
1114 step = iv.step;
8b11a64c 1115 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
fc06280e
BC
1116 /* Stop expanding iv base at the first ssa var referred by iv step.
1117 Ideally we should stop at any ssa var, because that's expensive
1118 and unusual to happen, we just do it on the first one.
1119
1120 See PR64705 for the rationale. */
1121 stop = extract_single_var_from_expr (step);
1122 base = expand_simple_operations (base, stop);
9be872b7
ZD
1123 if (contains_abnormal_ssa_name_p (base)
1124 || contains_abnormal_ssa_name_p (step))
8b11a64c
ZD
1125 continue;
1126
1127 type = TREE_TYPE (PHI_RESULT (phi));
1128 base = fold_convert (type, base);
9be872b7 1129 if (step)
1ffe34d9
AP
1130 {
1131 if (POINTER_TYPE_P (type))
0d82a1c8 1132 step = convert_to_ptrofftype (step);
1ffe34d9
AP
1133 else
1134 step = fold_convert (type, step);
1135 }
8b11a64c 1136
c70ed622 1137 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
8b11a64c
ZD
1138 found = true;
1139 }
1140
1141 return found;
1142}
1143
1144/* Marks basic ivs. */
1145
1146static void
1147mark_bivs (struct ivopts_data *data)
1148{
538dd0b7
DM
1149 gphi *phi;
1150 gimple def;
726a989a 1151 tree var;
8b11a64c
ZD
1152 struct iv *iv, *incr_iv;
1153 struct loop *loop = data->current_loop;
1154 basic_block incr_bb;
538dd0b7 1155 gphi_iterator psi;
8b11a64c 1156
726a989a 1157 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 1158 {
538dd0b7 1159 phi = psi.phi ();
726a989a 1160
8b11a64c
ZD
1161 iv = get_iv (data, PHI_RESULT (phi));
1162 if (!iv)
1163 continue;
1164
1165 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
b83b5507
BC
1166 def = SSA_NAME_DEF_STMT (var);
1167 /* Don't mark iv peeled from other one as biv. */
1168 if (def
1169 && gimple_code (def) == GIMPLE_PHI
1170 && gimple_bb (def) == loop->header)
1171 continue;
1172
8b11a64c
ZD
1173 incr_iv = get_iv (data, var);
1174 if (!incr_iv)
1175 continue;
1176
1177 /* If the increment is in the subloop, ignore it. */
726a989a 1178 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
8b11a64c
ZD
1179 if (incr_bb->loop_father != data->current_loop
1180 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1181 continue;
1182
1183 iv->biv_p = true;
1184 incr_iv->biv_p = true;
1185 }
1186}
1187
1188/* Checks whether STMT defines a linear induction variable and stores its
a6f778b2 1189 parameters to IV. */
8b11a64c
ZD
1190
1191static bool
726a989a 1192find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
8b11a64c 1193{
fc06280e 1194 tree lhs, stop;
8b11a64c
ZD
1195 struct loop *loop = data->current_loop;
1196
a6f778b2
ZD
1197 iv->base = NULL_TREE;
1198 iv->step = NULL_TREE;
8b11a64c 1199
726a989a 1200 if (gimple_code (stmt) != GIMPLE_ASSIGN)
8b11a64c
ZD
1201 return false;
1202
726a989a 1203 lhs = gimple_assign_lhs (stmt);
8b11a64c
ZD
1204 if (TREE_CODE (lhs) != SSA_NAME)
1205 return false;
1206
f017bf5e 1207 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
8b11a64c
ZD
1208 return false;
1209
fc06280e
BC
1210 /* Stop expanding iv base at the first ssa var referred by iv step.
1211 Ideally we should stop at any ssa var, because that's expensive
1212 and unusual to happen, we just do it on the first one.
1213
1214 See PR64705 for the rationale. */
1215 stop = extract_single_var_from_expr (iv->step);
1216 iv->base = expand_simple_operations (iv->base, stop);
a6f778b2
ZD
1217 if (contains_abnormal_ssa_name_p (iv->base)
1218 || contains_abnormal_ssa_name_p (iv->step))
8b11a64c
ZD
1219 return false;
1220
fc06280e 1221 /* If STMT could throw, then do not consider STMT as defining a GIV.
9f9ca914
JL
1222 While this will suppress optimizations, we can not safely delete this
1223 GIV and associated statements, even if it appears it is not used. */
1224 if (stmt_could_throw_p (stmt))
1225 return false;
1226
8b11a64c
ZD
1227 return true;
1228}
1229
1230/* Finds general ivs in statement STMT. */
1231
1232static void
726a989a 1233find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c 1234{
a6f778b2 1235 affine_iv iv;
8b11a64c 1236
a6f778b2 1237 if (!find_givs_in_stmt_scev (data, stmt, &iv))
8b11a64c
ZD
1238 return;
1239
c70ed622 1240 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
8b11a64c
ZD
1241}
1242
1243/* Finds general ivs in basic block BB. */
1244
1245static void
1246find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1247{
726a989a 1248 gimple_stmt_iterator bsi;
8b11a64c 1249
726a989a
RB
1250 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1251 find_givs_in_stmt (data, gsi_stmt (bsi));
8b11a64c
ZD
1252}
1253
1254/* Finds general ivs. */
1255
1256static void
1257find_givs (struct ivopts_data *data)
1258{
1259 struct loop *loop = data->current_loop;
1260 basic_block *body = get_loop_body_in_dom_order (loop);
1261 unsigned i;
1262
1263 for (i = 0; i < loop->num_nodes; i++)
1264 find_givs_in_bb (data, body[i]);
1265 free (body);
1266}
1267
8b11a64c
ZD
1268/* For each ssa name defined in LOOP determines whether it is an induction
1269 variable and if so, its initial value and step. */
1270
1271static bool
1272find_induction_variables (struct ivopts_data *data)
1273{
1274 unsigned i;
87c476a2 1275 bitmap_iterator bi;
8b11a64c
ZD
1276
1277 if (!find_bivs (data))
1278 return false;
1279
1280 find_givs (data);
1281 mark_bivs (data);
8b11a64c
ZD
1282
1283 if (dump_file && (dump_flags & TDF_DETAILS))
1284 {
d8af4ba3 1285 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
ca4c3169
ZD
1286
1287 if (niter)
8b11a64c
ZD
1288 {
1289 fprintf (dump_file, " number of iterations ");
d8af4ba3
ZD
1290 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1291 if (!integer_zerop (niter->may_be_zero))
1292 {
1293 fprintf (dump_file, "; zero if ");
1294 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1295 }
dcccd88d 1296 fprintf (dump_file, "\n\n");
8b11a64c 1297 };
b8698a0f 1298
8b11a64c
ZD
1299 fprintf (dump_file, "Induction variables:\n\n");
1300
87c476a2 1301 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1302 {
1303 if (ver_info (data, i)->iv)
e185f450 1304 dump_iv (dump_file, ver_info (data, i)->iv, true);
87c476a2 1305 }
8b11a64c
ZD
1306 }
1307
1308 return true;
1309}
1310
a7e43c57
BC
1311/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1312 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1313 is the const offset stripped from IV base. For uses of other types,
1314 ADDR_BASE and ADDR_OFFSET are zero by default. */
8b11a64c
ZD
1315
1316static struct iv_use *
1317record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
a7e43c57
BC
1318 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1319 unsigned HOST_WIDE_INT addr_offset = 0)
8b11a64c 1320{
5ed6ace5 1321 struct iv_use *use = XCNEW (struct iv_use);
8b11a64c
ZD
1322
1323 use->id = n_iv_uses (data);
a7e43c57 1324 use->sub_id = 0;
8b11a64c
ZD
1325 use->type = use_type;
1326 use->iv = iv;
1327 use->stmt = stmt;
1328 use->op_p = use_p;
8bdbfff5 1329 use->related_cands = BITMAP_ALLOC (NULL);
a7e43c57
BC
1330 use->next = NULL;
1331 use->addr_base = addr_base;
1332 use->addr_offset = addr_offset;
8b11a64c 1333
9771b263 1334 data->iv_uses.safe_push (use);
8b11a64c
ZD
1335
1336 return use;
1337}
1338
a7e43c57
BC
1339/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1340 The sub use is recorded under the one whose use id is ID_GROUP. */
1341
1342static struct iv_use *
1343record_sub_use (struct ivopts_data *data, tree *use_p,
1344 struct iv *iv, gimple stmt, enum use_type use_type,
1345 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1346 unsigned int id_group)
1347{
1348 struct iv_use *use = XCNEW (struct iv_use);
1349 struct iv_use *group = iv_use (data, id_group);
1350
1351 use->id = group->id;
1352 use->sub_id = 0;
1353 use->type = use_type;
1354 use->iv = iv;
1355 use->stmt = stmt;
1356 use->op_p = use_p;
1357 use->related_cands = NULL;
1358 use->addr_base = addr_base;
1359 use->addr_offset = addr_offset;
1360
1361 /* Sub use list is maintained in offset ascending order. */
1362 if (addr_offset <= group->addr_offset)
1363 {
1364 use->related_cands = group->related_cands;
1365 group->related_cands = NULL;
1366 use->next = group;
1367 data->iv_uses[id_group] = use;
1368 }
1369 else
1370 {
1371 struct iv_use *pre;
1372 do
1373 {
1374 pre = group;
1375 group = group->next;
1376 }
1377 while (group && addr_offset > group->addr_offset);
1378 use->next = pre->next;
1379 pre->next = use;
1380 }
1381
a7e43c57
BC
1382 return use;
1383}
1384
8b11a64c
ZD
1385/* Checks whether OP is a loop-level invariant and if so, records it.
1386 NONLINEAR_USE is true if the invariant is used in a way we do not
1387 handle specially. */
1388
1389static void
1390record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1391{
1392 basic_block bb;
1393 struct version_info *info;
1394
1395 if (TREE_CODE (op) != SSA_NAME
ea057359 1396 || virtual_operand_p (op))
8b11a64c
ZD
1397 return;
1398
726a989a 1399 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
8b11a64c
ZD
1400 if (bb
1401 && flow_bb_inside_loop_p (data->current_loop, bb))
1402 return;
1403
1404 info = name_info (data, op);
1405 info->name = op;
1406 info->has_nonlin_use |= nonlinear_use;
1407 if (!info->inv_id)
1408 info->inv_id = ++data->max_inv_id;
1409 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1410}
1411
50cc9802 1412/* Checks whether the use OP is interesting and if so, records it. */
8b11a64c
ZD
1413
1414static struct iv_use *
50cc9802 1415find_interesting_uses_op (struct ivopts_data *data, tree op)
8b11a64c
ZD
1416{
1417 struct iv *iv;
726a989a 1418 gimple stmt;
8b11a64c
ZD
1419 struct iv_use *use;
1420
1421 if (TREE_CODE (op) != SSA_NAME)
1422 return NULL;
1423
1424 iv = get_iv (data, op);
1425 if (!iv)
1426 return NULL;
b8698a0f 1427
8b11a64c
ZD
1428 if (iv->have_use_for)
1429 {
1430 use = iv_use (data, iv->use_id);
1431
50cc9802 1432 gcc_assert (use->type == USE_NONLINEAR_EXPR);
8b11a64c
ZD
1433 return use;
1434 }
1435
6e42ce54 1436 if (integer_zerop (iv->step))
8b11a64c
ZD
1437 {
1438 record_invariant (data, op, true);
1439 return NULL;
1440 }
1441 iv->have_use_for = true;
1442
8b11a64c 1443 stmt = SSA_NAME_DEF_STMT (op);
726a989a
RB
1444 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1445 || is_gimple_assign (stmt));
8b11a64c 1446
6f929985 1447 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
8b11a64c
ZD
1448 iv->use_id = use->id;
1449
1450 return use;
1451}
1452
726a989a
RB
1453/* Given a condition in statement STMT, checks whether it is a compare
1454 of an induction variable and an invariant. If this is the case,
1455 CONTROL_VAR is set to location of the iv, BOUND to the location of
1456 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1457 induction variable descriptions, and true is returned. If this is not
1458 the case, CONTROL_VAR and BOUND are set to the arguments of the
1459 condition and false is returned. */
8b11a64c 1460
b697aed4 1461static bool
726a989a 1462extract_cond_operands (struct ivopts_data *data, gimple stmt,
b697aed4
ZD
1463 tree **control_var, tree **bound,
1464 struct iv **iv_var, struct iv **iv_bound)
1465{
726a989a 1466 /* The objects returned when COND has constant operands. */
b697aed4
ZD
1467 static struct iv const_iv;
1468 static tree zero;
6b4db501
MM
1469 tree *op0 = &zero, *op1 = &zero;
1470 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
b697aed4
ZD
1471 bool ret = false;
1472
726a989a 1473 if (gimple_code (stmt) == GIMPLE_COND)
8b11a64c 1474 {
538dd0b7
DM
1475 gcond *cond_stmt = as_a <gcond *> (stmt);
1476 op0 = gimple_cond_lhs_ptr (cond_stmt);
1477 op1 = gimple_cond_rhs_ptr (cond_stmt);
8b11a64c 1478 }
726a989a 1479 else
8b11a64c 1480 {
726a989a
RB
1481 op0 = gimple_assign_rhs1_ptr (stmt);
1482 op1 = gimple_assign_rhs2_ptr (stmt);
8b11a64c
ZD
1483 }
1484
726a989a
RB
1485 zero = integer_zero_node;
1486 const_iv.step = integer_zero_node;
1487
b697aed4
ZD
1488 if (TREE_CODE (*op0) == SSA_NAME)
1489 iv0 = get_iv (data, *op0);
1490 if (TREE_CODE (*op1) == SSA_NAME)
1491 iv1 = get_iv (data, *op1);
8b11a64c 1492
b697aed4
ZD
1493 /* Exactly one of the compared values must be an iv, and the other one must
1494 be an invariant. */
1495 if (!iv0 || !iv1)
1496 goto end;
1497
1498 if (integer_zerop (iv0->step))
1499 {
1500 /* Control variable may be on the other side. */
6b4db501
MM
1501 std::swap (op0, op1);
1502 std::swap (iv0, iv1);
8b11a64c 1503 }
b697aed4
ZD
1504 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1505
1506end:
1507 if (control_var)
6f3d1a5e 1508 *control_var = op0;
b697aed4 1509 if (iv_var)
6f3d1a5e 1510 *iv_var = iv0;
b697aed4
ZD
1511 if (bound)
1512 *bound = op1;
1513 if (iv_bound)
1514 *iv_bound = iv1;
1515
1516 return ret;
1517}
1518
726a989a
RB
1519/* Checks whether the condition in STMT is interesting and if so,
1520 records it. */
b697aed4
ZD
1521
1522static void
726a989a 1523find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
b697aed4
ZD
1524{
1525 tree *var_p, *bound_p;
6f929985 1526 struct iv *var_iv;
8b11a64c 1527
726a989a 1528 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
8b11a64c 1529 {
b697aed4
ZD
1530 find_interesting_uses_op (data, *var_p);
1531 find_interesting_uses_op (data, *bound_p);
8b11a64c
ZD
1532 return;
1533 }
1534
6f929985 1535 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
8b11a64c
ZD
1536}
1537
4ba5ea11
RB
1538/* Returns the outermost loop EXPR is obviously invariant in
1539 relative to the loop LOOP, i.e. if all its operands are defined
1540 outside of the returned loop. Returns NULL if EXPR is not
1541 even obviously invariant in LOOP. */
1542
1543struct loop *
1544outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1545{
1546 basic_block def_bb;
1547 unsigned i, len;
1548
1549 if (is_gimple_min_invariant (expr))
1550 return current_loops->tree_root;
1551
1552 if (TREE_CODE (expr) == SSA_NAME)
1553 {
1554 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1555 if (def_bb)
1556 {
1557 if (flow_bb_inside_loop_p (loop, def_bb))
1558 return NULL;
1559 return superloop_at_depth (loop,
1560 loop_depth (def_bb->loop_father) + 1);
1561 }
1562
1563 return current_loops->tree_root;
1564 }
1565
1566 if (!EXPR_P (expr))
1567 return NULL;
1568
1569 unsigned maxdepth = 0;
1570 len = TREE_OPERAND_LENGTH (expr);
1571 for (i = 0; i < len; i++)
1572 {
1573 struct loop *ivloop;
1574 if (!TREE_OPERAND (expr, i))
1575 continue;
1576
1577 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1578 if (!ivloop)
1579 return NULL;
1580 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1581 }
1582
1583 return superloop_at_depth (loop, maxdepth);
1584}
1585
be35cf60 1586/* Returns true if expression EXPR is obviously invariant in LOOP,
6a732743
SP
1587 i.e. if all its operands are defined outside of the LOOP. LOOP
1588 should not be the function body. */
be35cf60 1589
feb075f4 1590bool
be35cf60
ZD
1591expr_invariant_in_loop_p (struct loop *loop, tree expr)
1592{
1593 basic_block def_bb;
1594 unsigned i, len;
1595
6a732743
SP
1596 gcc_assert (loop_depth (loop) > 0);
1597
be35cf60
ZD
1598 if (is_gimple_min_invariant (expr))
1599 return true;
1600
1601 if (TREE_CODE (expr) == SSA_NAME)
1602 {
726a989a 1603 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
be35cf60
ZD
1604 if (def_bb
1605 && flow_bb_inside_loop_p (loop, def_bb))
1606 return false;
1607
1608 return true;
1609 }
1610
726a989a 1611 if (!EXPR_P (expr))
be35cf60
ZD
1612 return false;
1613
5039610b 1614 len = TREE_OPERAND_LENGTH (expr);
be35cf60 1615 for (i = 0; i < len; i++)
837a549b
JH
1616 if (TREE_OPERAND (expr, i)
1617 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
be35cf60
ZD
1618 return false;
1619
1620 return true;
1621}
1622
8b11a64c
ZD
1623/* Cumulates the steps of indices into DATA and replaces their values with the
1624 initial ones. Returns false when the value of the index cannot be determined.
1625 Callback for for_each_index. */
1626
1627struct ifs_ivopts_data
1628{
1629 struct ivopts_data *ivopts_data;
726a989a 1630 gimple stmt;
6e42ce54 1631 tree step;
8b11a64c
ZD
1632};
1633
1634static bool
1635idx_find_step (tree base, tree *idx, void *data)
1636{
c22940cd 1637 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
8b11a64c 1638 struct iv *iv;
c70ed622 1639 bool use_overflow_semantics = false;
d7f5de76 1640 tree step, iv_base, iv_step, lbound, off;
2f4675b4 1641 struct loop *loop = dta->ivopts_data->current_loop;
be35cf60 1642
be35cf60 1643 /* If base is a component ref, require that the offset of the reference
3a7c155d 1644 be invariant. */
be35cf60
ZD
1645 if (TREE_CODE (base) == COMPONENT_REF)
1646 {
1647 off = component_ref_field_offset (base);
1648 return expr_invariant_in_loop_p (loop, off);
1649 }
1650
1651 /* If base is array, first check whether we will be able to move the
1652 reference out of the loop (in order to take its address in strength
1653 reduction). In order for this to work we need both lower bound
1654 and step to be loop invariants. */
9f7ccf69 1655 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
be35cf60 1656 {
9f7ccf69
EB
1657 /* Moreover, for a range, the size needs to be invariant as well. */
1658 if (TREE_CODE (base) == ARRAY_RANGE_REF
1659 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1660 return false;
1661
be35cf60
ZD
1662 step = array_ref_element_size (base);
1663 lbound = array_ref_low_bound (base);
1664
1665 if (!expr_invariant_in_loop_p (loop, step)
1666 || !expr_invariant_in_loop_p (loop, lbound))
1667 return false;
1668 }
1669
8b11a64c
ZD
1670 if (TREE_CODE (*idx) != SSA_NAME)
1671 return true;
1672
1673 iv = get_iv (dta->ivopts_data, *idx);
1674 if (!iv)
1675 return false;
1676
ea643120
RG
1677 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1678 *&x[0], which is not folded and does not trigger the
1679 ARRAY_REF path below. */
8b11a64c
ZD
1680 *idx = iv->base;
1681
6e42ce54 1682 if (integer_zerop (iv->step))
8b11a64c
ZD
1683 return true;
1684
9f7ccf69 1685 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
1686 {
1687 step = array_ref_element_size (base);
2f4675b4
ZD
1688
1689 /* We only handle addresses whose step is an integer constant. */
1690 if (TREE_CODE (step) != INTEGER_CST)
1691 return false;
2f4675b4 1692 }
8b11a64c 1693 else
5212068f 1694 /* The step for pointer arithmetics already is 1 byte. */
9a9d280e 1695 step = size_one_node;
8b11a64c 1696
d7f5de76
ZD
1697 iv_base = iv->base;
1698 iv_step = iv->step;
c70ed622
BC
1699 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1700 use_overflow_semantics = true;
1701
d7f5de76
ZD
1702 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1703 sizetype, &iv_base, &iv_step, dta->stmt,
c70ed622 1704 use_overflow_semantics))
8b11a64c
ZD
1705 {
1706 /* The index might wrap. */
1707 return false;
1708 }
1709
1c1205fb 1710 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
6e42ce54 1711 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
8b11a64c
ZD
1712
1713 return true;
1714}
1715
1716/* Records use in index IDX. Callback for for_each_index. Ivopts data
1717 object is passed to it in DATA. */
1718
1719static bool
2f4675b4 1720idx_record_use (tree base, tree *idx,
c22940cd 1721 void *vdata)
8b11a64c 1722{
c22940cd 1723 struct ivopts_data *data = (struct ivopts_data *) vdata;
8b11a64c 1724 find_interesting_uses_op (data, *idx);
9f7ccf69 1725 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
1726 {
1727 find_interesting_uses_op (data, array_ref_element_size (base));
1728 find_interesting_uses_op (data, array_ref_low_bound (base));
1729 }
8b11a64c
ZD
1730 return true;
1731}
1732
32159434
CB
1733/* If we can prove that TOP = cst * BOT for some constant cst,
1734 store cst to MUL and return true. Otherwise return false.
1735 The returned value is always sign-extended, regardless of the
1736 signedness of TOP and BOT. */
1737
1738static bool
807e902e 1739constant_multiple_of (tree top, tree bot, widest_int *mul)
32159434
CB
1740{
1741 tree mby;
1742 enum tree_code code;
32159434 1743 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
807e902e 1744 widest_int res, p0, p1;
32159434
CB
1745
1746 STRIP_NOPS (top);
1747 STRIP_NOPS (bot);
1748
1749 if (operand_equal_p (top, bot, 0))
1750 {
807e902e 1751 *mul = 1;
32159434
CB
1752 return true;
1753 }
1754
1755 code = TREE_CODE (top);
1756 switch (code)
1757 {
1758 case MULT_EXPR:
1759 mby = TREE_OPERAND (top, 1);
1760 if (TREE_CODE (mby) != INTEGER_CST)
1761 return false;
1762
1763 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1764 return false;
1765
807e902e 1766 *mul = wi::sext (res * wi::to_widest (mby), precision);
32159434
CB
1767 return true;
1768
1769 case PLUS_EXPR:
1770 case MINUS_EXPR:
1771 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1772 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1773 return false;
1774
1775 if (code == MINUS_EXPR)
27bcd47c 1776 p1 = -p1;
807e902e 1777 *mul = wi::sext (p0 + p1, precision);
32159434
CB
1778 return true;
1779
1780 case INTEGER_CST:
1781 if (TREE_CODE (bot) != INTEGER_CST)
1782 return false;
1783
807e902e
KZ
1784 p0 = widest_int::from (top, SIGNED);
1785 p1 = widest_int::from (bot, SIGNED);
1786 if (p1 == 0)
32159434 1787 return false;
807e902e
KZ
1788 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1789 return res == 0;
32159434
CB
1790
1791 default:
1792 return false;
1793 }
1794}
1795
e75fde1a 1796/* Return true if memory reference REF with step STEP may be unaligned. */
0a915e3d
ZD
1797
1798static bool
32159434 1799may_be_unaligned_p (tree ref, tree step)
0a915e3d 1800{
ac182688 1801 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
607fb860 1802 thus they are not misaligned. */
ac182688
ZD
1803 if (TREE_CODE (ref) == TARGET_MEM_REF)
1804 return false;
1805
e75fde1a 1806 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
abf30454
RB
1807 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1808 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
b8698a0f 1809
e75fde1a
EB
1810 unsigned HOST_WIDE_INT bitpos;
1811 unsigned int ref_align;
1812 get_object_alignment_1 (ref, &ref_align, &bitpos);
1813 if (ref_align < align
1814 || (bitpos % align) != 0
1815 || (bitpos % BITS_PER_UNIT) != 0)
1816 return true;
ce276b61 1817
e75fde1a
EB
1818 unsigned int trailing_zeros = tree_ctz (step);
1819 if (trailing_zeros < HOST_BITS_PER_INT
1820 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1821 return true;
0a915e3d
ZD
1822
1823 return false;
1824}
1825
75715cf6
EB
1826/* Return true if EXPR may be non-addressable. */
1827
bc068a23 1828bool
75715cf6
EB
1829may_be_nonaddressable_p (tree expr)
1830{
1831 switch (TREE_CODE (expr))
1832 {
928bc34f
EB
1833 case TARGET_MEM_REF:
1834 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1835 target, thus they are always addressable. */
1836 return false;
1837
75715cf6
EB
1838 case COMPONENT_REF:
1839 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1840 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1841
75715cf6
EB
1842 case VIEW_CONVERT_EXPR:
1843 /* This kind of view-conversions may wrap non-addressable objects
1844 and make them look addressable. After some processing the
1845 non-addressability may be uncovered again, causing ADDR_EXPRs
1846 of inappropriate objects to be built. */
7a4fbffc
EB
1847 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1848 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1849 return true;
1850
1851 /* ... fall through ... */
928bc34f
EB
1852
1853 case ARRAY_REF:
1854 case ARRAY_RANGE_REF:
7a4fbffc 1855 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
928bc34f 1856
1043771b 1857 CASE_CONVERT:
928bc34f 1858 return true;
75715cf6
EB
1859
1860 default:
1861 break;
1862 }
1863
1864 return false;
1865}
1866
a7e43c57
BC
1867static tree
1868strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1869
1870/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1871 If there is an existing use which has same stripped iv base and step,
1872 this function records this one as a sub use to that; otherwise records
1873 it as a normal one. */
1874
1875static struct iv_use *
1876record_group_use (struct ivopts_data *data, tree *use_p,
1877 struct iv *iv, gimple stmt, enum use_type use_type)
1878{
1879 unsigned int i;
1880 struct iv_use *use;
1881 tree addr_base;
1882 unsigned HOST_WIDE_INT addr_offset;
1883
1884 /* Only support sub use for address type uses, that is, with base
1885 object. */
1886 if (!iv->base_object)
1887 return record_use (data, use_p, iv, stmt, use_type);
1888
1889 addr_base = strip_offset (iv->base, &addr_offset);
1890 for (i = 0; i < n_iv_uses (data); i++)
1891 {
1892 use = iv_use (data, i);
1893 if (use->type != USE_ADDRESS || !use->iv->base_object)
1894 continue;
1895
1896 /* Check if it has the same stripped base and step. */
1897 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1898 && operand_equal_p (iv->step, use->iv->step, 0)
1899 && operand_equal_p (addr_base, use->addr_base, 0))
1900 break;
1901 }
1902
1903 if (i == n_iv_uses (data))
1904 return record_use (data, use_p, iv, stmt,
1905 use_type, addr_base, addr_offset);
1906 else
1907 return record_sub_use (data, use_p, iv, stmt,
1908 use_type, addr_base, addr_offset, i);
1909}
1910
8b11a64c
ZD
1911/* Finds addresses in *OP_P inside STMT. */
1912
1913static void
726a989a 1914find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
8b11a64c 1915{
9a9d280e 1916 tree base = *op_p, step = size_zero_node;
8b11a64c
ZD
1917 struct iv *civ;
1918 struct ifs_ivopts_data ifs_ivopts_data;
1919
e3cc7254
ZD
1920 /* Do not play with volatile memory references. A bit too conservative,
1921 perhaps, but safe. */
726a989a 1922 if (gimple_has_volatile_ops (stmt))
e3cc7254
ZD
1923 goto fail;
1924
8b11a64c
ZD
1925 /* Ignore bitfields for now. Not really something terribly complicated
1926 to handle. TODO. */
75715cf6
EB
1927 if (TREE_CODE (base) == BIT_FIELD_REF)
1928 goto fail;
1929
ac182688
ZD
1930 base = unshare_expr (base);
1931
1932 if (TREE_CODE (base) == TARGET_MEM_REF)
1933 {
1934 tree type = build_pointer_type (TREE_TYPE (base));
1935 tree astep;
1936
1937 if (TMR_BASE (base)
1938 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1939 {
1940 civ = get_iv (data, TMR_BASE (base));
1941 if (!civ)
1942 goto fail;
1943
1944 TMR_BASE (base) = civ->base;
1945 step = civ->step;
1946 }
4d948885
RG
1947 if (TMR_INDEX2 (base)
1948 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1949 {
1950 civ = get_iv (data, TMR_INDEX2 (base));
1951 if (!civ)
1952 goto fail;
1953
1954 TMR_INDEX2 (base) = civ->base;
1955 step = civ->step;
1956 }
ac182688
ZD
1957 if (TMR_INDEX (base)
1958 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1959 {
1960 civ = get_iv (data, TMR_INDEX (base));
1961 if (!civ)
1962 goto fail;
8b11a64c 1963
ac182688
ZD
1964 TMR_INDEX (base) = civ->base;
1965 astep = civ->step;
be35cf60 1966
ac182688
ZD
1967 if (astep)
1968 {
1969 if (TMR_STEP (base))
1970 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1971
6e42ce54 1972 step = fold_build2 (PLUS_EXPR, type, step, astep);
ac182688
ZD
1973 }
1974 }
1975
6e42ce54 1976 if (integer_zerop (step))
ac182688
ZD
1977 goto fail;
1978 base = tree_mem_ref_addr (type, base);
1979 }
1980 else
1981 {
1982 ifs_ivopts_data.ivopts_data = data;
1983 ifs_ivopts_data.stmt = stmt;
9a9d280e 1984 ifs_ivopts_data.step = size_zero_node;
ac182688 1985 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
6e42ce54 1986 || integer_zerop (ifs_ivopts_data.step))
ac182688 1987 goto fail;
6e42ce54 1988 step = ifs_ivopts_data.step;
ac182688 1989
928bc34f
EB
1990 /* Check that the base expression is addressable. This needs
1991 to be done after substituting bases of IVs into it. */
1992 if (may_be_nonaddressable_p (base))
1993 goto fail;
1994
1995 /* Moreover, on strict alignment platforms, check that it is
1996 sufficiently aligned. */
32159434 1997 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
928bc34f
EB
1998 goto fail;
1999
ac182688 2000 base = build_fold_addr_expr (base);
ea643120
RG
2001
2002 /* Substituting bases of IVs into the base expression might
2003 have caused folding opportunities. */
2004 if (TREE_CODE (base) == ADDR_EXPR)
2005 {
2006 tree *ref = &TREE_OPERAND (base, 0);
2007 while (handled_component_p (*ref))
2008 ref = &TREE_OPERAND (*ref, 0);
70f34814 2009 if (TREE_CODE (*ref) == MEM_REF)
cb6b911a 2010 {
bcf71673
RG
2011 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2012 TREE_OPERAND (*ref, 0),
2013 TREE_OPERAND (*ref, 1));
cb6b911a
RG
2014 if (tem)
2015 *ref = tem;
2016 }
ea643120 2017 }
ac182688 2018 }
8b11a64c 2019
6f929985 2020 civ = alloc_iv (data, base, step);
a7e43c57 2021 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
8b11a64c
ZD
2022 return;
2023
2024fail:
2025 for_each_index (op_p, idx_record_use, data);
2026}
2027
2028/* Finds and records invariants used in STMT. */
2029
2030static void
726a989a 2031find_invariants_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c 2032{
f47c96aa
AM
2033 ssa_op_iter iter;
2034 use_operand_p use_p;
8b11a64c
ZD
2035 tree op;
2036
f47c96aa 2037 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 2038 {
f47c96aa 2039 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
2040 record_invariant (data, op, false);
2041 }
2042}
2043
2044/* Finds interesting uses of induction variables in the statement STMT. */
2045
2046static void
726a989a 2047find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
8b11a64c
ZD
2048{
2049 struct iv *iv;
726a989a 2050 tree op, *lhs, *rhs;
f47c96aa
AM
2051 ssa_op_iter iter;
2052 use_operand_p use_p;
726a989a 2053 enum tree_code code;
8b11a64c
ZD
2054
2055 find_invariants_stmt (data, stmt);
2056
726a989a 2057 if (gimple_code (stmt) == GIMPLE_COND)
8b11a64c 2058 {
726a989a 2059 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
2060 return;
2061 }
2062
726a989a 2063 if (is_gimple_assign (stmt))
8b11a64c 2064 {
726a989a
RB
2065 lhs = gimple_assign_lhs_ptr (stmt);
2066 rhs = gimple_assign_rhs1_ptr (stmt);
8b11a64c 2067
726a989a 2068 if (TREE_CODE (*lhs) == SSA_NAME)
8b11a64c
ZD
2069 {
2070 /* If the statement defines an induction variable, the uses are not
2071 interesting by themselves. */
2072
726a989a 2073 iv = get_iv (data, *lhs);
8b11a64c 2074
6e42ce54 2075 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
2076 return;
2077 }
2078
726a989a
RB
2079 code = gimple_assign_rhs_code (stmt);
2080 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2081 && (REFERENCE_CLASS_P (*rhs)
2082 || is_gimple_val (*rhs)))
8b11a64c 2083 {
726a989a
RB
2084 if (REFERENCE_CLASS_P (*rhs))
2085 find_interesting_uses_address (data, stmt, rhs);
2086 else
2087 find_interesting_uses_op (data, *rhs);
8b11a64c 2088
726a989a
RB
2089 if (REFERENCE_CLASS_P (*lhs))
2090 find_interesting_uses_address (data, stmt, lhs);
8b11a64c 2091 return;
8b11a64c 2092 }
726a989a 2093 else if (TREE_CODE_CLASS (code) == tcc_comparison)
8b11a64c 2094 {
726a989a 2095 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
2096 return;
2097 }
2f4675b4
ZD
2098
2099 /* TODO -- we should also handle address uses of type
2100
2101 memory = call (whatever);
2102
2103 and
2104
2105 call (memory). */
8b11a64c
ZD
2106 }
2107
726a989a
RB
2108 if (gimple_code (stmt) == GIMPLE_PHI
2109 && gimple_bb (stmt) == data->current_loop->header)
8b11a64c 2110 {
726a989a 2111 iv = get_iv (data, PHI_RESULT (stmt));
8b11a64c 2112
6e42ce54 2113 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
2114 return;
2115 }
2116
f47c96aa 2117 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 2118 {
f47c96aa 2119 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
2120
2121 if (TREE_CODE (op) != SSA_NAME)
2122 continue;
2123
2124 iv = get_iv (data, op);
2125 if (!iv)
2126 continue;
2127
2128 find_interesting_uses_op (data, op);
2129 }
2130}
2131
2132/* Finds interesting uses of induction variables outside of loops
2133 on loop exit edge EXIT. */
2134
2135static void
2136find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2137{
538dd0b7
DM
2138 gphi *phi;
2139 gphi_iterator psi;
726a989a 2140 tree def;
8b11a64c 2141
726a989a 2142 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 2143 {
538dd0b7 2144 phi = psi.phi ();
8b11a64c 2145 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
ea057359 2146 if (!virtual_operand_p (def))
18081149 2147 find_interesting_uses_op (data, def);
8b11a64c
ZD
2148 }
2149}
2150
2151/* Finds uses of the induction variables that are interesting. */
2152
2153static void
2154find_interesting_uses (struct ivopts_data *data)
2155{
2156 basic_block bb;
726a989a 2157 gimple_stmt_iterator bsi;
8b11a64c
ZD
2158 basic_block *body = get_loop_body (data->current_loop);
2159 unsigned i;
2160 struct version_info *info;
2161 edge e;
2162
2163 if (dump_file && (dump_flags & TDF_DETAILS))
2164 fprintf (dump_file, "Uses:\n\n");
2165
2166 for (i = 0; i < data->current_loop->num_nodes; i++)
2167 {
628f6a4e 2168 edge_iterator ei;
8b11a64c
ZD
2169 bb = body[i];
2170
628f6a4e 2171 FOR_EACH_EDGE (e, ei, bb->succs)
fefa31b5 2172 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
8b11a64c
ZD
2173 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2174 find_interesting_uses_outside (data, e);
2175
726a989a
RB
2176 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2177 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2178 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
b5b8b0ac
AO
2179 if (!is_gimple_debug (gsi_stmt (bsi)))
2180 find_interesting_uses_stmt (data, gsi_stmt (bsi));
8b11a64c
ZD
2181 }
2182
2183 if (dump_file && (dump_flags & TDF_DETAILS))
2184 {
87c476a2
ZD
2185 bitmap_iterator bi;
2186
8b11a64c
ZD
2187 fprintf (dump_file, "\n");
2188
87c476a2 2189 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
2190 {
2191 info = ver_info (data, i);
2192 if (info->inv_id)
2193 {
2194 fprintf (dump_file, " ");
2195 print_generic_expr (dump_file, info->name, TDF_SLIM);
2196 fprintf (dump_file, " is invariant (%d)%s\n",
2197 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2198 }
87c476a2 2199 }
8b11a64c
ZD
2200
2201 fprintf (dump_file, "\n");
2202 }
2203
2204 free (body);
2205}
2206
a7e43c57
BC
2207/* Compute maximum offset of [base + offset] addressing mode
2208 for memory reference represented by USE. */
2209
2210static HOST_WIDE_INT
2211compute_max_addr_offset (struct iv_use *use)
2212{
2213 int width;
2214 rtx reg, addr;
2215 HOST_WIDE_INT i, off;
2216 unsigned list_index, num;
2217 addr_space_t as;
2218 machine_mode mem_mode, addr_mode;
2219 static vec<HOST_WIDE_INT> max_offset_list;
2220
2221 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2222 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2223
2224 num = max_offset_list.length ();
2225 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2226 if (list_index >= num)
2227 {
2228 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2229 for (; num < max_offset_list.length (); num++)
2230 max_offset_list[num] = -1;
2231 }
2232
2233 off = max_offset_list[list_index];
2234 if (off != -1)
2235 return off;
2236
2237 addr_mode = targetm.addr_space.address_mode (as);
2238 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2239 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2240
2241 width = GET_MODE_BITSIZE (addr_mode) - 1;
2242 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2243 width = HOST_BITS_PER_WIDE_INT - 1;
2244
2245 for (i = width; i > 0; i--)
2246 {
2247 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2248 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2249 if (memory_address_addr_space_p (mem_mode, addr, as))
2250 break;
2251
2252 /* For some strict-alignment targets, the offset must be naturally
2253 aligned. Try an aligned offset if mem_mode is not QImode. */
2254 off = ((unsigned HOST_WIDE_INT) 1 << i);
2255 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2256 {
2257 off -= GET_MODE_SIZE (mem_mode);
2258 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2259 if (memory_address_addr_space_p (mem_mode, addr, as))
2260 break;
2261 }
2262 }
2263 if (i == 0)
2264 off = 0;
2265
2266 max_offset_list[list_index] = off;
2267 return off;
2268}
2269
2270/* Check if all small groups should be split. Return true if and
2271 only if:
2272
2273 1) At least one groups contain two uses with different offsets.
2274 2) No group contains more than two uses with different offsets.
2275
2276 Return false otherwise. We want to split such groups because:
2277
2278 1) Small groups don't have much benefit and may interfer with
2279 general candidate selection.
2280 2) Size for problem with only small groups is usually small and
2281 general algorithm can handle it well.
2282
2283 TODO -- Above claim may not hold when auto increment is supported. */
2284
2285static bool
2286split_all_small_groups (struct ivopts_data *data)
2287{
2288 bool split_p = false;
2289 unsigned int i, n, distinct;
2290 struct iv_use *pre, *use;
2291
2292 n = n_iv_uses (data);
2293 for (i = 0; i < n; i++)
2294 {
2295 use = iv_use (data, i);
2296 if (!use->next)
2297 continue;
2298
2299 distinct = 1;
2300 gcc_assert (use->type == USE_ADDRESS);
2301 for (pre = use, use = use->next; use; pre = use, use = use->next)
2302 {
2303 if (pre->addr_offset != use->addr_offset)
2304 distinct++;
2305
2306 if (distinct > 2)
2307 return false;
2308 }
2309 if (distinct == 2)
2310 split_p = true;
2311 }
2312
2313 return split_p;
2314}
2315
2316/* For each group of address type uses, this function further groups
2317 these uses according to the maximum offset supported by target's
2318 [base + offset] addressing mode. */
2319
2320static void
2321group_address_uses (struct ivopts_data *data)
2322{
2323 HOST_WIDE_INT max_offset = -1;
2324 unsigned int i, n, sub_id;
2325 struct iv_use *pre, *use;
2326 unsigned HOST_WIDE_INT addr_offset_first;
2327
2328 /* Reset max offset to split all small groups. */
2329 if (split_all_small_groups (data))
2330 max_offset = 0;
2331
2332 n = n_iv_uses (data);
2333 for (i = 0; i < n; i++)
2334 {
2335 use = iv_use (data, i);
2336 if (!use->next)
2337 continue;
2338
2339 gcc_assert (use->type == USE_ADDRESS);
2340 if (max_offset != 0)
2341 max_offset = compute_max_addr_offset (use);
2342
2343 while (use)
2344 {
2345 sub_id = 0;
2346 addr_offset_first = use->addr_offset;
2347 /* Only uses with offset that can fit in offset part against
2348 the first use can be grouped together. */
2349 for (pre = use, use = use->next;
2350 use && (use->addr_offset - addr_offset_first
2351 <= (unsigned HOST_WIDE_INT) max_offset);
2352 pre = use, use = use->next)
2353 {
2354 use->id = pre->id;
2355 use->sub_id = ++sub_id;
2356 }
2357
2358 /* Break the list and create new group. */
2359 if (use)
2360 {
2361 pre->next = NULL;
2362 use->id = n_iv_uses (data);
2363 use->related_cands = BITMAP_ALLOC (NULL);
2364 data->iv_uses.safe_push (use);
2365 }
2366 }
2367 }
2368
2369 if (dump_file && (dump_flags & TDF_DETAILS))
2370 dump_uses (dump_file, data);
2371}
2372
f5e2738c 2373/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
9be872b7
ZD
2374 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2375 we are at the top-level of the processed address. */
f5e2738c
ZD
2376
2377static tree
9be872b7 2378strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
58fe50d5 2379 HOST_WIDE_INT *offset)
f5e2738c 2380{
9be872b7 2381 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
f5e2738c
ZD
2382 enum tree_code code;
2383 tree type, orig_type = TREE_TYPE (expr);
58fe50d5 2384 HOST_WIDE_INT off0, off1, st;
f5e2738c
ZD
2385 tree orig_expr = expr;
2386
2387 STRIP_NOPS (expr);
9be872b7 2388
f5e2738c
ZD
2389 type = TREE_TYPE (expr);
2390 code = TREE_CODE (expr);
2391 *offset = 0;
2392
2393 switch (code)
2394 {
2395 case INTEGER_CST:
2396 if (!cst_and_fits_in_hwi (expr)
6e682d7e 2397 || integer_zerop (expr))
f5e2738c
ZD
2398 return orig_expr;
2399
2400 *offset = int_cst_value (expr);
ff5e9a94 2401 return build_int_cst (orig_type, 0);
f5e2738c 2402
2d1a1007 2403 case POINTER_PLUS_EXPR:
f5e2738c
ZD
2404 case PLUS_EXPR:
2405 case MINUS_EXPR:
2406 op0 = TREE_OPERAND (expr, 0);
2407 op1 = TREE_OPERAND (expr, 1);
2408
9be872b7
ZD
2409 op0 = strip_offset_1 (op0, false, false, &off0);
2410 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 2411
2d1a1007 2412 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
f5e2738c
ZD
2413 if (op0 == TREE_OPERAND (expr, 0)
2414 && op1 == TREE_OPERAND (expr, 1))
2415 return orig_expr;
2416
6e682d7e 2417 if (integer_zerop (op1))
f5e2738c 2418 expr = op0;
6e682d7e 2419 else if (integer_zerop (op0))
f5e2738c 2420 {
2d1a1007 2421 if (code == MINUS_EXPR)
9be872b7 2422 expr = fold_build1 (NEGATE_EXPR, type, op1);
2d1a1007
AP
2423 else
2424 expr = op1;
f5e2738c
ZD
2425 }
2426 else
9be872b7 2427 expr = fold_build2 (code, type, op0, op1);
f5e2738c
ZD
2428
2429 return fold_convert (orig_type, expr);
2430
7a2faca1
EB
2431 case MULT_EXPR:
2432 op1 = TREE_OPERAND (expr, 1);
2433 if (!cst_and_fits_in_hwi (op1))
2434 return orig_expr;
2435
2436 op0 = TREE_OPERAND (expr, 0);
2437 op0 = strip_offset_1 (op0, false, false, &off0);
2438 if (op0 == TREE_OPERAND (expr, 0))
2439 return orig_expr;
2440
2441 *offset = off0 * int_cst_value (op1);
2442 if (integer_zerop (op0))
2443 expr = op0;
2444 else
2445 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2446
2447 return fold_convert (orig_type, expr);
2448
f5e2738c 2449 case ARRAY_REF:
9f7ccf69 2450 case ARRAY_RANGE_REF:
f5e2738c
ZD
2451 if (!inside_addr)
2452 return orig_expr;
2453
2454 step = array_ref_element_size (expr);
2455 if (!cst_and_fits_in_hwi (step))
2456 break;
2457
2458 st = int_cst_value (step);
2459 op1 = TREE_OPERAND (expr, 1);
9be872b7 2460 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 2461 *offset = off1 * st;
9be872b7
ZD
2462
2463 if (top_compref
6e682d7e 2464 && integer_zerop (op1))
9be872b7
ZD
2465 {
2466 /* Strip the component reference completely. */
2467 op0 = TREE_OPERAND (expr, 0);
2468 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2469 *offset += off0;
2470 return op0;
2471 }
f5e2738c
ZD
2472 break;
2473
2474 case COMPONENT_REF:
58fe50d5
BC
2475 {
2476 tree field;
2477
2478 if (!inside_addr)
2479 return orig_expr;
2480
2481 tmp = component_ref_field_offset (expr);
2482 field = TREE_OPERAND (expr, 1);
2483 if (top_compref
2484 && cst_and_fits_in_hwi (tmp)
2485 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2486 {
2487 HOST_WIDE_INT boffset, abs_off;
2488
2489 /* Strip the component reference completely. */
2490 op0 = TREE_OPERAND (expr, 0);
2491 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2492 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2493 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2494 if (boffset < 0)
2495 abs_off = -abs_off;
2496
2497 *offset = off0 + int_cst_value (tmp) + abs_off;
2498 return op0;
2499 }
2500 }
f5e2738c
ZD
2501 break;
2502
2503 case ADDR_EXPR:
9be872b7
ZD
2504 op0 = TREE_OPERAND (expr, 0);
2505 op0 = strip_offset_1 (op0, true, true, &off0);
2506 *offset += off0;
2507
2508 if (op0 == TREE_OPERAND (expr, 0))
2509 return orig_expr;
2510
d5dc1717 2511 expr = build_fold_addr_expr (op0);
9be872b7
ZD
2512 return fold_convert (orig_type, expr);
2513
70f34814
RG
2514 case MEM_REF:
2515 /* ??? Offset operand? */
9be872b7 2516 inside_addr = false;
f5e2738c
ZD
2517 break;
2518
2519 default:
2520 return orig_expr;
2521 }
2522
2523 /* Default handling of expressions for that we want to recurse into
2524 the first operand. */
2525 op0 = TREE_OPERAND (expr, 0);
9be872b7 2526 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
f5e2738c
ZD
2527 *offset += off0;
2528
2529 if (op0 == TREE_OPERAND (expr, 0)
2530 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2531 return orig_expr;
2532
2533 expr = copy_node (expr);
2534 TREE_OPERAND (expr, 0) = op0;
2535 if (op1)
2536 TREE_OPERAND (expr, 1) = op1;
2537
9be872b7 2538 /* Inside address, we might strip the top level component references,
0fa2e4df 2539 thus changing type of the expression. Handling of ADDR_EXPR
9be872b7
ZD
2540 will fix that. */
2541 expr = fold_convert (orig_type, expr);
2542
2543 return expr;
2544}
2545
2546/* Strips constant offsets from EXPR and stores them to OFFSET. */
2547
2548static tree
2549strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2550{
58fe50d5
BC
2551 HOST_WIDE_INT off;
2552 tree core = strip_offset_1 (expr, false, false, &off);
2553 *offset = off;
2554 return core;
f5e2738c
ZD
2555}
2556
d482f417 2557/* Returns variant of TYPE that can be used as base for different uses.
20527215
ZD
2558 We return unsigned type with the same precision, which avoids problems
2559 with overflows. */
d482f417
ZD
2560
2561static tree
2562generic_type_for (tree type)
2563{
2564 if (POINTER_TYPE_P (type))
20527215 2565 return unsigned_type_for (type);
d482f417
ZD
2566
2567 if (TYPE_UNSIGNED (type))
2568 return type;
2569
2570 return unsigned_type_for (type);
2571}
2572
9be872b7
ZD
2573/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2574 the bitmap to that we should store it. */
2575
2576static struct ivopts_data *fd_ivopts_data;
2577static tree
2578find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2579{
c22940cd 2580 bitmap *depends_on = (bitmap *) data;
9be872b7
ZD
2581 struct version_info *info;
2582
2583 if (TREE_CODE (*expr_p) != SSA_NAME)
2584 return NULL_TREE;
2585 info = name_info (fd_ivopts_data, *expr_p);
2586
2587 if (!info->inv_id || info->has_nonlin_use)
2588 return NULL_TREE;
2589
2590 if (!*depends_on)
2591 *depends_on = BITMAP_ALLOC (NULL);
2592 bitmap_set_bit (*depends_on, info->inv_id);
2593
2594 return NULL_TREE;
2595}
2596
8b11a64c
ZD
2597/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2598 position to POS. If USE is not NULL, the candidate is set as related to
2599 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2600 replacement of the final value of the iv by a direct computation. */
2601
2602static struct iv_cand *
2603add_candidate_1 (struct ivopts_data *data,
2604 tree base, tree step, bool important, enum iv_position pos,
726a989a 2605 struct iv_use *use, gimple incremented_at)
8b11a64c
ZD
2606{
2607 unsigned i;
2608 struct iv_cand *cand = NULL;
d482f417 2609 tree type, orig_type;
b8698a0f 2610
d8af4ba3
ZD
2611 /* For non-original variables, make sure their values are computed in a type
2612 that does not invoke undefined behavior on overflows (since in general,
2613 we cannot prove that these induction variables are non-wrapping). */
2614 if (pos != IP_ORIGINAL)
8b11a64c 2615 {
d482f417
ZD
2616 orig_type = TREE_TYPE (base);
2617 type = generic_type_for (orig_type);
71adbef3 2618 if (type != orig_type)
8b11a64c 2619 {
8b11a64c 2620 base = fold_convert (type, base);
6e42ce54 2621 step = fold_convert (type, step);
8b11a64c
ZD
2622 }
2623 }
2624
2625 for (i = 0; i < n_iv_cands (data); i++)
2626 {
2627 cand = iv_cand (data, i);
2628
2629 if (cand->pos != pos)
2630 continue;
2631
2c08497a
BS
2632 if (cand->incremented_at != incremented_at
2633 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2634 && cand->ainc_use != use))
8b11a64c
ZD
2635 continue;
2636
2637 if (!cand->iv)
2638 {
2639 if (!base && !step)
2640 break;
2641
2642 continue;
2643 }
2644
2645 if (!base && !step)
2646 continue;
2647
6e42ce54 2648 if (operand_equal_p (base, cand->iv->base, 0)
18081149
XDL
2649 && operand_equal_p (step, cand->iv->step, 0)
2650 && (TYPE_PRECISION (TREE_TYPE (base))
2651 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
6e42ce54 2652 break;
8b11a64c
ZD
2653 }
2654
2655 if (i == n_iv_cands (data))
2656 {
5ed6ace5 2657 cand = XCNEW (struct iv_cand);
8b11a64c
ZD
2658 cand->id = i;
2659
2660 if (!base && !step)
2661 cand->iv = NULL;
2662 else
6f929985 2663 cand->iv = alloc_iv (data, base, step);
8b11a64c
ZD
2664
2665 cand->pos = pos;
2666 if (pos != IP_ORIGINAL && cand->iv)
2667 {
2668 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2669 cand->var_after = cand->var_before;
2670 }
2671 cand->important = important;
2672 cand->incremented_at = incremented_at;
9771b263 2673 data->iv_candidates.safe_push (cand);
8b11a64c 2674
9be872b7
ZD
2675 if (step
2676 && TREE_CODE (step) != INTEGER_CST)
2677 {
2678 fd_ivopts_data = data;
2679 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2680 }
2681
2c08497a
BS
2682 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2683 cand->ainc_use = use;
2684 else
2685 cand->ainc_use = NULL;
2686
8b11a64c
ZD
2687 if (dump_file && (dump_flags & TDF_DETAILS))
2688 dump_cand (dump_file, cand);
2689 }
2690
2691 if (important && !cand->important)
2692 {
2693 cand->important = true;
2694 if (dump_file && (dump_flags & TDF_DETAILS))
2695 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2696 }
2697
2698 if (use)
2699 {
2700 bitmap_set_bit (use->related_cands, i);
2701 if (dump_file && (dump_flags & TDF_DETAILS))
2702 fprintf (dump_file, "Candidate %d is related to use %d\n",
2703 cand->id, use->id);
2704 }
2705
2706 return cand;
2707}
2708
4366cf6d
ZD
2709/* Returns true if incrementing the induction variable at the end of the LOOP
2710 is allowed.
2711
2712 The purpose is to avoid splitting latch edge with a biv increment, thus
2713 creating a jump, possibly confusing other optimization passes and leaving
2714 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2715 is not available (so we do not have a better alternative), or if the latch
2716 edge is already nonempty. */
2717
2718static bool
2719allow_ip_end_pos_p (struct loop *loop)
2720{
2721 if (!ip_normal_pos (loop))
2722 return true;
2723
2724 if (!empty_block_p (ip_end_pos (loop)))
2725 return true;
2726
2727 return false;
2728}
2729
2c08497a
BS
2730/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2731 Important field is set to IMPORTANT. */
2732
2733static void
2734add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2735 bool important, struct iv_use *use)
2736{
2737 basic_block use_bb = gimple_bb (use->stmt);
ef4bddc2 2738 machine_mode mem_mode;
2c08497a
BS
2739 unsigned HOST_WIDE_INT cstepi;
2740
2741 /* If we insert the increment in any position other than the standard
2742 ones, we must ensure that it is incremented once per iteration.
2743 It must not be in an inner nested loop, or one side of an if
2744 statement. */
2745 if (use_bb->loop_father != data->current_loop
2746 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2747 || stmt_could_throw_p (use->stmt)
2748 || !cst_and_fits_in_hwi (step))
2749 return;
2750
2751 cstepi = int_cst_value (step);
2752
2753 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
8875e939
RR
2754 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2755 || USE_STORE_PRE_INCREMENT (mem_mode))
2756 && GET_MODE_SIZE (mem_mode) == cstepi)
2757 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2758 || USE_STORE_PRE_DECREMENT (mem_mode))
2759 && GET_MODE_SIZE (mem_mode) == -cstepi))
2c08497a
BS
2760 {
2761 enum tree_code code = MINUS_EXPR;
2762 tree new_base;
2763 tree new_step = step;
2764
2765 if (POINTER_TYPE_P (TREE_TYPE (base)))
2766 {
2767 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2768 code = POINTER_PLUS_EXPR;
2769 }
2770 else
2771 new_step = fold_convert (TREE_TYPE (base), new_step);
2772 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2773 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2774 use->stmt);
2775 }
8875e939
RR
2776 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2777 || USE_STORE_POST_INCREMENT (mem_mode))
2778 && GET_MODE_SIZE (mem_mode) == cstepi)
2779 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2780 || USE_STORE_POST_DECREMENT (mem_mode))
2781 && GET_MODE_SIZE (mem_mode) == -cstepi))
2c08497a
BS
2782 {
2783 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2784 use->stmt);
2785 }
2786}
2787
8b11a64c
ZD
2788/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2789 position to POS. If USE is not NULL, the candidate is set as related to
2790 it. The candidate computation is scheduled on all available positions. */
2791
2792static void
b8698a0f 2793add_candidate (struct ivopts_data *data,
8b11a64c
ZD
2794 tree base, tree step, bool important, struct iv_use *use)
2795{
a7e43c57
BC
2796 gcc_assert (use == NULL || use->sub_id == 0);
2797
8b11a64c 2798 if (ip_normal_pos (data->current_loop))
726a989a 2799 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
4366cf6d
ZD
2800 if (ip_end_pos (data->current_loop)
2801 && allow_ip_end_pos_p (data->current_loop))
726a989a 2802 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2c08497a
BS
2803
2804 if (use != NULL && use->type == USE_ADDRESS)
2805 add_autoinc_candidates (data, base, step, important, use);
8b11a64c
ZD
2806}
2807
2808/* Adds standard iv candidates. */
2809
2810static void
2811add_standard_iv_candidates (struct ivopts_data *data)
2812{
0f250839
RG
2813 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2814
2815 /* The same for a double-integer type if it is still fast enough. */
2816 if (TYPE_PRECISION
2817 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2818 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2819 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2820 build_int_cst (long_integer_type_node, 1), true, NULL);
8b11a64c 2821
39b4020c 2822 /* The same for a double-integer type if it is still fast enough. */
0f250839
RG
2823 if (TYPE_PRECISION
2824 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2825 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2826 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2827 build_int_cst (long_long_integer_type_node, 1), true, NULL);
8b11a64c
ZD
2828}
2829
2830
2831/* Adds candidates bases on the old induction variable IV. */
2832
2833static void
2834add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2835{
726a989a
RB
2836 gimple phi;
2837 tree def;
8b11a64c
ZD
2838 struct iv_cand *cand;
2839
2840 add_candidate (data, iv->base, iv->step, true, NULL);
2841
2842 /* The same, but with initial value zero. */
1a00e5f7
JJ
2843 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2844 add_candidate (data, size_int (0), iv->step, true, NULL);
2845 else
2846 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2847 iv->step, true, NULL);
8b11a64c
ZD
2848
2849 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
726a989a 2850 if (gimple_code (phi) == GIMPLE_PHI)
8b11a64c
ZD
2851 {
2852 /* Additionally record the possibility of leaving the original iv
2853 untouched. */
2854 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
b83b5507
BC
2855 /* Don't add candidate if it's from another PHI node because
2856 it's an affine iv appearing in the form of PEELED_CHREC. */
2857 phi = SSA_NAME_DEF_STMT (def);
2858 if (gimple_code (phi) != GIMPLE_PHI)
2859 {
2860 cand = add_candidate_1 (data,
2861 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2862 SSA_NAME_DEF_STMT (def));
2863 cand->var_before = iv->ssa_name;
2864 cand->var_after = def;
2865 }
2866 else
2867 gcc_assert (gimple_bb (phi) == data->current_loop->header);
8b11a64c
ZD
2868 }
2869}
2870
2871/* Adds candidates based on the old induction variables. */
2872
2873static void
2874add_old_ivs_candidates (struct ivopts_data *data)
2875{
2876 unsigned i;
2877 struct iv *iv;
87c476a2 2878 bitmap_iterator bi;
8b11a64c 2879
87c476a2 2880 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
2881 {
2882 iv = ver_info (data, i)->iv;
6e42ce54 2883 if (iv && iv->biv_p && !integer_zerop (iv->step))
8b11a64c 2884 add_old_iv_candidates (data, iv);
87c476a2 2885 }
8b11a64c
ZD
2886}
2887
2888/* Adds candidates based on the value of the induction variable IV and USE. */
2889
2890static void
2891add_iv_value_candidates (struct ivopts_data *data,
2892 struct iv *iv, struct iv_use *use)
2893{
f5e2738c 2894 unsigned HOST_WIDE_INT offset;
9be872b7 2895 tree base;
1ffe34d9 2896 tree basetype;
8b11a64c 2897
9be872b7 2898 add_candidate (data, iv->base, iv->step, false, use);
be35cf60 2899
9be872b7
ZD
2900 /* The same, but with initial value zero. Make such variable important,
2901 since it is generic enough so that possibly many uses may be based
2902 on it. */
1ffe34d9
AP
2903 basetype = TREE_TYPE (iv->base);
2904 if (POINTER_TYPE_P (basetype))
2905 basetype = sizetype;
2906 add_candidate (data, build_int_cst (basetype, 0),
9be872b7 2907 iv->step, true, use);
8b11a64c 2908
7eeef08e
RG
2909 /* Third, try removing the constant offset. Make sure to even
2910 add a candidate for &a[0] vs. (T *)&a. */
9be872b7 2911 base = strip_offset (iv->base, &offset);
7eeef08e
RG
2912 if (offset
2913 || base != iv->base)
f5e2738c 2914 add_candidate (data, base, iv->step, false, use);
8b11a64c
ZD
2915}
2916
8b11a64c
ZD
2917/* Adds candidates based on the uses. */
2918
2919static void
2920add_derived_ivs_candidates (struct ivopts_data *data)
2921{
2922 unsigned i;
2923
2924 for (i = 0; i < n_iv_uses (data); i++)
2925 {
2926 struct iv_use *use = iv_use (data, i);
2927
2928 if (!use)
2929 continue;
2930
2931 switch (use->type)
2932 {
2933 case USE_NONLINEAR_EXPR:
2934 case USE_COMPARE:
9be872b7 2935 case USE_ADDRESS:
8b11a64c
ZD
2936 /* Just add the ivs based on the value of the iv used here. */
2937 add_iv_value_candidates (data, use->iv, use);
2938 break;
2939
8b11a64c 2940 default:
1e128c5f 2941 gcc_unreachable ();
8b11a64c
ZD
2942 }
2943 }
2944}
2945
b1b02be2
ZD
2946/* Record important candidates and add them to related_cands bitmaps
2947 if needed. */
2948
2949static void
2950record_important_candidates (struct ivopts_data *data)
2951{
2952 unsigned i;
2953 struct iv_use *use;
2954
2955 for (i = 0; i < n_iv_cands (data); i++)
2956 {
2957 struct iv_cand *cand = iv_cand (data, i);
2958
2959 if (cand->important)
2960 bitmap_set_bit (data->important_candidates, i);
2961 }
2962
2963 data->consider_all_candidates = (n_iv_cands (data)
2964 <= CONSIDER_ALL_CANDIDATES_BOUND);
2965
2966 if (data->consider_all_candidates)
2967 {
2968 /* We will not need "related_cands" bitmaps in this case,
2969 so release them to decrease peak memory consumption. */
2970 for (i = 0; i < n_iv_uses (data); i++)
2971 {
2972 use = iv_use (data, i);
8bdbfff5 2973 BITMAP_FREE (use->related_cands);
b1b02be2
ZD
2974 }
2975 }
2976 else
2977 {
2978 /* Add important candidates to the related_cands bitmaps. */
2979 for (i = 0; i < n_iv_uses (data); i++)
2980 bitmap_ior_into (iv_use (data, i)->related_cands,
2981 data->important_candidates);
2982 }
2983}
2984
8b11a64c
ZD
2985/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2986 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2987 we allocate a simple list to every use. */
2988
2989static void
2990alloc_use_cost_map (struct ivopts_data *data)
2991{
79836a12 2992 unsigned i, size, s;
8b11a64c
ZD
2993
2994 for (i = 0; i < n_iv_uses (data); i++)
2995 {
2996 struct iv_use *use = iv_use (data, i);
2997
2998 if (data->consider_all_candidates)
b1b02be2 2999 size = n_iv_cands (data);
8b11a64c
ZD
3000 else
3001 {
79836a12 3002 s = bitmap_count_bits (use->related_cands);
b1b02be2
ZD
3003
3004 /* Round up to the power of two, so that moduling by it is fast. */
79836a12 3005 size = s ? (1 << ceil_log2 (s)) : 1;
8b11a64c
ZD
3006 }
3007
b1b02be2 3008 use->n_map_members = size;
5ed6ace5 3009 use->cost_map = XCNEWVEC (struct cost_pair, size);
8b11a64c
ZD
3010 }
3011}
3012
6e8c65f6
ZD
3013/* Returns description of computation cost of expression whose runtime
3014 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3015
3016static comp_cost
3017new_cost (unsigned runtime, unsigned complexity)
3018{
3019 comp_cost cost;
3020
3021 cost.cost = runtime;
3022 cost.complexity = complexity;
3023
3024 return cost;
3025}
3026
a7e43c57
BC
3027/* Returns true if COST is infinite. */
3028
3029static bool
3030infinite_cost_p (comp_cost cost)
3031{
3032 return cost.cost == INFTY;
3033}
3034
6e8c65f6
ZD
3035/* Adds costs COST1 and COST2. */
3036
3037static comp_cost
3038add_costs (comp_cost cost1, comp_cost cost2)
3039{
a7e43c57
BC
3040 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3041 return infinite_cost;
3042
6e8c65f6
ZD
3043 cost1.cost += cost2.cost;
3044 cost1.complexity += cost2.complexity;
3045
3046 return cost1;
3047}
3048/* Subtracts costs COST1 and COST2. */
3049
3050static comp_cost
3051sub_costs (comp_cost cost1, comp_cost cost2)
3052{
3053 cost1.cost -= cost2.cost;
3054 cost1.complexity -= cost2.complexity;
3055
3056 return cost1;
3057}
3058
3059/* Returns a negative number if COST1 < COST2, a positive number if
3060 COST1 > COST2, and 0 if COST1 = COST2. */
3061
3062static int
3063compare_costs (comp_cost cost1, comp_cost cost2)
3064{
3065 if (cost1.cost == cost2.cost)
3066 return cost1.complexity - cost2.complexity;
3067
3068 return cost1.cost - cost2.cost;
3069}
3070
8b11a64c 3071/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
f5f12961 3072 on invariants DEPENDS_ON and that the value used in expressing it
d8af4ba3 3073 is VALUE, and in case of iv elimination the comparison operator is COMP. */
8b11a64c
ZD
3074
3075static void
3076set_use_iv_cost (struct ivopts_data *data,
6e8c65f6 3077 struct iv_use *use, struct iv_cand *cand,
18081149 3078 comp_cost cost, bitmap depends_on, tree value,
d8af4ba3 3079 enum tree_code comp, int inv_expr_id)
8b11a64c 3080{
b1b02be2
ZD
3081 unsigned i, s;
3082
6e8c65f6 3083 if (infinite_cost_p (cost))
8b11a64c 3084 {
8bdbfff5 3085 BITMAP_FREE (depends_on);
b1b02be2 3086 return;
8b11a64c
ZD
3087 }
3088
3089 if (data->consider_all_candidates)
3090 {
3091 use->cost_map[cand->id].cand = cand;
3092 use->cost_map[cand->id].cost = cost;
3093 use->cost_map[cand->id].depends_on = depends_on;
f5f12961 3094 use->cost_map[cand->id].value = value;
d8af4ba3 3095 use->cost_map[cand->id].comp = comp;
18081149 3096 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
8b11a64c
ZD
3097 return;
3098 }
3099
b1b02be2
ZD
3100 /* n_map_members is a power of two, so this computes modulo. */
3101 s = cand->id & (use->n_map_members - 1);
3102 for (i = s; i < use->n_map_members; i++)
3103 if (!use->cost_map[i].cand)
3104 goto found;
3105 for (i = 0; i < s; i++)
3106 if (!use->cost_map[i].cand)
3107 goto found;
3108
3109 gcc_unreachable ();
8b11a64c 3110
b1b02be2
ZD
3111found:
3112 use->cost_map[i].cand = cand;
3113 use->cost_map[i].cost = cost;
3114 use->cost_map[i].depends_on = depends_on;
f5f12961 3115 use->cost_map[i].value = value;
d8af4ba3 3116 use->cost_map[i].comp = comp;
18081149 3117 use->cost_map[i].inv_expr_id = inv_expr_id;
8b11a64c
ZD
3118}
3119
b1b02be2 3120/* Gets cost of (USE, CANDIDATE) pair. */
8b11a64c 3121
b1b02be2
ZD
3122static struct cost_pair *
3123get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3124 struct iv_cand *cand)
8b11a64c 3125{
b1b02be2
ZD
3126 unsigned i, s;
3127 struct cost_pair *ret;
8b11a64c
ZD
3128
3129 if (!cand)
b1b02be2 3130 return NULL;
8b11a64c
ZD
3131
3132 if (data->consider_all_candidates)
8b11a64c 3133 {
b1b02be2
ZD
3134 ret = use->cost_map + cand->id;
3135 if (!ret->cand)
3136 return NULL;
8b11a64c 3137
b1b02be2 3138 return ret;
8b11a64c 3139 }
b8698a0f 3140
b1b02be2
ZD
3141 /* n_map_members is a power of two, so this computes modulo. */
3142 s = cand->id & (use->n_map_members - 1);
3143 for (i = s; i < use->n_map_members; i++)
3144 if (use->cost_map[i].cand == cand)
3145 return use->cost_map + i;
79836a12
RB
3146 else if (use->cost_map[i].cand == NULL)
3147 return NULL;
b1b02be2
ZD
3148 for (i = 0; i < s; i++)
3149 if (use->cost_map[i].cand == cand)
3150 return use->cost_map + i;
79836a12
RB
3151 else if (use->cost_map[i].cand == NULL)
3152 return NULL;
b1b02be2
ZD
3153
3154 return NULL;
8b11a64c
ZD
3155}
3156
8679c649
JH
3157/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3158static rtx
3159produce_memory_decl_rtl (tree obj, int *regno)
3160{
09e881c9 3161 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
ef4bddc2 3162 machine_mode address_mode = targetm.addr_space.address_mode (as);
8679c649 3163 rtx x;
b8698a0f 3164
e16acfcd 3165 gcc_assert (obj);
8679c649
JH
3166 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3167 {
3168 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
d4ebfa65 3169 x = gen_rtx_SYMBOL_REF (address_mode, name);
8318b0d9
RH
3170 SET_SYMBOL_REF_DECL (x, obj);
3171 x = gen_rtx_MEM (DECL_MODE (obj), x);
09e881c9 3172 set_mem_addr_space (x, as);
8318b0d9 3173 targetm.encode_section_info (obj, x, true);
8679c649
JH
3174 }
3175 else
8318b0d9 3176 {
d4ebfa65 3177 x = gen_raw_REG (address_mode, (*regno)++);
8318b0d9 3178 x = gen_rtx_MEM (DECL_MODE (obj), x);
09e881c9 3179 set_mem_addr_space (x, as);
8318b0d9 3180 }
8679c649 3181
8318b0d9 3182 return x;
8679c649
JH
3183}
3184
8b11a64c
ZD
3185/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3186 walk_tree. DATA contains the actual fake register number. */
3187
3188static tree
3189prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3190{
3191 tree obj = NULL_TREE;
3192 rtx x = NULL_RTX;
c22940cd 3193 int *regno = (int *) data;
8b11a64c
ZD
3194
3195 switch (TREE_CODE (*expr_p))
3196 {
8679c649
JH
3197 case ADDR_EXPR:
3198 for (expr_p = &TREE_OPERAND (*expr_p, 0);
afe84921
RH
3199 handled_component_p (*expr_p);
3200 expr_p = &TREE_OPERAND (*expr_p, 0))
3201 continue;
8679c649 3202 obj = *expr_p;
c401fb6f 3203 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
8679c649
JH
3204 x = produce_memory_decl_rtl (obj, regno);
3205 break;
3206
8b11a64c
ZD
3207 case SSA_NAME:
3208 *ws = 0;
3209 obj = SSA_NAME_VAR (*expr_p);
70b5e7dc
RG
3210 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3211 if (!obj)
3212 return NULL_TREE;
8b11a64c
ZD
3213 if (!DECL_RTL_SET_P (obj))
3214 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3215 break;
3216
3217 case VAR_DECL:
3218 case PARM_DECL:
3219 case RESULT_DECL:
3220 *ws = 0;
3221 obj = *expr_p;
3222
3223 if (DECL_RTL_SET_P (obj))
3224 break;
3225
3226 if (DECL_MODE (obj) == BLKmode)
8679c649 3227 x = produce_memory_decl_rtl (obj, regno);
8b11a64c
ZD
3228 else
3229 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3230
3231 break;
3232
3233 default:
3234 break;
3235 }
3236
3237 if (x)
3238 {
9771b263 3239 decl_rtl_to_reset.safe_push (obj);
8b11a64c
ZD
3240 SET_DECL_RTL (obj, x);
3241 }
3242
3243 return NULL_TREE;
3244}
3245
3246/* Determines cost of the computation of EXPR. */
3247
3248static unsigned
f40751dd 3249computation_cost (tree expr, bool speed)
8b11a64c 3250{
b32d5189
DM
3251 rtx_insn *seq;
3252 rtx rslt;
8b11a64c
ZD
3253 tree type = TREE_TYPE (expr);
3254 unsigned cost;
1d27fed4
DD
3255 /* Avoid using hard regs in ways which may be unsupported. */
3256 int regno = LAST_VIRTUAL_REGISTER + 1;
d52f5295 3257 struct cgraph_node *node = cgraph_node::get (current_function_decl);
5fefcf92 3258 enum node_frequency real_frequency = node->frequency;
8b11a64c 3259
5fefcf92 3260 node->frequency = NODE_FREQUENCY_NORMAL;
f40751dd 3261 crtl->maybe_hot_insn_p = speed;
8b11a64c
ZD
3262 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3263 start_sequence ();
3264 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3265 seq = get_insns ();
3266 end_sequence ();
f40751dd 3267 default_rtl_profile ();
5fefcf92 3268 node->frequency = real_frequency;
8b11a64c 3269
f40751dd 3270 cost = seq_cost (seq, speed);
2ca202e7 3271 if (MEM_P (rslt))
09e881c9
BE
3272 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3273 TYPE_ADDR_SPACE (type), speed);
b5ee6752 3274 else if (!REG_P (rslt))
e548c9df 3275 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
8b11a64c
ZD
3276
3277 return cost;
3278}
3279
3280/* Returns variable containing the value of candidate CAND at statement AT. */
3281
3282static tree
726a989a 3283var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
8b11a64c
ZD
3284{
3285 if (stmt_after_increment (loop, cand, stmt))
3286 return cand->var_after;
3287 else
3288 return cand->var_before;
3289}
3290
b67102ae
ZD
3291/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3292 same precision that is at least as wide as the precision of TYPE, stores
3293 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3294 type of A and B. */
3295
3296static tree
3297determine_common_wider_type (tree *a, tree *b)
3298{
3299 tree wider_type = NULL;
3300 tree suba, subb;
3301 tree atype = TREE_TYPE (*a);
3302
1043771b 3303 if (CONVERT_EXPR_P (*a))
b67102ae
ZD
3304 {
3305 suba = TREE_OPERAND (*a, 0);
3306 wider_type = TREE_TYPE (suba);
3307 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3308 return atype;
3309 }
3310 else
3311 return atype;
3312
1043771b 3313 if (CONVERT_EXPR_P (*b))
b67102ae
ZD
3314 {
3315 subb = TREE_OPERAND (*b, 0);
3316 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3317 return atype;
3318 }
3319 else
3320 return atype;
3321
3322 *a = suba;
3323 *b = subb;
3324 return wider_type;
3325}
3326
8b11a64c 3327/* Determines the expression by that USE is expressed from induction variable
ac182688
ZD
3328 CAND at statement AT in LOOP. The expression is stored in a decomposed
3329 form into AFF. Returns false if USE cannot be expressed using CAND. */
8b11a64c 3330
ac182688
ZD
3331static bool
3332get_computation_aff (struct loop *loop,
726a989a 3333 struct iv_use *use, struct iv_cand *cand, gimple at,
84562394 3334 struct aff_tree *aff)
8b11a64c 3335{
2f4675b4
ZD
3336 tree ubase = use->iv->base;
3337 tree ustep = use->iv->step;
3338 tree cbase = cand->iv->base;
73f30c63 3339 tree cstep = cand->iv->step, cstep_common;
8b11a64c 3340 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
73f30c63 3341 tree common_type, var;
8b11a64c 3342 tree uutype;
73f30c63 3343 aff_tree cbase_aff, var_aff;
807e902e 3344 widest_int rat;
8b11a64c
ZD
3345
3346 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3347 {
3348 /* We do not have a precision to express the values of use. */
ac182688 3349 return false;
8b11a64c
ZD
3350 }
3351
73f30c63
ZD
3352 var = var_at_stmt (loop, cand, at);
3353 uutype = unsigned_type_for (utype);
8b11a64c 3354
73f30c63
ZD
3355 /* If the conversion is not noop, perform it. */
3356 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
8b11a64c 3357 {
8b11a64c 3358 cstep = fold_convert (uutype, cstep);
73f30c63
ZD
3359 cbase = fold_convert (uutype, cbase);
3360 var = fold_convert (uutype, var);
9be872b7 3361 }
9be872b7 3362
73f30c63
ZD
3363 if (!constant_multiple_of (ustep, cstep, &rat))
3364 return false;
8b11a64c 3365
b67102ae
ZD
3366 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3367 type, we achieve better folding by computing their difference in this
3368 wider type, and cast the result to UUTYPE. We do not need to worry about
3369 overflows, as all the arithmetics will in the end be performed in UUTYPE
3370 anyway. */
3371 common_type = determine_common_wider_type (&ubase, &cbase);
3372
73f30c63
ZD
3373 /* use = ubase - ratio * cbase + ratio * var. */
3374 tree_to_aff_combination (ubase, common_type, aff);
3375 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3376 tree_to_aff_combination (var, uutype, &var_aff);
8b11a64c 3377
73f30c63
ZD
3378 /* We need to shift the value if we are after the increment. */
3379 if (stmt_after_increment (loop, cand, at))
8b11a64c 3380 {
73f30c63 3381 aff_tree cstep_aff;
b8698a0f 3382
73f30c63
ZD
3383 if (common_type != uutype)
3384 cstep_common = fold_convert (common_type, cstep);
9be872b7 3385 else
73f30c63 3386 cstep_common = cstep;
ac182688 3387
73f30c63
ZD
3388 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3389 aff_combination_add (&cbase_aff, &cstep_aff);
8b11a64c 3390 }
8b11a64c 3391
27bcd47c 3392 aff_combination_scale (&cbase_aff, -rat);
ac182688 3393 aff_combination_add (aff, &cbase_aff);
b67102ae 3394 if (common_type != uutype)
73f30c63
ZD
3395 aff_combination_convert (aff, uutype);
3396
3397 aff_combination_scale (&var_aff, rat);
3398 aff_combination_add (aff, &var_aff);
ac182688
ZD
3399
3400 return true;
3401}
3402
0c604a61
TV
3403/* Return the type of USE. */
3404
3405static tree
3406get_use_type (struct iv_use *use)
3407{
3408 tree base_type = TREE_TYPE (use->iv->base);
3409 tree type;
3410
3411 if (use->type == USE_ADDRESS)
3412 {
3413 /* The base_type may be a void pointer. Create a pointer type based on
3414 the mem_ref instead. */
3415 type = build_pointer_type (TREE_TYPE (*use->op_p));
3416 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3417 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3418 }
3419 else
3420 type = base_type;
3421
3422 return type;
3423}
3424
ac182688
ZD
3425/* Determines the expression by that USE is expressed from induction variable
3426 CAND at statement AT in LOOP. The computation is unshared. */
3427
3428static tree
3429get_computation_at (struct loop *loop,
726a989a 3430 struct iv_use *use, struct iv_cand *cand, gimple at)
ac182688 3431{
73f30c63 3432 aff_tree aff;
0c604a61 3433 tree type = get_use_type (use);
ac182688
ZD
3434
3435 if (!get_computation_aff (loop, use, cand, at, &aff))
3436 return NULL_TREE;
3437 unshare_aff_combination (&aff);
3438 return fold_convert (type, aff_combination_to_tree (&aff));
8b11a64c
ZD
3439}
3440
3441/* Determines the expression by that USE is expressed from induction variable
ac182688 3442 CAND in LOOP. The computation is unshared. */
8b11a64c
ZD
3443
3444static tree
3445get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3446{
3447 return get_computation_at (loop, use, cand, use->stmt);
3448}
3449
6521ac85
SL
3450/* Adjust the cost COST for being in loop setup rather than loop body.
3451 If we're optimizing for space, the loop setup overhead is constant;
3452 if we're optimizing for speed, amortize it over the per-iteration cost. */
3453static unsigned
3454adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3455{
3456 if (cost == INFTY)
3457 return cost;
3458 else if (optimize_loop_for_speed_p (data->current_loop))
18081149 3459 return cost / avg_loop_niter (data->current_loop);
6521ac85
SL
3460 else
3461 return cost;
3462}
3463
b1dd92fd 3464/* Returns true if multiplying by RATIO is allowed in an address. Test the
09e881c9
BE
3465 validity for a memory reference accessing memory of mode MODE in
3466 address space AS. */
3467
ac182688
ZD
3468
3469bool
ef4bddc2 3470multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
09e881c9 3471 addr_space_t as)
ac182688
ZD
3472{
3473#define MAX_RATIO 128
09e881c9 3474 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
9771b263 3475 static vec<sbitmap> valid_mult_list;
09e881c9
BE
3476 sbitmap valid_mult;
3477
9771b263
DN
3478 if (data_index >= valid_mult_list.length ())
3479 valid_mult_list.safe_grow_cleared (data_index + 1);
09e881c9 3480
9771b263 3481 valid_mult = valid_mult_list[data_index];
09e881c9 3482 if (!valid_mult)
ac182688 3483 {
ef4bddc2 3484 machine_mode address_mode = targetm.addr_space.address_mode (as);
d4ebfa65 3485 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
f258df2d
IS
3486 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3487 rtx addr, scaled;
ac182688
ZD
3488 HOST_WIDE_INT i;
3489
09e881c9 3490 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
f61e445a 3491 bitmap_clear (valid_mult);
f258df2d
IS
3492 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3493 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
ac182688
ZD
3494 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3495 {
f258df2d
IS
3496 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3497 if (memory_address_addr_space_p (mode, addr, as)
3498 || memory_address_addr_space_p (mode, scaled, as))
d7c028c0 3499 bitmap_set_bit (valid_mult, i + MAX_RATIO);
ac182688
ZD
3500 }
3501
3502 if (dump_file && (dump_flags & TDF_DETAILS))
3503 {
3504 fprintf (dump_file, " allowed multipliers:");
3505 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
d7c028c0 3506 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
ac182688
ZD
3507 fprintf (dump_file, " %d", (int) i);
3508 fprintf (dump_file, "\n");
3509 fprintf (dump_file, "\n");
3510 }
09e881c9 3511
9771b263 3512 valid_mult_list[data_index] = valid_mult;
ac182688
ZD
3513 }
3514
3515 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3516 return false;
3517
d7c028c0 3518 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
ac182688
ZD
3519}
3520
8b11a64c
ZD
3521/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3522 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
b1dd92fd 3523 variable is omitted. Compute the cost for a memory reference that accesses
09e881c9 3524 a memory location of mode MEM_MODE in address space AS.
b1dd92fd 3525
2c08497a
BS
3526 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3527 size of MEM_MODE / RATIO) is available. To make this determination, we
3528 look at the size of the increment to be made, which is given in CSTEP.
3529 CSTEP may be zero if the step is unknown.
3530 STMT_AFTER_INC is true iff the statement we're looking at is after the
3531 increment of the original biv.
3532
8b11a64c
ZD
3533 TODO -- there must be some better way. This all is quite crude. */
3534
/* Kinds of auto-increment addressing.  The values are used as indices
   into arrays of AINC_NONE elements.  */
enum ainc_type
{
  AINC_PRE_INC = 0,   /* Pre increment.  */
  AINC_PRE_DEC = 1,   /* Pre decrement.  */
  AINC_POST_INC = 2,  /* Post increment.  */
  AINC_POST_DEC = 3,  /* Post decrement.  */
  AINC_NONE = 4       /* Also the number of auto increment types.  */
};
3543
0823efed 3544typedef struct address_cost_data_s
09e881c9
BE
3545{
3546 HOST_WIDE_INT min_offset, max_offset;
3547 unsigned costs[2][2][2][2];
cd047059 3548 unsigned ainc_costs[AINC_NONE];
09e881c9
BE
3549} *address_cost_data;
3550
09e881c9 3551
6e8c65f6 3552static comp_cost
8b11a64c 3553get_address_cost (bool symbol_present, bool var_present,
dd5f63f8 3554 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
ef4bddc2 3555 HOST_WIDE_INT cstep, machine_mode mem_mode,
09e881c9 3556 addr_space_t as, bool speed,
2c08497a 3557 bool stmt_after_inc, bool *may_autoinc)
8b11a64c 3558{
ef4bddc2 3559 machine_mode address_mode = targetm.addr_space.address_mode (as);
9771b263 3560 static vec<address_cost_data> address_cost_data_list;
09e881c9
BE
3561 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3562 address_cost_data data;
2c08497a
BS
3563 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3564 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
6e8c65f6 3565 unsigned cost, acost, complexity;
cd047059 3566 enum ainc_type autoinc_type;
2c08497a
BS
3567 bool offset_p, ratio_p, autoinc;
3568 HOST_WIDE_INT s_offset, autoinc_offset, msize;
8b11a64c
ZD
3569 unsigned HOST_WIDE_INT mask;
3570 unsigned bits;
3571
9771b263
DN
3572 if (data_index >= address_cost_data_list.length ())
3573 address_cost_data_list.safe_grow_cleared (data_index + 1);
09e881c9 3574
9771b263 3575 data = address_cost_data_list[data_index];
09e881c9 3576 if (!data)
8b11a64c
ZD
3577 {
3578 HOST_WIDE_INT i;
d6fa2d17
XDL
3579 HOST_WIDE_INT rat, off = 0;
3580 int old_cse_not_expected, width;
975626a7 3581 unsigned sym_p, var_p, off_p, rat_p, add_c;
b32d5189
DM
3582 rtx_insn *seq;
3583 rtx addr, base;
975626a7
ZD
3584 rtx reg0, reg1;
3585
09e881c9 3586 data = (address_cost_data) xcalloc (1, sizeof (*data));
8b11a64c 3587
d4ebfa65 3588 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
8b11a64c 3589
d6fa2d17
XDL
3590 width = GET_MODE_BITSIZE (address_mode) - 1;
3591 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3592 width = HOST_BITS_PER_WIDE_INT - 1;
d4ebfa65 3593 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
d6fa2d17
XDL
3594
3595 for (i = width; i >= 0; i--)
8b11a64c 3596 {
eb87c7c4 3597 off = -((unsigned HOST_WIDE_INT) 1 << i);
d6fa2d17
XDL
3598 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3599 if (memory_address_addr_space_p (mem_mode, addr, as))
8b11a64c
ZD
3600 break;
3601 }
d6fa2d17 3602 data->min_offset = (i == -1? 0 : off);
8b11a64c 3603
d6fa2d17 3604 for (i = width; i >= 0; i--)
8b11a64c 3605 {
eb87c7c4 3606 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
d6fa2d17
XDL
3607 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3608 if (memory_address_addr_space_p (mem_mode, addr, as))
8b11a64c 3609 break;
5a33f47d
EB
3610 /* For some strict-alignment targets, the offset must be naturally
3611 aligned. Try an aligned offset if mem_mode is not QImode. */
3612 off = mem_mode != QImode
3613 ? ((unsigned HOST_WIDE_INT) 1 << i)
3614 - GET_MODE_SIZE (mem_mode)
3615 : 0;
b88fe5e9
ZC
3616 if (off > 0)
3617 {
3618 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3619 if (memory_address_addr_space_p (mem_mode, addr, as))
3620 break;
3621 }
8b11a64c 3622 }
d6fa2d17
XDL
3623 if (i == -1)
3624 off = 0;
3625 data->max_offset = off;
8b11a64c
ZD
3626
3627 if (dump_file && (dump_flags & TDF_DETAILS))
3628 {
3629 fprintf (dump_file, "get_address_cost:\n");
d6fa2d17 3630 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
dd5f63f8 3631 GET_MODE_NAME (mem_mode),
d6fa2d17
XDL
3632 data->min_offset);
3633 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
dd5f63f8 3634 GET_MODE_NAME (mem_mode),
d6fa2d17 3635 data->max_offset);
8b11a64c
ZD
3636 }
3637
09e881c9 3638 rat = 1;
ac182688 3639 for (i = 2; i <= MAX_RATIO; i++)
09e881c9 3640 if (multiplier_allowed_in_address_p (i, mem_mode, as))
ac182688 3641 {
09e881c9 3642 rat = i;
ac182688
ZD
3643 break;
3644 }
975626a7
ZD
3645
3646 /* Compute the cost of various addressing modes. */
3647 acost = 0;
d4ebfa65
BE
3648 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3649 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
975626a7 3650
cd047059 3651 if (USE_LOAD_PRE_DECREMENT (mem_mode)
8875e939 3652 || USE_STORE_PRE_DECREMENT (mem_mode))
2c08497a 3653 {
d4ebfa65 3654 addr = gen_rtx_PRE_DEC (address_mode, reg0);
09e881c9
BE
3655 has_predec[mem_mode]
3656 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3657
3658 if (has_predec[mem_mode])
3659 data->ainc_costs[AINC_PRE_DEC]
3660 = address_cost (addr, mem_mode, as, speed);
2c08497a 3661 }
cd047059 3662 if (USE_LOAD_POST_DECREMENT (mem_mode)
8875e939 3663 || USE_STORE_POST_DECREMENT (mem_mode))
2c08497a 3664 {
d4ebfa65 3665 addr = gen_rtx_POST_DEC (address_mode, reg0);
09e881c9
BE
3666 has_postdec[mem_mode]
3667 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3668
3669 if (has_postdec[mem_mode])
3670 data->ainc_costs[AINC_POST_DEC]
3671 = address_cost (addr, mem_mode, as, speed);
2c08497a 3672 }
cd047059 3673 if (USE_LOAD_PRE_INCREMENT (mem_mode)
8875e939 3674 || USE_STORE_PRE_DECREMENT (mem_mode))
2c08497a 3675 {
d4ebfa65 3676 addr = gen_rtx_PRE_INC (address_mode, reg0);
09e881c9
BE
3677 has_preinc[mem_mode]
3678 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3679
3680 if (has_preinc[mem_mode])
3681 data->ainc_costs[AINC_PRE_INC]
3682 = address_cost (addr, mem_mode, as, speed);
2c08497a 3683 }
cd047059 3684 if (USE_LOAD_POST_INCREMENT (mem_mode)
8875e939 3685 || USE_STORE_POST_INCREMENT (mem_mode))
2c08497a 3686 {
d4ebfa65 3687 addr = gen_rtx_POST_INC (address_mode, reg0);
09e881c9
BE
3688 has_postinc[mem_mode]
3689 = memory_address_addr_space_p (mem_mode, addr, as);
cd047059
BC
3690
3691 if (has_postinc[mem_mode])
3692 data->ainc_costs[AINC_POST_INC]
3693 = address_cost (addr, mem_mode, as, speed);
2c08497a 3694 }
975626a7
ZD
3695 for (i = 0; i < 16; i++)
3696 {
3697 sym_p = i & 1;
3698 var_p = (i >> 1) & 1;
3699 off_p = (i >> 2) & 1;
3700 rat_p = (i >> 3) & 1;
3701
3702 addr = reg0;
3703 if (rat_p)
d4ebfa65
BE
3704 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3705 gen_int_mode (rat, address_mode));
975626a7
ZD
3706
3707 if (var_p)
d4ebfa65 3708 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
975626a7
ZD
3709
3710 if (sym_p)
3711 {
d4ebfa65 3712 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
8318b0d9 3713 /* ??? We can run into trouble with some backends by presenting
fa10beec 3714 it with symbols which haven't been properly passed through
8318b0d9
RH
3715 targetm.encode_section_info. By setting the local bit, we
3716 enhance the probability of things working. */
3717 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3718
975626a7 3719 if (off_p)
d4ebfa65 3720 base = gen_rtx_fmt_e (CONST, address_mode,
09e881c9 3721 gen_rtx_fmt_ee
d4ebfa65
BE
3722 (PLUS, address_mode, base,
3723 gen_int_mode (off, address_mode)));
975626a7
ZD
3724 }
3725 else if (off_p)
d4ebfa65 3726 base = gen_int_mode (off, address_mode);
975626a7
ZD
3727 else
3728 base = NULL_RTX;
b8698a0f 3729
975626a7 3730 if (base)
d4ebfa65 3731 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
2c08497a 3732
975626a7
ZD
3733 start_sequence ();
3734 /* To avoid splitting addressing modes, pretend that no cse will
3735 follow. */
3736 old_cse_not_expected = cse_not_expected;
3737 cse_not_expected = true;
09e881c9 3738 addr = memory_address_addr_space (mem_mode, addr, as);
975626a7
ZD
3739 cse_not_expected = old_cse_not_expected;
3740 seq = get_insns ();
3741 end_sequence ();
3742
f40751dd 3743 acost = seq_cost (seq, speed);
09e881c9 3744 acost += address_cost (addr, mem_mode, as, speed);
975626a7
ZD
3745
3746 if (!acost)
3747 acost = 1;
09e881c9 3748 data->costs[sym_p][var_p][off_p][rat_p] = acost;
975626a7
ZD
3749 }
3750
3751 /* On some targets, it is quite expensive to load symbol to a register,
3752 which makes addresses that contain symbols look much more expensive.
3753 However, the symbol will have to be loaded in any case before the
3754 loop (and quite likely we have it in register already), so it does not
3755 make much sense to penalize them too heavily. So make some final
3756 tweaks for the SYMBOL_PRESENT modes:
3757
3758 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3759 var is cheaper, use this mode with small penalty.
3760 If VAR_PRESENT is true, try whether the mode with
3761 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3762 if this is the case, use it. */
5322d07e 3763 add_c = add_cost (speed, address_mode);
975626a7
ZD
3764 for (i = 0; i < 8; i++)
3765 {
3766 var_p = i & 1;
3767 off_p = (i >> 1) & 1;
3768 rat_p = (i >> 2) & 1;
3769
09e881c9 3770 acost = data->costs[0][1][off_p][rat_p] + 1;
975626a7
ZD
3771 if (var_p)
3772 acost += add_c;
3773
09e881c9
BE
3774 if (acost < data->costs[1][var_p][off_p][rat_p])
3775 data->costs[1][var_p][off_p][rat_p] = acost;
975626a7 3776 }
2c08497a 3777
975626a7
ZD
3778 if (dump_file && (dump_flags & TDF_DETAILS))
3779 {
3780 fprintf (dump_file, "Address costs:\n");
b8698a0f 3781
975626a7
ZD
3782 for (i = 0; i < 16; i++)
3783 {
3784 sym_p = i & 1;
3785 var_p = (i >> 1) & 1;
3786 off_p = (i >> 2) & 1;
3787 rat_p = (i >> 3) & 1;
3788
3789 fprintf (dump_file, " ");
3790 if (sym_p)
3791 fprintf (dump_file, "sym + ");
3792 if (var_p)
3793 fprintf (dump_file, "var + ");
3794 if (off_p)
3795 fprintf (dump_file, "cst + ");
3796 if (rat_p)
3797 fprintf (dump_file, "rat * ");
3798
09e881c9 3799 acost = data->costs[sym_p][var_p][off_p][rat_p];
975626a7
ZD
3800 fprintf (dump_file, "index costs %d\n", acost);
3801 }
2c08497a
BS
3802 if (has_predec[mem_mode] || has_postdec[mem_mode]
3803 || has_preinc[mem_mode] || has_postinc[mem_mode])
3804 fprintf (dump_file, " May include autoinc/dec\n");
975626a7
ZD
3805 fprintf (dump_file, "\n");
3806 }
09e881c9 3807
9771b263 3808 address_cost_data_list[data_index] = data;
8b11a64c
ZD
3809 }
3810
d4ebfa65 3811 bits = GET_MODE_BITSIZE (address_mode);
8b11a64c
ZD
3812 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3813 offset &= mask;
3814 if ((offset >> (bits - 1) & 1))
3815 offset |= ~mask;
3816 s_offset = offset;
3817
2c08497a 3818 autoinc = false;
cd047059 3819 autoinc_type = AINC_NONE;
2c08497a
BS
3820 msize = GET_MODE_SIZE (mem_mode);
3821 autoinc_offset = offset;
3822 if (stmt_after_inc)
3823 autoinc_offset += ratio * cstep;
3824 if (symbol_present || var_present || ratio != 1)
3825 autoinc = false;
cd047059
BC
3826 else
3827 {
3828 if (has_postinc[mem_mode] && autoinc_offset == 0
3829 && msize == cstep)
3830 autoinc_type = AINC_POST_INC;
3831 else if (has_postdec[mem_mode] && autoinc_offset == 0
2c08497a 3832 && msize == -cstep)
cd047059
BC
3833 autoinc_type = AINC_POST_DEC;
3834 else if (has_preinc[mem_mode] && autoinc_offset == msize
2c08497a 3835 && msize == cstep)
cd047059
BC
3836 autoinc_type = AINC_PRE_INC;
3837 else if (has_predec[mem_mode] && autoinc_offset == -msize
3838 && msize == -cstep)
3839 autoinc_type = AINC_PRE_DEC;
3840
3841 if (autoinc_type != AINC_NONE)
3842 autoinc = true;
3843 }
2c08497a 3844
8b11a64c 3845 cost = 0;
a85a9024 3846 offset_p = (s_offset != 0
09e881c9
BE
3847 && data->min_offset <= s_offset
3848 && s_offset <= data->max_offset);
8b11a64c 3849 ratio_p = (ratio != 1
09e881c9 3850 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
8b11a64c
ZD
3851
3852 if (ratio != 1 && !ratio_p)
6dd8f4bb 3853 cost += mult_by_coeff_cost (ratio, address_mode, speed);
8b11a64c
ZD
3854
3855 if (s_offset && !offset_p && !symbol_present)
5322d07e 3856 cost += add_cost (speed, address_mode);
8b11a64c 3857
2c08497a
BS
3858 if (may_autoinc)
3859 *may_autoinc = autoinc;
cd047059
BC
3860 if (autoinc)
3861 acost = data->ainc_costs[autoinc_type];
3862 else
3863 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
6e8c65f6
ZD
3864 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3865 return new_cost (cost + acost, complexity);
8b11a64c 3866}
8b11a64c 3867
e6450c11
TV
3868 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3869 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3870 calculating the operands of EXPR. Returns true if successful, and returns
3871 the cost in COST. */
3872
3873static bool
ef4bddc2 3874get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
e6450c11
TV
3875 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3876{
3877 comp_cost res;
3878 tree op1 = TREE_OPERAND (expr, 1);
3879 tree cst = TREE_OPERAND (mult, 1);
3880 tree multop = TREE_OPERAND (mult, 0);
3881 int m = exact_log2 (int_cst_value (cst));
3882 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
9fb87eb0
EB
3883 int as_cost, sa_cost;
3884 bool mult_in_op1;
e6450c11
TV
3885
3886 if (!(m >= 0 && m < maxm))
3887 return false;
3888
9fb87eb0 3889 mult_in_op1 = operand_equal_p (op1, mult, 0);
1a1a5534 3890
9fb87eb0
EB
3891 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3892
3893 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3894 use that in preference to a shift insn followed by an add insn. */
e6450c11 3895 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
5322d07e 3896 ? shiftadd_cost (speed, mode, m)
9fb87eb0 3897 : (mult_in_op1
5322d07e
NF
3898 ? shiftsub1_cost (speed, mode, m)
3899 : shiftsub0_cost (speed, mode, m)));
9fb87eb0
EB
3900
3901 res = new_cost (MIN (as_cost, sa_cost), 0);
3902 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
e6450c11
TV
3903
3904 STRIP_NOPS (multop);
3905 if (!is_gimple_val (multop))
3906 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3907
3908 *cost = res;
3909 return true;
3910}
3911
3ac01fde
ZD
3912/* Estimates cost of forcing expression EXPR into a variable. */
3913
6e8c65f6 3914static comp_cost
f40751dd 3915force_expr_to_var_cost (tree expr, bool speed)
8b11a64c
ZD
3916{
3917 static bool costs_initialized = false;
f40751dd
JH
3918 static unsigned integer_cost [2];
3919 static unsigned symbol_cost [2];
3920 static unsigned address_cost [2];
7299dbfb 3921 tree op0, op1;
6e8c65f6 3922 comp_cost cost0, cost1, cost;
ef4bddc2 3923 machine_mode mode;
8b11a64c
ZD
3924
3925 if (!costs_initialized)
3926 {
8b11a64c 3927 tree type = build_pointer_type (integer_type_node);
8318b0d9
RH
3928 tree var, addr;
3929 rtx x;
f40751dd 3930 int i;
8318b0d9
RH
3931
3932 var = create_tmp_var_raw (integer_type_node, "test_var");
3933 TREE_STATIC (var) = 1;
3934 x = produce_memory_decl_rtl (var, NULL);
3935 SET_DECL_RTL (var, x);
8b11a64c 3936
8b11a64c 3937 addr = build1 (ADDR_EXPR, type, var);
8b11a64c 3938
f40751dd
JH
3939
3940 for (i = 0; i < 2; i++)
8b11a64c 3941 {
f40751dd
JH
3942 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3943 2000), i);
3944
3945 symbol_cost[i] = computation_cost (addr, i) + 1;
3946
3947 address_cost[i]
5d49b6a7 3948 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
f40751dd
JH
3949 if (dump_file && (dump_flags & TDF_DETAILS))
3950 {
3951 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3952 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3953 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3954 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3955 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3956 fprintf (dump_file, "\n");
3957 }
8b11a64c
ZD
3958 }
3959
3960 costs_initialized = true;
3961 }
3962
f5e2738c
ZD
3963 STRIP_NOPS (expr);
3964
8b11a64c 3965 if (SSA_VAR_P (expr))
7735d6c7 3966 return no_cost;
8b11a64c 3967
ad6003f2 3968 if (is_gimple_min_invariant (expr))
8b11a64c
ZD
3969 {
3970 if (TREE_CODE (expr) == INTEGER_CST)
f40751dd 3971 return new_cost (integer_cost [speed], 0);
8b11a64c
ZD
3972
3973 if (TREE_CODE (expr) == ADDR_EXPR)
3974 {
3975 tree obj = TREE_OPERAND (expr, 0);
3976
3977 if (TREE_CODE (obj) == VAR_DECL
3978 || TREE_CODE (obj) == PARM_DECL
3979 || TREE_CODE (obj) == RESULT_DECL)
f40751dd 3980 return new_cost (symbol_cost [speed], 0);
8b11a64c
ZD
3981 }
3982
f40751dd 3983 return new_cost (address_cost [speed], 0);
8b11a64c
ZD
3984 }
3985
7299dbfb
ZD
3986 switch (TREE_CODE (expr))
3987 {
5be014d5 3988 case POINTER_PLUS_EXPR:
7299dbfb
ZD
3989 case PLUS_EXPR:
3990 case MINUS_EXPR:
3991 case MULT_EXPR:
3992 op0 = TREE_OPERAND (expr, 0);
3993 op1 = TREE_OPERAND (expr, 1);
f5e2738c
ZD
3994 STRIP_NOPS (op0);
3995 STRIP_NOPS (op1);
7299dbfb
ZD
3996 break;
3997
801a661c 3998 CASE_CONVERT:
7a2faca1
EB
3999 case NEGATE_EXPR:
4000 op0 = TREE_OPERAND (expr, 0);
4001 STRIP_NOPS (op0);
4002 op1 = NULL_TREE;
7a2faca1
EB
4003 break;
4004
7299dbfb
ZD
4005 default:
4006 /* Just an arbitrary value, FIXME. */
f40751dd 4007 return new_cost (target_spill_cost[speed], 0);
7299dbfb
ZD
4008 }
4009
801a661c
BC
4010 if (op0 == NULL_TREE
4011 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4012 cost0 = no_cost;
4013 else
4014 cost0 = force_expr_to_var_cost (op0, speed);
4015
4016 if (op1 == NULL_TREE
4017 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4018 cost1 = no_cost;
4019 else
4020 cost1 = force_expr_to_var_cost (op1, speed);
4021
7299dbfb
ZD
4022 mode = TYPE_MODE (TREE_TYPE (expr));
4023 switch (TREE_CODE (expr))
4024 {
5be014d5 4025 case POINTER_PLUS_EXPR:
7299dbfb
ZD
4026 case PLUS_EXPR:
4027 case MINUS_EXPR:
7a2faca1 4028 case NEGATE_EXPR:
5322d07e 4029 cost = new_cost (add_cost (speed, mode), 0);
e6450c11
TV
4030 if (TREE_CODE (expr) != NEGATE_EXPR)
4031 {
4032 tree mult = NULL_TREE;
4033 comp_cost sa_cost;
4034 if (TREE_CODE (op1) == MULT_EXPR)
4035 mult = op1;
4036 else if (TREE_CODE (op0) == MULT_EXPR)
4037 mult = op0;
4038
4039 if (mult != NULL_TREE
50ad7db2 4040 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
5322d07e
NF
4041 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4042 speed, &sa_cost))
e6450c11
TV
4043 return sa_cost;
4044 }
7299dbfb
ZD
4045 break;
4046
801a661c
BC
4047 CASE_CONVERT:
4048 {
4049 tree inner_mode, outer_mode;
4050 outer_mode = TREE_TYPE (expr);
4051 inner_mode = TREE_TYPE (op0);
4052 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4053 TYPE_MODE (inner_mode), speed), 0);
4054 }
4055 break;
4056
7299dbfb
ZD
4057 case MULT_EXPR:
4058 if (cst_and_fits_in_hwi (op0))
6dd8f4bb
BS
4059 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4060 mode, speed), 0);
b8698a0f 4061 else if (cst_and_fits_in_hwi (op1))
6dd8f4bb
BS
4062 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4063 mode, speed), 0);
7299dbfb 4064 else
f40751dd 4065 return new_cost (target_spill_cost [speed], 0);
7299dbfb
ZD
4066 break;
4067
4068 default:
4069 gcc_unreachable ();
4070 }
4071
6e8c65f6
ZD
4072 cost = add_costs (cost, cost0);
4073 cost = add_costs (cost, cost1);
7299dbfb
ZD
4074
4075 /* Bound the cost by target_spill_cost. The parts of complicated
4076 computations often are either loop invariant or at least can
4077 be shared between several iv uses, so letting this grow without
4078 limits would not give reasonable results. */
2c08497a 4079 if (cost.cost > (int) target_spill_cost [speed])
f40751dd 4080 cost.cost = target_spill_cost [speed];
6e8c65f6
ZD
4081
4082 return cost;
8b11a64c
ZD
4083}
4084
3ac01fde
ZD
4085/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4086 invariants the computation depends on. */
4087
6e8c65f6 4088static comp_cost
3ac01fde
ZD
4089force_var_cost (struct ivopts_data *data,
4090 tree expr, bitmap *depends_on)
4091{
4092 if (depends_on)
4093 {
4094 fd_ivopts_data = data;
4095 walk_tree (&expr, find_depends, depends_on, NULL);
4096 }
4097
f40751dd 4098 return force_expr_to_var_cost (expr, data->speed);
3ac01fde
ZD
4099}
4100
8b11a64c
ZD
4101/* Estimates cost of expressing address ADDR as var + symbol + offset. The
4102 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4103 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4104 invariants the computation depends on. */
4105
6e8c65f6 4106static comp_cost
8b11a64c
ZD
4107split_address_cost (struct ivopts_data *data,
4108 tree addr, bool *symbol_present, bool *var_present,
4109 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4110{
2f4675b4
ZD
4111 tree core;
4112 HOST_WIDE_INT bitsize;
4113 HOST_WIDE_INT bitpos;
4114 tree toffset;
ef4bddc2 4115 machine_mode mode;
2f4675b4 4116 int unsignedp, volatilep;
b8698a0f 4117
2f4675b4 4118 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
b3ecff82 4119 &unsignedp, &volatilep, false);
8b11a64c 4120
2f4675b4
ZD
4121 if (toffset != 0
4122 || bitpos % BITS_PER_UNIT != 0
4123 || TREE_CODE (core) != VAR_DECL)
8b11a64c
ZD
4124 {
4125 *symbol_present = false;
4126 *var_present = true;
4127 fd_ivopts_data = data;
4128 walk_tree (&addr, find_depends, depends_on, NULL);
f40751dd 4129 return new_cost (target_spill_cost[data->speed], 0);
2f4675b4
ZD
4130 }
4131
4132 *offset += bitpos / BITS_PER_UNIT;
8b11a64c
ZD
4133 if (TREE_STATIC (core)
4134 || DECL_EXTERNAL (core))
4135 {
4136 *symbol_present = true;
4137 *var_present = false;
7735d6c7 4138 return no_cost;
8b11a64c 4139 }
b8698a0f 4140
8b11a64c
ZD
4141 *symbol_present = false;
4142 *var_present = true;
7735d6c7 4143 return no_cost;
8b11a64c
ZD
4144}
4145
4146/* Estimates cost of expressing difference of addresses E1 - E2 as
4147 var + symbol + offset. The value of offset is added to OFFSET,
4148 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4149 part is missing. DEPENDS_ON is a set of the invariants the computation
4150 depends on. */
4151
6e8c65f6 4152static comp_cost
8b11a64c
ZD
4153ptr_difference_cost (struct ivopts_data *data,
4154 tree e1, tree e2, bool *symbol_present, bool *var_present,
4155 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4156{
2f4675b4 4157 HOST_WIDE_INT diff = 0;
7a2faca1
EB
4158 aff_tree aff_e1, aff_e2;
4159 tree type;
8b11a64c 4160
1e128c5f 4161 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
8b11a64c 4162
7299dbfb 4163 if (ptr_difference_const (e1, e2, &diff))
8b11a64c
ZD
4164 {
4165 *offset += diff;
4166 *symbol_present = false;
4167 *var_present = false;
7735d6c7 4168 return no_cost;
8b11a64c
ZD
4169 }
4170
6e8c65f6 4171 if (integer_zerop (e2))
8b11a64c
ZD
4172 return split_address_cost (data, TREE_OPERAND (e1, 0),
4173 symbol_present, var_present, offset, depends_on);
4174
4175 *symbol_present = false;
4176 *var_present = true;
8b11a64c 4177
7a2faca1
EB
4178 type = signed_type_for (TREE_TYPE (e1));
4179 tree_to_aff_combination (e1, type, &aff_e1);
4180 tree_to_aff_combination (e2, type, &aff_e2);
807e902e 4181 aff_combination_scale (&aff_e2, -1);
7a2faca1
EB
4182 aff_combination_add (&aff_e1, &aff_e2);
4183
4184 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
8b11a64c
ZD
4185}
4186
4187/* Estimates cost of expressing difference E1 - E2 as
4188 var + symbol + offset. The value of offset is added to OFFSET,
4189 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4190 part is missing. DEPENDS_ON is a set of the invariants the computation
4191 depends on. */
4192
6e8c65f6 4193static comp_cost
8b11a64c
ZD
4194difference_cost (struct ivopts_data *data,
4195 tree e1, tree e2, bool *symbol_present, bool *var_present,
4196 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4197{
ef4bddc2 4198 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
f5e2738c 4199 unsigned HOST_WIDE_INT off1, off2;
7a2faca1
EB
4200 aff_tree aff_e1, aff_e2;
4201 tree type;
f5e2738c 4202
9be872b7
ZD
4203 e1 = strip_offset (e1, &off1);
4204 e2 = strip_offset (e2, &off2);
f5e2738c 4205 *offset += off1 - off2;
8b11a64c 4206
f5e2738c
ZD
4207 STRIP_NOPS (e1);
4208 STRIP_NOPS (e2);
8b11a64c
ZD
4209
4210 if (TREE_CODE (e1) == ADDR_EXPR)
7a2faca1
EB
4211 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4212 offset, depends_on);
8b11a64c
ZD
4213 *symbol_present = false;
4214
4215 if (operand_equal_p (e1, e2, 0))
4216 {
4217 *var_present = false;
7735d6c7 4218 return no_cost;
8b11a64c 4219 }
7a2faca1 4220
8b11a64c 4221 *var_present = true;
7a2faca1 4222
6e682d7e 4223 if (integer_zerop (e2))
8b11a64c
ZD
4224 return force_var_cost (data, e1, depends_on);
4225
6e682d7e 4226 if (integer_zerop (e1))
8b11a64c 4227 {
7a2faca1 4228 comp_cost cost = force_var_cost (data, e2, depends_on);
6dd8f4bb 4229 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
8b11a64c
ZD
4230 return cost;
4231 }
4232
7a2faca1
EB
4233 type = signed_type_for (TREE_TYPE (e1));
4234 tree_to_aff_combination (e1, type, &aff_e1);
4235 tree_to_aff_combination (e2, type, &aff_e2);
807e902e 4236 aff_combination_scale (&aff_e2, -1);
7a2faca1 4237 aff_combination_add (&aff_e1, &aff_e2);
8b11a64c 4238
7a2faca1 4239 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
8b11a64c
ZD
4240}
4241
18081149
XDL
4242/* Returns true if AFF1 and AFF2 are identical. */
4243
4244static bool
4245compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4246{
4247 unsigned i;
4248
4249 if (aff1->n != aff2->n)
4250 return false;
4251
4252 for (i = 0; i < aff1->n; i++)
4253 {
27bcd47c 4254 if (aff1->elts[i].coef != aff2->elts[i].coef)
18081149
XDL
4255 return false;
4256
4257 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4258 return false;
4259 }
4260 return true;
4261}
4262
bb8d292d
TV
4263/* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4264
4265static int
4266get_expr_id (struct ivopts_data *data, tree expr)
4267{
4268 struct iv_inv_expr_ent ent;
4269 struct iv_inv_expr_ent **slot;
4270
4271 ent.expr = expr;
4272 ent.hash = iterative_hash_expr (expr, 0);
c203e8a7 4273 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
bb8d292d
TV
4274 if (*slot)
4275 return (*slot)->id;
4276
4277 *slot = XNEW (struct iv_inv_expr_ent);
4278 (*slot)->expr = expr;
4279 (*slot)->hash = ent.hash;
4280 (*slot)->id = data->inv_expr_id++;
4281 return (*slot)->id;
4282}
4283
18081149
XDL
4284/* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4285 requires a new compiler generated temporary. Returns -1 otherwise.
4286 ADDRESS_P is a flag indicating if the expression is for address
4287 computation. */
4288
4289static int
4290get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4291 tree cbase, HOST_WIDE_INT ratio,
4292 bool address_p)
4293{
4294 aff_tree ubase_aff, cbase_aff;
4295 tree expr, ub, cb;
18081149
XDL
4296
4297 STRIP_NOPS (ubase);
4298 STRIP_NOPS (cbase);
4299 ub = ubase;
4300 cb = cbase;
4301
4302 if ((TREE_CODE (ubase) == INTEGER_CST)
4303 && (TREE_CODE (cbase) == INTEGER_CST))
4304 return -1;
4305
4306 /* Strips the constant part. */
4307 if (TREE_CODE (ubase) == PLUS_EXPR
4308 || TREE_CODE (ubase) == MINUS_EXPR
4309 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4310 {
4311 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4312 ubase = TREE_OPERAND (ubase, 0);
4313 }
4314
4315 /* Strips the constant part. */
4316 if (TREE_CODE (cbase) == PLUS_EXPR
4317 || TREE_CODE (cbase) == MINUS_EXPR
4318 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4319 {
4320 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4321 cbase = TREE_OPERAND (cbase, 0);
4322 }
4323
4324 if (address_p)
4325 {
4326 if (((TREE_CODE (ubase) == SSA_NAME)
4327 || (TREE_CODE (ubase) == ADDR_EXPR
4328 && is_gimple_min_invariant (ubase)))
4329 && (TREE_CODE (cbase) == INTEGER_CST))
4330 return -1;
4331
4332 if (((TREE_CODE (cbase) == SSA_NAME)
4333 || (TREE_CODE (cbase) == ADDR_EXPR
4334 && is_gimple_min_invariant (cbase)))
4335 && (TREE_CODE (ubase) == INTEGER_CST))
4336 return -1;
4337 }
4338
4339 if (ratio == 1)
4340 {
c3284718 4341 if (operand_equal_p (ubase, cbase, 0))
18081149
XDL
4342 return -1;
4343
4344 if (TREE_CODE (ubase) == ADDR_EXPR
4345 && TREE_CODE (cbase) == ADDR_EXPR)
4346 {
4347 tree usym, csym;
4348
4349 usym = TREE_OPERAND (ubase, 0);
4350 csym = TREE_OPERAND (cbase, 0);
4351 if (TREE_CODE (usym) == ARRAY_REF)
4352 {
4353 tree ind = TREE_OPERAND (usym, 1);
4354 if (TREE_CODE (ind) == INTEGER_CST
9541ffee 4355 && tree_fits_shwi_p (ind)
eb1ce453 4356 && tree_to_shwi (ind) == 0)
18081149
XDL
4357 usym = TREE_OPERAND (usym, 0);
4358 }
4359 if (TREE_CODE (csym) == ARRAY_REF)
4360 {
4361 tree ind = TREE_OPERAND (csym, 1);
4362 if (TREE_CODE (ind) == INTEGER_CST
9541ffee 4363 && tree_fits_shwi_p (ind)
eb1ce453 4364 && tree_to_shwi (ind) == 0)
18081149
XDL
4365 csym = TREE_OPERAND (csym, 0);
4366 }
4367 if (operand_equal_p (usym, csym, 0))
4368 return -1;
4369 }
4370 /* Now do more complex comparison */
4371 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4372 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4373 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4374 return -1;
4375 }
4376
4377 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4378 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4379
807e902e 4380 aff_combination_scale (&cbase_aff, -1 * ratio);
18081149
XDL
4381 aff_combination_add (&ubase_aff, &cbase_aff);
4382 expr = aff_combination_to_tree (&ubase_aff);
bb8d292d 4383 return get_expr_id (data, expr);
18081149
XDL
4384}
4385
4386
4387
8b11a64c
ZD
4388/* Determines the cost of the computation by that USE is expressed
4389 from induction variable CAND. If ADDRESS_P is true, we just need
4390 to create an address from it, otherwise we want to get it into
4391 register. A set of invariants we depend on is stored in
2c08497a
BS
4392 DEPENDS_ON. AT is the statement at that the value is computed.
4393 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4394 addressing is likely. If INV_EXPR_ID is nonnull and USE->sub_id is
 zero, the id returned by get_loop_invariant_expr_id is stored in it.
 Returns infinite_cost when CAND has no iv, when CAND's type is
 narrower than USE's, when both have base objects that differ, when
 constant_multiple_of fails for the two steps, or when the resulting
 ratio does not fit a signed HOST_WIDE_INT. */
8b11a64c 4395
6e8c65f6 4396static comp_cost
8b11a64c
ZD
4397get_computation_cost_at (struct ivopts_data *data,
4398 struct iv_use *use, struct iv_cand *cand,
2c08497a 4399 bool address_p, bitmap *depends_on, gimple at,
18081149
XDL
4400 bool *can_autoinc,
4401 int *inv_expr_id)
8b11a64c
ZD
4402{
4403 tree ubase = use->iv->base, ustep = use->iv->step;
4404 tree cbase, cstep;
4405 tree utype = TREE_TYPE (ubase), ctype;
73f30c63 4406 unsigned HOST_WIDE_INT cstepi, offset = 0;
8b11a64c 4407 HOST_WIDE_INT ratio, aratio;
2c08497a 4408 bool var_present, symbol_present, stmt_is_after_inc;
6e8c65f6 4409 comp_cost cost;
807e902e 4410 widest_int rat;
f40751dd 4411 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
/* The memory mode is only meaningful for address uses; it is taken from
 the type of the operand the use refers to. */
ef4bddc2 4412 machine_mode mem_mode = (address_p
0c604a61
TV
4413 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4414 : VOIDmode);
8b11a64c
ZD
4415
/* NOTE(review): DEPENDS_ON is dereferenced unconditionally here although
 later code in this function tests it for null; callers visible in this
 file always pass a non-null pointer. */
4416 *depends_on = NULL;
4417
4418 /* Only consider real candidates. */
4419 if (!cand->iv)
6e8c65f6 4420 return infinite_cost;
8b11a64c
ZD
4421
4422 cbase = cand->iv->base;
4423 cstep = cand->iv->step;
4424 ctype = TREE_TYPE (cbase);
4425
4426 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4427 {
4428 /* We do not have a precision to express the values of use. */
6e8c65f6 4429 return infinite_cost;
8b11a64c
ZD
4430 }
4431
1d30a09a
RG
4432 if (address_p
4433 || (use->iv->base_object
4434 && cand->iv->base_object
4435 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4436 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
e6845c23
ZD
4437 {
4438 /* Do not try to express address of an object with computation based
4439 on address of a different object. This may cause problems in rtl
4440 level alias analysis (that does not expect this to be happening,
4441 as this is illegal in C), and would be unlikely to be useful
4442 anyway. */
4443 if (use->iv->base_object
4444 && cand->iv->base_object
4445 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
6e8c65f6 4446 return infinite_cost;
e6845c23
ZD
4447 }
4448
7a2faca1 4449 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
8b11a64c
ZD
4450 {
4451 /* TODO -- add direct handling of this case. */
4452 goto fallback;
4453 }
4454
9be872b7
ZD
4455 /* CSTEPI is removed from the offset in case statement is after the
4456 increment. If the step is not constant, we use zero instead.
1e1def1c 4457 This is a bit imprecise (there is the extra addition), but
9be872b7
ZD
4458 redundancy elimination is likely to transform the code so that
4459 it uses value of the variable before increment anyway,
4460 so it is not that much unrealistic. */
4461 if (cst_and_fits_in_hwi (cstep))
4462 cstepi = int_cst_value (cstep);
4463 else
4464 cstepi = 0;
4465
/* RAT receives the multiple relating USTEP to CSTEP; it must also fit a
 signed HOST_WIDE_INT to be usable as RATIO below. */
73f30c63 4466 if (!constant_multiple_of (ustep, cstep, &rat))
6e8c65f6 4467 return infinite_cost;
b8698a0f 4468
807e902e 4469 if (wi::fits_shwi_p (rat))
27bcd47c 4470 ratio = rat.to_shwi ();
73f30c63 4471 else
6e8c65f6 4472 return infinite_cost;
8b11a64c 4473
7a2faca1
EB
4474 STRIP_NOPS (cbase);
4475 ctype = TREE_TYPE (cbase);
4476
2fa692c0
XDL
4477 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4478
8b11a64c
ZD
4479 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4480 or ratio == 1, it is better to handle this like
b8698a0f 4481
8b11a64c 4482 ubase - ratio * cbase + ratio * var
b8698a0f 4483
8b11a64c
ZD
4484 (also holds in the case ratio == -1, TODO). */
4485
9be872b7 4486 if (cst_and_fits_in_hwi (cbase))
8b11a64c 4487 {
/* Constant CBASE: fold -ratio * cbase into OFFSET (in unsigned
 arithmetic) and cost only UBASE itself. */
d7ca26e4 4488 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
6e8c65f6
ZD
4489 cost = difference_cost (data,
4490 ubase, build_int_cst (utype, 0),
4491 &symbol_present, &var_present, &offset,
4492 depends_on);
/* The cost so far is divided by the average iteration count; presumably
 because this part can be computed once outside the loop. */
18081149 4493 cost.cost /= avg_loop_niter (data->current_loop);
8b11a64c
ZD
4494 }
4495 else if (ratio == 1)
4496 {
2fa692c0
XDL
4497 tree real_cbase = cbase;
4498
4499 /* Check to see if any adjustment is needed. */
4500 if (cstepi == 0 && stmt_is_after_inc)
4501 {
4502 aff_tree real_cbase_aff;
4503 aff_tree cstep_aff;
4504
4505 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4506 &real_cbase_aff);
4507 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4508
4509 aff_combination_add (&real_cbase_aff, &cstep_aff);
4510 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4511 }
4512
6e8c65f6 4513 cost = difference_cost (data,
2fa692c0 4514 ubase, real_cbase,
6e8c65f6
ZD
4515 &symbol_present, &var_present, &offset,
4516 depends_on);
18081149 4517 cost.cost /= avg_loop_niter (data->current_loop);
8b11a64c 4518 }
7a2faca1
EB
4519 else if (address_p
4520 && !POINTER_TYPE_P (ctype)
09e881c9 4521 && multiplier_allowed_in_address_p
0c604a61 4522 (ratio, mem_mode,
09e881c9 4523 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
7a2faca1
EB
4524 {
/* CBASE is replaced by cbase * ratio here; the modified CBASE is also
 what get_loop_invariant_expr_id sees further down. */
4525 cbase
4526 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4527 cost = difference_cost (data,
4528 ubase, cbase,
4529 &symbol_present, &var_present, &offset,
4530 depends_on);
18081149 4531 cost.cost /= avg_loop_niter (data->current_loop);
7a2faca1 4532 }
8b11a64c
ZD
4533 else
4534 {
/* General case: cost forcing CBASE into a variable plus UBASE, and
 charge one extra addition after the division. */
6e8c65f6 4535 cost = force_var_cost (data, cbase, depends_on);
6e8c65f6
ZD
4536 cost = add_costs (cost,
4537 difference_cost (data,
4538 ubase, build_int_cst (utype, 0),
4539 &symbol_present, &var_present,
4540 &offset, depends_on));
18081149 4541 cost.cost /= avg_loop_niter (data->current_loop);
5322d07e 4542 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
18081149
XDL
4543 }
4544
a7e43c57
BC
4545 /* Set of invariants depended on by sub use has already been computed
4546 for the first use in the group. */
4547 if (use->sub_id)
4548 {
4549 cost.cost = 0;
4550 if (depends_on && *depends_on)
4551 bitmap_clear (*depends_on);
4552 }
4553 else if (inv_expr_id)
18081149
XDL
4554 {
4555 *inv_expr_id =
4556 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4557 /* Clear depends on. */
4558 if (*inv_expr_id != -1 && depends_on && *depends_on)
4559 bitmap_clear (*depends_on);
8b11a64c
ZD
4560 }
4561
4562 /* If we are after the increment, the value of the candidate is higher by
4563 one iteration. */
2c08497a 4564 if (stmt_is_after_inc)
8b11a64c
ZD
4565 offset -= ratio * cstepi;
4566
4567 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
7a2faca1
EB
4568 (symbol/var1/const parts may be omitted). If we are looking for an
4569 address, find the cost of addressing this. */
8b11a64c 4570 if (address_p)
7a2faca1
EB
4571 return add_costs (cost,
4572 get_address_cost (symbol_present, var_present,
2c08497a 4573 offset, ratio, cstepi,
0c604a61 4574 mem_mode,
09e881c9 4575 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
2c08497a
BS
4576 speed, stmt_is_after_inc,
4577 can_autoinc));
8b11a64c
ZD
4578
4579 /* Otherwise estimate the costs for computing the expression. */
8b11a64c
ZD
4580 if (!symbol_present && !var_present && !offset)
4581 {
4582 if (ratio != 1)
6dd8f4bb 4583 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
8b11a64c
ZD
4584 return cost;
4585 }
4586
7a2faca1
EB
4587 /* Symbol + offset should be compile-time computable so consider that they
4588 are added once to the variable, if present. */
4589 if (var_present && (symbol_present || offset))
6521ac85 4590 cost.cost += adjust_setup_cost (data,
5322d07e 4591 add_cost (speed, TYPE_MODE (ctype)));
8b11a64c 4592
6e8c65f6
ZD
4593 /* Having offset does not affect runtime cost in case it is added to
4594 symbol, but it increases complexity. */
4595 if (offset)
4596 cost.complexity++;
4597
5322d07e 4598 cost.cost += add_cost (speed, TYPE_MODE (ctype));
7a2faca1
EB
4599
/* The multiplication is costed by the absolute value of RATIO. */
4600 aratio = ratio > 0 ? ratio : -ratio;
4601 if (aratio != 1)
6dd8f4bb 4602 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
5914a70f 4603 return cost;
8b11a64c
ZD
4604
4605fallback:
/* Reached only when USE's type is narrower than CAND's type. */
2c08497a
BS
4606 if (can_autoinc)
4607 *can_autoinc = false;
4608
8b11a64c
ZD
4609 {
4610 /* Just get the expression, expand it and measure the cost. */
4611 tree comp = get_computation_at (data->current_loop, use, cand, at);
4612
4613 if (!comp)
6e8c65f6 4614 return infinite_cost;
8b11a64c
ZD
4615
4616 if (address_p)
70f34814 4617 comp = build_simple_mem_ref (comp);
8b11a64c 4618
f40751dd 4619 return new_cost (computation_cost (comp, speed), 0);
8b11a64c
ZD
4620 }
4621}
4623/* Determines the cost of the computation by that USE is expressed
4624 from induction variable CAND. If ADDRESS_P is true, we just need
4625 to create an address from it, otherwise we want to get it into
4626 register. A set of invariants we depend on is stored in
2c08497a
BS
4627 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4628 autoinc addressing is likely. */
8b11a64c 4629
6e8c65f6 4630static comp_cost
8b11a64c
ZD
4631get_computation_cost (struct ivopts_data *data,
4632 struct iv_use *use, struct iv_cand *cand,
18081149
XDL
4633 bool address_p, bitmap *depends_on,
4634 bool *can_autoinc, int *inv_expr_id)
8b11a64c
ZD
4635{
4636 return get_computation_cost_at (data,
2c08497a 4637 use, cand, address_p, depends_on, use->stmt,
18081149 4638 can_autoinc, inv_expr_id);
8b11a64c
ZD
4639}
4640
4641/* Determines cost of basing replacement of USE on CAND in a generic
4642 expression. */
4643
b1b02be2 4644static bool
8b11a64c
ZD
4645determine_use_iv_cost_generic (struct ivopts_data *data,
4646 struct iv_use *use, struct iv_cand *cand)
4647{
4648 bitmap depends_on;
6e8c65f6 4649 comp_cost cost;
18081149 4650 int inv_expr_id = -1;
eec5fec9
ZD
4651
4652 /* The simple case first -- if we need to express value of the preserved
4653 original biv, the cost is 0. This also prevents us from counting the
4654 cost of increment twice -- once at this use and once in the cost of
4655 the candidate. */
4656 if (cand->pos == IP_ORIGINAL
4657 && cand->incremented_at == use->stmt)
4658 {
7735d6c7 4659 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
d8af4ba3 4660 ERROR_MARK, -1);
eec5fec9
ZD
4661 return true;
4662 }
8b11a64c 4663
18081149
XDL
4664 cost = get_computation_cost (data, use, cand, false, &depends_on,
4665 NULL, &inv_expr_id);
4666
d8af4ba3 4667 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
18081149 4668 inv_expr_id);
b1b02be2 4669
6e8c65f6 4670 return !infinite_cost_p (cost);
8b11a64c
ZD
4671}
4672
4673/* Determines cost of basing replacement of USE on CAND in an address. */
4674
b1b02be2 4675static bool
8b11a64c
ZD
4676determine_use_iv_cost_address (struct ivopts_data *data,
4677 struct iv_use *use, struct iv_cand *cand)
4678{
4679 bitmap depends_on;
2c08497a 4680 bool can_autoinc;
18081149 4681 int inv_expr_id = -1;
a7e43c57
BC
4682 struct iv_use *sub_use;
4683 comp_cost sub_cost;
2c08497a 4684 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
18081149 4685 &can_autoinc, &inv_expr_id);
8b11a64c 4686
2c08497a
BS
4687 if (cand->ainc_use == use)
4688 {
4689 if (can_autoinc)
4690 cost.cost -= cand->cost_step;
4691 /* If we generated the candidate solely for exploiting autoincrement
4692 opportunities, and it turns out it can't be used, set the cost to
4693 infinity to make sure we ignore it. */
4694 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4695 cost = infinite_cost;
4696 }
a7e43c57
BC
4697 for (sub_use = use->next;
4698 sub_use && !infinite_cost_p (cost);
4699 sub_use = sub_use->next)
4700 {
4701 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4702 &can_autoinc, &inv_expr_id);
4703 cost = add_costs (cost, sub_cost);
4704 }
4705
d8af4ba3 4706 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
18081149 4707 inv_expr_id);
b1b02be2 4708
6e8c65f6 4709 return !infinite_cost_p (cost);
8b11a64c
ZD
4710}
4711
7e2ac86c
ZD
4712/* Computes value of candidate CAND at position AT in iteration NITER, and
4713 stores it to VAL. */
8b11a64c 4714
7e2ac86c 4715static void
726a989a 4716cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
7e2ac86c 4717 aff_tree *val)
8b11a64c 4718{
7e2ac86c
ZD
4719 aff_tree step, delta, nit;
4720 struct iv *iv = cand->iv;
8b11a64c 4721 tree type = TREE_TYPE (iv->base);
1ffe34d9
AP
4722 tree steptype = type;
4723 if (POINTER_TYPE_P (type))
4724 steptype = sizetype;
d6adff07 4725 steptype = unsigned_type_for (type);
8b11a64c 4726
d6adff07
RB
4727 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4728 aff_combination_convert (&step, steptype);
7e2ac86c 4729 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
1ffe34d9 4730 aff_combination_convert (&nit, steptype);
7e2ac86c 4731 aff_combination_mult (&nit, &step, &delta);
8b11a64c 4732 if (stmt_after_increment (loop, cand, at))
7e2ac86c 4733 aff_combination_add (&delta, &step);
8b11a64c 4734
7e2ac86c 4735 tree_to_aff_combination (iv->base, type, val);
d6adff07
RB
4736 if (!POINTER_TYPE_P (type))
4737 aff_combination_convert (val, steptype);
7e2ac86c 4738 aff_combination_add (val, &delta);
8b11a64c
ZD
4739}
4740
ca4c3169
ZD
4741/* Returns period of induction variable iv. */
4742
4743static tree
4744iv_period (struct iv *iv)
4745{
4746 tree step = iv->step, period, type;
4747 tree pow2div;
4748
4749 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4750
ca4c3169 4751 type = unsigned_type_for (TREE_TYPE (step));
e2102efc
XDL
4752 /* Period of the iv is lcm (step, type_range)/step -1,
4753 i.e., N*type_range/step - 1. Since type range is power
4754 of two, N == (step >> num_of_ending_zeros_binary (step),
4755 so the final result is
4756
4757 (type_range >> num_of_ending_zeros_binary (step)) - 1
4758
4759 */
4760 pow2div = num_ending_zeros (step);
ca4c3169
ZD
4761
4762 period = build_low_bits_mask (type,
e2102efc 4763 (TYPE_PRECISION (type)
ae7e9ddd 4764 - tree_to_uhwi (pow2div)));
ca4c3169
ZD
4765
4766 return period;
4767}
4768
f5f12961
ZD
4769/* Returns the comparison operator used when eliminating the iv USE. */
4770
4771static enum tree_code
4772iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4773{
4774 struct loop *loop = data->current_loop;
4775 basic_block ex_bb;
4776 edge exit;
4777
726a989a 4778 ex_bb = gimple_bb (use->stmt);
f5f12961
ZD
4779 exit = EDGE_SUCC (ex_bb, 0);
4780 if (flow_bb_inside_loop_p (loop, exit->dest))
4781 exit = EDGE_SUCC (ex_bb, 1);
4782
4783 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4784}
4785
d8af4ba3
ZD
4786/* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4787 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4788 calculation is performed in non-wrapping type.
4789
4790 TODO: More generally, we could test for the situation that
4791 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
3230c614 4792 This would require knowing the sign of OFFSET. */
d8af4ba3
ZD
4793
4794static bool
3230c614 4795difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
d8af4ba3
ZD
4796{
4797 enum tree_code code;
4798 tree e1, e2;
3230c614 4799 aff_tree aff_e1, aff_e2, aff_offset;
d8af4ba3
ZD
4800
4801 if (!nowrap_type_p (TREE_TYPE (base)))
4802 return false;
4803
4804 base = expand_simple_operations (base);
4805
4806 if (TREE_CODE (base) == SSA_NAME)
4807 {
4808 gimple stmt = SSA_NAME_DEF_STMT (base);
4809
4810 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4811 return false;
4812
4813 code = gimple_assign_rhs_code (stmt);
4814 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4815 return false;
4816
4817 e1 = gimple_assign_rhs1 (stmt);
4818 e2 = gimple_assign_rhs2 (stmt);
4819 }
4820 else
4821 {
4822 code = TREE_CODE (base);
4823 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4824 return false;
4825 e1 = TREE_OPERAND (base, 0);
4826 e2 = TREE_OPERAND (base, 1);
4827 }
4828
3230c614
BC
4829 /* Use affine expansion as deeper inspection to prove the equality. */
4830 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4831 &aff_e2, &data->name_expansion_cache);
4832 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4833 &aff_offset, &data->name_expansion_cache);
4834 aff_combination_scale (&aff_offset, -1);
d8af4ba3
ZD
4835 switch (code)
4836 {
4837 case PLUS_EXPR:
3230c614
BC
4838 aff_combination_add (&aff_e2, &aff_offset);
4839 if (aff_combination_zero_p (&aff_e2))
4840 return true;
4841
4842 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4843 &aff_e1, &data->name_expansion_cache);
4844 aff_combination_add (&aff_e1, &aff_offset);
4845 return aff_combination_zero_p (&aff_e1);
4846
d8af4ba3 4847 case POINTER_PLUS_EXPR:
3230c614
BC
4848 aff_combination_add (&aff_e2, &aff_offset);
4849 return aff_combination_zero_p (&aff_e2);
d8af4ba3
ZD
4850
4851 default:
4852 return false;
4853 }
4854}
4855
4856/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4857 comparison with CAND. NITER describes the number of iterations of
4858 the loops. If successful, the comparison in COMP_P is altered accordingly.
4859
4860 We aim to handle the following situation:
4861
4862 sometype *base, *p;
4863 int a, b, i;
4864
4865 i = a;
4866 p = p_0 = base + a;
4867
4868 do
4869 {
4870 bla (*p);
4871 p++;
4872 i++;
4873 }
4874 while (i < b);
4875
4876 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4877 We aim to optimize this to
4878
4879 p = p_0 = base + a;
4880 do
4881 {
4882 bla (*p);
4883 p++;
4884 }
4885 while (p < p_0 - a + b);
4886
4887 This preserves the correctness, since the pointer arithmetics does not
4888 overflow. More precisely:
4889
4890 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4891 overflow in computing it or the values of p.
4892 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4893 overflow. To prove this, we use the fact that p_0 = base + a. */
4894
4895static bool
4896iv_elimination_compare_lt (struct ivopts_data *data,
4897 struct iv_cand *cand, enum tree_code *comp_p,
4898 struct tree_niter_desc *niter)
4899{
4900 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
84562394 4901 struct aff_tree nit, tmpa, tmpb;
d8af4ba3
ZD
4902 enum tree_code comp;
4903 HOST_WIDE_INT step;
4904
4905 /* We need to know that the candidate induction variable does not overflow.
4906 While more complex analysis may be used to prove this, for now just
4907 check that the variable appears in the original program and that it
4908 is computed in a type that guarantees no overflows. */
4909 cand_type = TREE_TYPE (cand->iv->base);
4910 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4911 return false;
4912
4913 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4914 the calculation of the BOUND could overflow, making the comparison
4915 invalid. */
4916 if (!data->loop_single_exit_p)
4917 return false;
4918
4919 /* We need to be able to decide whether candidate is increasing or decreasing
4920 in order to choose the right comparison operator. */
4921 if (!cst_and_fits_in_hwi (cand->iv->step))
4922 return false;
4923 step = int_cst_value (cand->iv->step);
4924
4925 /* Check that the number of iterations matches the expected pattern:
4926 a + 1 > b ? 0 : b - a - 1. */
4927 mbz = niter->may_be_zero;
4928 if (TREE_CODE (mbz) == GT_EXPR)
4929 {
4930 /* Handle a + 1 > b. */
4931 tree op0 = TREE_OPERAND (mbz, 0);
4932 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4933 {
4934 a = TREE_OPERAND (op0, 0);
4935 b = TREE_OPERAND (mbz, 1);
4936 }
4937 else
4938 return false;
4939 }
4940 else if (TREE_CODE (mbz) == LT_EXPR)
4941 {
4942 tree op1 = TREE_OPERAND (mbz, 1);
4943
4944 /* Handle b < a + 1. */
4945 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4946 {
4947 a = TREE_OPERAND (op1, 0);
4948 b = TREE_OPERAND (mbz, 0);
4949 }
4950 else
4951 return false;
4952 }
4953 else
4954 return false;
4955
4956 /* Expected number of iterations is B - A - 1. Check that it matches
4957 the actual number, i.e., that B - A - NITER = 1. */
4958 tree_to_aff_combination (niter->niter, nit_type, &nit);
4959 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4960 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
807e902e
KZ
4961 aff_combination_scale (&nit, -1);
4962 aff_combination_scale (&tmpa, -1);
d8af4ba3
ZD
4963 aff_combination_add (&tmpb, &tmpa);
4964 aff_combination_add (&tmpb, &nit);
807e902e 4965 if (tmpb.n != 0 || tmpb.offset != 1)
d8af4ba3
ZD
4966 return false;
4967
4968 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4969 overflow. */
4970 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4971 cand->iv->step,
4972 fold_convert (TREE_TYPE (cand->iv->step), a));
3230c614 4973 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
d8af4ba3
ZD
4974 return false;
4975
4976 /* Determine the new comparison operator. */
4977 comp = step < 0 ? GT_EXPR : LT_EXPR;
4978 if (*comp_p == NE_EXPR)
4979 *comp_p = comp;
4980 else if (*comp_p == EQ_EXPR)
4981 *comp_p = invert_tree_comparison (comp, false);
4982 else
4983 gcc_unreachable ();
4984
4985 return true;
4986}
4987
8b11a64c 4988/* Check whether it is possible to express the condition in USE by comparison
d8af4ba3
ZD
4989 of candidate CAND. If so, store the value compared with to BOUND, and the
4990 comparison operator to COMP. */
8b11a64c
ZD
4991
4992static bool
ca4c3169 4993may_eliminate_iv (struct ivopts_data *data,
d8af4ba3
ZD
4994 struct iv_use *use, struct iv_cand *cand, tree *bound,
4995 enum tree_code *comp)
8b11a64c 4996{
e6845c23 4997 basic_block ex_bb;
8b11a64c 4998 edge exit;
d8af4ba3 4999 tree period;
ca4c3169 5000 struct loop *loop = data->current_loop;
7e2ac86c 5001 aff_tree bnd;
e2102efc 5002 struct tree_niter_desc *desc = NULL;
cbc012d5 5003
9be872b7
ZD
5004 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5005 return false;
5006
52778e2a
EB
5007 /* For now works only for exits that dominate the loop latch.
5008 TODO: extend to other conditions inside loop body. */
726a989a 5009 ex_bb = gimple_bb (use->stmt);
e6845c23 5010 if (use->stmt != last_stmt (ex_bb)
726a989a
RB
5011 || gimple_code (use->stmt) != GIMPLE_COND
5012 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
8b11a64c
ZD
5013 return false;
5014
e6845c23
ZD
5015 exit = EDGE_SUCC (ex_bb, 0);
5016 if (flow_bb_inside_loop_p (loop, exit->dest))
5017 exit = EDGE_SUCC (ex_bb, 1);
5018 if (flow_bb_inside_loop_p (loop, exit->dest))
5019 return false;
5020
d8af4ba3
ZD
5021 desc = niter_for_exit (data, exit);
5022 if (!desc)
8b11a64c
ZD
5023 return false;
5024
52778e2a
EB
5025 /* Determine whether we can use the variable to test the exit condition.
5026 This is the case iff the period of the induction variable is greater
5027 than the number of iterations for which the exit condition is true. */
ca4c3169 5028 period = iv_period (cand->iv);
8b11a64c 5029
52778e2a 5030 /* If the number of iterations is constant, compare against it directly. */
d8af4ba3 5031 if (TREE_CODE (desc->niter) == INTEGER_CST)
52778e2a 5032 {
e2102efc
XDL
5033 /* See cand_value_at. */
5034 if (stmt_after_increment (loop, cand, use->stmt))
5035 {
d8af4ba3 5036 if (!tree_int_cst_lt (desc->niter, period))
e2102efc
XDL
5037 return false;
5038 }
5039 else
5040 {
d8af4ba3 5041 if (tree_int_cst_lt (period, desc->niter))
e2102efc
XDL
5042 return false;
5043 }
52778e2a
EB
5044 }
5045
5046 /* If not, and if this is the only possible exit of the loop, see whether
5047 we can get a conservative estimate on the number of iterations of the
5048 entire loop and compare against that instead. */
e2102efc 5049 else
52778e2a 5050 {
807e902e 5051 widest_int period_value, max_niter;
e2102efc
XDL
5052
5053 max_niter = desc->max;
5054 if (stmt_after_increment (loop, cand, use->stmt))
807e902e
KZ
5055 max_niter += 1;
5056 period_value = wi::to_widest (period);
5057 if (wi::gtu_p (max_niter, period_value))
e2102efc 5058 {
073a8998 5059 /* See if we can take advantage of inferred loop bound information. */
d8af4ba3 5060 if (data->loop_single_exit_p)
e2102efc 5061 {
652c4c71 5062 if (!max_loop_iterations (loop, &max_niter))
e2102efc
XDL
5063 return false;
5064 /* The loop bound is already adjusted by adding 1. */
807e902e 5065 if (wi::gtu_p (max_niter, period_value))
e2102efc
XDL
5066 return false;
5067 }
5068 else
5069 return false;
5070 }
52778e2a
EB
5071 }
5072
d8af4ba3 5073 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
771f882e 5074
d6adff07
RB
5075 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5076 aff_combination_to_tree (&bnd));
d8af4ba3
ZD
5077 *comp = iv_elimination_compare (data, use);
5078
771f882e
ZD
5079 /* It is unlikely that computing the number of iterations using division
5080 would be more profitable than keeping the original induction variable. */
5081 if (expression_expensive_p (*bound))
5082 return false;
d8af4ba3
ZD
5083
5084 /* Sometimes, it is possible to handle the situation that the number of
5085 iterations may be zero unless additional assumtions by using <
5086 instead of != in the exit condition.
5087
5088 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5089 base the exit condition on it. However, that is often too
5090 expensive. */
5091 if (!integer_zerop (desc->may_be_zero))
5092 return iv_elimination_compare_lt (data, cand, comp, desc);
5093
8b11a64c
ZD
5094 return true;
5095}
5096
bb8d292d
TV
5097 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5098 be copied, if is is used in the loop body and DATA->body_includes_call. */
5099
5100static int
5101parm_decl_cost (struct ivopts_data *data, tree bound)
5102{
5103 tree sbound = bound;
5104 STRIP_NOPS (sbound);
5105
5106 if (TREE_CODE (sbound) == SSA_NAME
67386041 5107 && SSA_NAME_IS_DEFAULT_DEF (sbound)
bb8d292d 5108 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
bb8d292d
TV
5109 && data->body_includes_call)
5110 return COSTS_N_INSNS (1);
5111
5112 return 0;
5113}
e2102efc 5114
8b11a64c
ZD
5115/* Determines cost of basing replacement of USE on CAND in a condition. */
5116
b1b02be2 5117static bool
8b11a64c
ZD
5118determine_use_iv_cost_condition (struct ivopts_data *data,
5119 struct iv_use *use, struct iv_cand *cand)
5120{
b697aed4
ZD
5121 tree bound = NULL_TREE;
5122 struct iv *cmp_iv;
5123 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
bb8d292d 5124 comp_cost elim_cost, express_cost, cost, bound_cost;
b697aed4 5125 bool ok;
bb8d292d 5126 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
04eadb13 5127 tree *control_var, *bound_cst;
76725a03 5128 enum tree_code comp = ERROR_MARK;
8b11a64c
ZD
5129
5130 /* Only consider real candidates. */
5131 if (!cand->iv)
5132 {
d8af4ba3
ZD
5133 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5134 ERROR_MARK, -1);
b1b02be2 5135 return false;
8b11a64c
ZD
5136 }
5137
b697aed4 5138 /* Try iv elimination. */
d8af4ba3 5139 if (may_eliminate_iv (data, use, cand, &bound, &comp))
cbc012d5
ZD
5140 {
5141 elim_cost = force_var_cost (data, bound, &depends_on_elim);
bb8d292d
TV
5142 if (elim_cost.cost == 0)
5143 elim_cost.cost = parm_decl_cost (data, bound);
5144 else if (TREE_CODE (bound) == INTEGER_CST)
5145 elim_cost.cost = 0;
5146 /* If we replace a loop condition 'i < n' with 'p < base + n',
5147 depends_on_elim will have 'base' and 'n' set, which implies
5148 that both 'base' and 'n' will be live during the loop. More likely,
5149 'base + n' will be loop invariant, resulting in only one live value
5150 during the loop. So in that case we clear depends_on_elim and set
5151 elim_inv_expr_id instead. */
5152 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5153 {
5154 elim_inv_expr_id = get_expr_id (data, bound);
5155 bitmap_clear (depends_on_elim);
5156 }
cbc012d5
ZD
5157 /* The bound is a loop invariant, so it will be only computed
5158 once. */
6521ac85 5159 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
cbc012d5 5160 }
b697aed4 5161 else
6e8c65f6 5162 elim_cost = infinite_cost;
8b11a64c 5163
b697aed4
ZD
5164 /* Try expressing the original giv. If it is compared with an invariant,
5165 note that we cannot get rid of it. */
04eadb13
SP
5166 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5167 NULL, &cmp_iv);
b697aed4 5168 gcc_assert (ok);
8b11a64c 5169
04eadb13
SP
5170 /* When the condition is a comparison of the candidate IV against
5171 zero, prefer this IV.
5172
073a8998 5173 TODO: The constant that we're subtracting from the cost should
04eadb13
SP
5174 be target-dependent. This information should be added to the
5175 target costs for each backend. */
572ae476
CF
5176 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5177 && integer_zerop (*bound_cst)
04eadb13
SP
5178 && (operand_equal_p (*control_var, cand->var_after, 0)
5179 || operand_equal_p (*control_var, cand->var_before, 0)))
5180 elim_cost.cost -= 1;
5181
b697aed4 5182 express_cost = get_computation_cost (data, use, cand, false,
18081149 5183 &depends_on_express, NULL,
bb8d292d 5184 &express_inv_expr_id);
b697aed4
ZD
5185 fd_ivopts_data = data;
5186 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
f5f12961 5187
bb8d292d
TV
5188 /* Count the cost of the original bound as well. */
5189 bound_cost = force_var_cost (data, *bound_cst, NULL);
5190 if (bound_cost.cost == 0)
5191 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5192 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5193 bound_cost.cost = 0;
5194 express_cost.cost += bound_cost.cost;
5195
b9ff6079
AP
5196 /* Choose the better approach, preferring the eliminated IV. */
5197 if (compare_costs (elim_cost, express_cost) <= 0)
8b11a64c 5198 {
b697aed4
ZD
5199 cost = elim_cost;
5200 depends_on = depends_on_elim;
5201 depends_on_elim = NULL;
bb8d292d 5202 inv_expr_id = elim_inv_expr_id;
b697aed4
ZD
5203 }
5204 else
5205 {
5206 cost = express_cost;
5207 depends_on = depends_on_express;
5208 depends_on_express = NULL;
5209 bound = NULL_TREE;
d8af4ba3 5210 comp = ERROR_MARK;
bb8d292d 5211 inv_expr_id = express_inv_expr_id;
8b11a64c
ZD
5212 }
5213
d8af4ba3 5214 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
b697aed4
ZD
5215
5216 if (depends_on_elim)
5217 BITMAP_FREE (depends_on_elim);
5218 if (depends_on_express)
5219 BITMAP_FREE (depends_on_express);
5220
6e8c65f6 5221 return !infinite_cost_p (cost);
8b11a64c
ZD
5222}
5223
b1b02be2
ZD
5224/* Determines cost of basing replacement of USE on CAND. Returns false
5225 if USE cannot be based on CAND. */
8b11a64c 5226
b1b02be2 5227static bool
8b11a64c
ZD
5228determine_use_iv_cost (struct ivopts_data *data,
5229 struct iv_use *use, struct iv_cand *cand)
5230{
5231 switch (use->type)
5232 {
5233 case USE_NONLINEAR_EXPR:
b1b02be2 5234 return determine_use_iv_cost_generic (data, use, cand);
8b11a64c 5235
8b11a64c 5236 case USE_ADDRESS:
b1b02be2 5237 return determine_use_iv_cost_address (data, use, cand);
8b11a64c
ZD
5238
5239 case USE_COMPARE:
b1b02be2 5240 return determine_use_iv_cost_condition (data, use, cand);
8b11a64c
ZD
5241
5242 default:
1e128c5f 5243 gcc_unreachable ();
8b11a64c
ZD
5244 }
5245}
5246
2c08497a
BS
5247/* Return true if get_computation_cost indicates that autoincrement is
5248 a possibility for the pair of USE and CAND, false otherwise. */
5249
5250static bool
5251autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5252 struct iv_cand *cand)
5253{
5254 bitmap depends_on;
5255 bool can_autoinc;
5256 comp_cost cost;
5257
5258 if (use->type != USE_ADDRESS)
5259 return false;
5260
5261 cost = get_computation_cost (data, use, cand, true, &depends_on,
18081149 5262 &can_autoinc, NULL);
2c08497a
BS
5263
5264 BITMAP_FREE (depends_on);
5265
5266 return !infinite_cost_p (cost) && can_autoinc;
5267}
5268
5269/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5270 use that allows autoincrement, and set their AINC_USE if possible. */
5271
5272static void
5273set_autoinc_for_original_candidates (struct ivopts_data *data)
5274{
5275 unsigned i, j;
5276
5277 for (i = 0; i < n_iv_cands (data); i++)
5278 {
5279 struct iv_cand *cand = iv_cand (data, i);
85ff4ec6
BC
5280 struct iv_use *closest_before = NULL;
5281 struct iv_use *closest_after = NULL;
2c08497a
BS
5282 if (cand->pos != IP_ORIGINAL)
5283 continue;
85ff4ec6 5284
2c08497a
BS
5285 for (j = 0; j < n_iv_uses (data); j++)
5286 {
5287 struct iv_use *use = iv_use (data, j);
5288 unsigned uid = gimple_uid (use->stmt);
85ff4ec6
BC
5289
5290 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
2c08497a 5291 continue;
85ff4ec6
BC
5292
5293 if (uid < gimple_uid (cand->incremented_at)
5294 && (closest_before == NULL
5295 || uid > gimple_uid (closest_before->stmt)))
5296 closest_before = use;
5297
5298 if (uid > gimple_uid (cand->incremented_at)
5299 && (closest_after == NULL
5300 || uid < gimple_uid (closest_after->stmt)))
5301 closest_after = use;
2c08497a 5302 }
85ff4ec6
BC
5303
5304 if (closest_before != NULL
5305 && autoinc_possible_for_pair (data, closest_before, cand))
5306 cand->ainc_use = closest_before;
5307 else if (closest_after != NULL
5308 && autoinc_possible_for_pair (data, closest_after, cand))
5309 cand->ainc_use = closest_after;
2c08497a
BS
5310 }
5311}
5312
5313/* Finds the candidates for the induction variables. */
5314
5315static void
5316find_iv_candidates (struct ivopts_data *data)
5317{
5318 /* Add commonly used ivs. */
5319 add_standard_iv_candidates (data);
5320
5321 /* Add old induction variables. */
5322 add_old_ivs_candidates (data);
5323
5324 /* Add induction variables derived from uses. */
5325 add_derived_ivs_candidates (data);
5326
5327 set_autoinc_for_original_candidates (data);
5328
5329 /* Record the important candidates. */
5330 record_important_candidates (data);
5331}
5332
8b11a64c
ZD
5333/* Determines costs of basing the use of the iv on an iv candidate. */
5334
5335static void
5336determine_use_iv_costs (struct ivopts_data *data)
5337{
5338 unsigned i, j;
5339 struct iv_use *use;
5340 struct iv_cand *cand;
8bdbfff5 5341 bitmap to_clear = BITMAP_ALLOC (NULL);
8b11a64c
ZD
5342
5343 alloc_use_cost_map (data);
5344
8b11a64c
ZD
5345 for (i = 0; i < n_iv_uses (data); i++)
5346 {
5347 use = iv_use (data, i);
5348
5349 if (data->consider_all_candidates)
5350 {
5351 for (j = 0; j < n_iv_cands (data); j++)
5352 {
5353 cand = iv_cand (data, j);
5354 determine_use_iv_cost (data, use, cand);
5355 }
5356 }
5357 else
5358 {
87c476a2
ZD
5359 bitmap_iterator bi;
5360
5361 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
8b11a64c
ZD
5362 {
5363 cand = iv_cand (data, j);
b1b02be2
ZD
5364 if (!determine_use_iv_cost (data, use, cand))
5365 bitmap_set_bit (to_clear, j);
87c476a2 5366 }
b1b02be2
ZD
5367
5368 /* Remove the candidates for that the cost is infinite from
5369 the list of related candidates. */
5370 bitmap_and_compl_into (use->related_cands, to_clear);
5371 bitmap_clear (to_clear);
8b11a64c
ZD
5372 }
5373 }
5374
8bdbfff5 5375 BITMAP_FREE (to_clear);
b1b02be2 5376
8b11a64c
ZD
5377 if (dump_file && (dump_flags & TDF_DETAILS))
5378 {
5379 fprintf (dump_file, "Use-candidate costs:\n");
5380
5381 for (i = 0; i < n_iv_uses (data); i++)
5382 {
5383 use = iv_use (data, i);
5384
5385 fprintf (dump_file, "Use %d:\n", i);
6e8c65f6 5386 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
8b11a64c
ZD
5387 for (j = 0; j < use->n_map_members; j++)
5388 {
5389 if (!use->cost_map[j].cand
6e8c65f6 5390 || infinite_cost_p (use->cost_map[j].cost))
8b11a64c
ZD
5391 continue;
5392
6e8c65f6 5393 fprintf (dump_file, " %d\t%d\t%d\t",
8b11a64c 5394 use->cost_map[j].cand->id,
6e8c65f6
ZD
5395 use->cost_map[j].cost.cost,
5396 use->cost_map[j].cost.complexity);
8b11a64c
ZD
5397 if (use->cost_map[j].depends_on)
5398 bitmap_print (dump_file,
5399 use->cost_map[j].depends_on, "","");
18081149
XDL
5400 if (use->cost_map[j].inv_expr_id != -1)
5401 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
8b11a64c
ZD
5402 fprintf (dump_file, "\n");
5403 }
5404
5405 fprintf (dump_file, "\n");
5406 }
5407 fprintf (dump_file, "\n");
5408 }
5409}
5410
5411/* Determines cost of the candidate CAND. */
5412
5413static void
5414determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5415{
6e8c65f6
ZD
5416 comp_cost cost_base;
5417 unsigned cost, cost_step;
4366cf6d 5418 tree base;
8b11a64c
ZD
5419
5420 if (!cand->iv)
5421 {
5422 cand->cost = 0;
5423 return;
5424 }
5425
5426 /* There are two costs associated with the candidate -- its increment
5427 and its initialization. The second is almost negligible for any loop
5428 that rolls enough, so we take it just very little into account. */
5429
5430 base = cand->iv->base;
5431 cost_base = force_var_cost (data, base, NULL);
a53c5024
TV
5432 /* It will be exceptional that the iv register happens to be initialized with
5433 the proper value at no cost. In general, there will at least be a regcopy
5434 or a const set. */
5435 if (cost_base.cost == 0)
5436 cost_base.cost = COSTS_N_INSNS (1);
5322d07e 5437 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
8b11a64c 5438
6521ac85 5439 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
8b11a64c 5440
6e8c65f6 5441 /* Prefer the original ivs unless we may gain something by replacing it.
fa10beec 5442 The reason is to make debugging simpler; so this is not relevant for
6e8c65f6
ZD
5443 artificial ivs created by other optimization passes. */
5444 if (cand->pos != IP_ORIGINAL
70b5e7dc 5445 || !SSA_NAME_VAR (cand->var_before)
6e8c65f6
ZD
5446 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5447 cost++;
b8698a0f 5448
8b11a64c
ZD
5449 /* Prefer not to insert statements into latch unless there are some
5450 already (so that we do not create unnecessary jumps). */
4366cf6d
ZD
5451 if (cand->pos == IP_END
5452 && empty_block_p (ip_end_pos (data->current_loop)))
6e8c65f6
ZD
5453 cost++;
5454
5455 cand->cost = cost;
2c08497a 5456 cand->cost_step = cost_step;
8b11a64c
ZD
5457}
5458
5459/* Determines costs of computation of the candidates. */
5460
5461static void
5462determine_iv_costs (struct ivopts_data *data)
5463{
5464 unsigned i;
5465
5466 if (dump_file && (dump_flags & TDF_DETAILS))
5467 {
5468 fprintf (dump_file, "Candidate costs:\n");
5469 fprintf (dump_file, " cand\tcost\n");
5470 }
5471
5472 for (i = 0; i < n_iv_cands (data); i++)
5473 {
5474 struct iv_cand *cand = iv_cand (data, i);
5475
5476 determine_iv_cost (data, cand);
5477
5478 if (dump_file && (dump_flags & TDF_DETAILS))
5479 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5480 }
2c08497a 5481
0f14009a
BS
5482 if (dump_file && (dump_flags & TDF_DETAILS))
5483 fprintf (dump_file, "\n");
8b11a64c
ZD
5484}
5485
5486/* Calculates cost for having SIZE induction variables. */
5487
5488static unsigned
5489ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5490{
a154b43a
ZD
5491 /* We add size to the cost, so that we prefer eliminating ivs
5492 if possible. */
bec922f0
SL
5493 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5494 data->body_includes_call);
8b11a64c
ZD
5495}
5496
5497/* For each size of the induction variable set determine the penalty. */
5498
5499static void
5500determine_set_costs (struct ivopts_data *data)
5501{
5502 unsigned j, n;
538dd0b7
DM
5503 gphi *phi;
5504 gphi_iterator psi;
726a989a 5505 tree op;
8b11a64c 5506 struct loop *loop = data->current_loop;
87c476a2 5507 bitmap_iterator bi;
8b11a64c 5508
8b11a64c
ZD
5509 if (dump_file && (dump_flags & TDF_DETAILS))
5510 {
5511 fprintf (dump_file, "Global costs:\n");
5512 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
bec922f0 5513 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
f40751dd
JH
5514 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5515 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
8b11a64c
ZD
5516 }
5517
5518 n = 0;
726a989a 5519 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 5520 {
538dd0b7 5521 phi = psi.phi ();
8b11a64c
ZD
5522 op = PHI_RESULT (phi);
5523
ea057359 5524 if (virtual_operand_p (op))
8b11a64c
ZD
5525 continue;
5526
5527 if (get_iv (data, op))
5528 continue;
5529
5530 n++;
5531 }
5532
87c476a2 5533 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
5534 {
5535 struct version_info *info = ver_info (data, j);
5536
5537 if (info->inv_id && info->has_nonlin_use)
5538 n++;
87c476a2 5539 }
8b11a64c 5540
9a2ef6b8 5541 data->regs_used = n;
8b11a64c
ZD
5542 if (dump_file && (dump_flags & TDF_DETAILS))
5543 fprintf (dump_file, " regs_used %d\n", n);
5544
5545 if (dump_file && (dump_flags & TDF_DETAILS))
5546 {
5547 fprintf (dump_file, " cost for size:\n");
5548 fprintf (dump_file, " ivs\tcost\n");
5549 for (j = 0; j <= 2 * target_avail_regs; j++)
5550 fprintf (dump_file, " %d\t%d\n", j,
5551 ivopts_global_cost_for_size (data, j));
5552 fprintf (dump_file, "\n");
5553 }
5554}
5555
b1b02be2 5556/* Returns true if A is a cheaper cost pair than B. */
8b11a64c 5557
b1b02be2
ZD
5558static bool
5559cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
8b11a64c 5560{
6e8c65f6
ZD
5561 int cmp;
5562
b1b02be2
ZD
5563 if (!a)
5564 return false;
8b11a64c 5565
b1b02be2
ZD
5566 if (!b)
5567 return true;
5568
6e8c65f6
ZD
5569 cmp = compare_costs (a->cost, b->cost);
5570 if (cmp < 0)
b1b02be2
ZD
5571 return true;
5572
6e8c65f6 5573 if (cmp > 0)
b1b02be2
ZD
5574 return false;
5575
5576 /* In case the costs are the same, prefer the cheaper candidate. */
5577 if (a->cand->cost < b->cand->cost)
5578 return true;
5579
5580 return false;
5581}
5582
18081149
XDL
5583
5584/* Returns candidate by that USE is expressed in IVS. */
5585
5586static struct cost_pair *
5587iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5588{
5589 return ivs->cand_for_use[use->id];
5590}
5591
b1b02be2
ZD
5592/* Computes the cost field of IVS structure. */
5593
5594static void
5595iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5596{
6e8c65f6 5597 comp_cost cost = ivs->cand_use_cost;
18081149 5598
6e8c65f6 5599 cost.cost += ivs->cand_cost;
18081149 5600
18081149 5601 cost.cost += ivopts_global_cost_for_size (data,
f06e400f 5602 ivs->n_regs + ivs->num_used_inv_expr);
b1b02be2
ZD
5603
5604 ivs->cost = cost;
5605}
5606
9be872b7
ZD
5607/* Remove invariants in set INVS to set IVS. */
5608
5609static void
5610iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5611{
5612 bitmap_iterator bi;
5613 unsigned iid;
5614
5615 if (!invs)
5616 return;
5617
5618 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5619 {
5620 ivs->n_invariant_uses[iid]--;
5621 if (ivs->n_invariant_uses[iid] == 0)
18081149 5622 ivs->n_regs--;
9be872b7
ZD
5623 }
5624}
5625
b1b02be2
ZD
5626/* Set USE not to be expressed by any candidate in IVS. */
5627
5628static void
5629iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5630 struct iv_use *use)
5631{
9be872b7 5632 unsigned uid = use->id, cid;
b1b02be2 5633 struct cost_pair *cp;
b1b02be2
ZD
5634
5635 cp = ivs->cand_for_use[uid];
5636 if (!cp)
5637 return;
5638 cid = cp->cand->id;
5639
5640 ivs->bad_uses++;
5641 ivs->cand_for_use[uid] = NULL;
5642 ivs->n_cand_uses[cid]--;
5643
5644 if (ivs->n_cand_uses[cid] == 0)
8b11a64c 5645 {
b1b02be2
ZD
5646 bitmap_clear_bit (ivs->cands, cid);
5647 /* Do not count the pseudocandidates. */
5648 if (cp->cand->iv)
5649 ivs->n_regs--;
36f5ada1 5650 ivs->n_cands--;
b1b02be2 5651 ivs->cand_cost -= cp->cand->cost;
9be872b7
ZD
5652
5653 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
b1b02be2
ZD
5654 }
5655
6e8c65f6 5656 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
b1b02be2 5657
9be872b7 5658 iv_ca_set_remove_invariants (ivs, cp->depends_on);
f06e400f
XDL
5659
5660 if (cp->inv_expr_id != -1)
5661 {
5662 ivs->used_inv_expr[cp->inv_expr_id]--;
5663 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5664 ivs->num_used_inv_expr--;
5665 }
9be872b7
ZD
5666 iv_ca_recount_cost (data, ivs);
5667}
5668
5669/* Add invariants in set INVS to set IVS. */
80cad5fa 5670
9be872b7
ZD
5671static void
5672iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5673{
5674 bitmap_iterator bi;
5675 unsigned iid;
5676
5677 if (!invs)
5678 return;
5679
5680 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
b1b02be2 5681 {
9be872b7
ZD
5682 ivs->n_invariant_uses[iid]++;
5683 if (ivs->n_invariant_uses[iid] == 1)
18081149 5684 ivs->n_regs++;
8b11a64c 5685 }
b1b02be2
ZD
5686}
5687
5688/* Set cost pair for USE in set IVS to CP. */
5689
5690static void
5691iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5692 struct iv_use *use, struct cost_pair *cp)
5693{
9be872b7 5694 unsigned uid = use->id, cid;
b1b02be2
ZD
5695
5696 if (ivs->cand_for_use[uid] == cp)
5697 return;
5698
5699 if (ivs->cand_for_use[uid])
5700 iv_ca_set_no_cp (data, ivs, use);
5701
5702 if (cp)
8b11a64c 5703 {
b1b02be2 5704 cid = cp->cand->id;
8b11a64c 5705
b1b02be2
ZD
5706 ivs->bad_uses--;
5707 ivs->cand_for_use[uid] = cp;
5708 ivs->n_cand_uses[cid]++;
5709 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 5710 {
b1b02be2
ZD
5711 bitmap_set_bit (ivs->cands, cid);
5712 /* Do not count the pseudocandidates. */
5713 if (cp->cand->iv)
5714 ivs->n_regs++;
36f5ada1 5715 ivs->n_cands++;
b1b02be2 5716 ivs->cand_cost += cp->cand->cost;
b1b02be2 5717
9be872b7 5718 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
8b11a64c
ZD
5719 }
5720
6e8c65f6 5721 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
9be872b7 5722 iv_ca_set_add_invariants (ivs, cp->depends_on);
f06e400f
XDL
5723
5724 if (cp->inv_expr_id != -1)
5725 {
5726 ivs->used_inv_expr[cp->inv_expr_id]++;
5727 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5728 ivs->num_used_inv_expr++;
5729 }
b1b02be2 5730 iv_ca_recount_cost (data, ivs);
87c476a2 5731 }
b1b02be2
ZD
5732}
5733
5734/* Extend set IVS by expressing USE by some of the candidates in it
f22ae1ec
BC
5735 if possible. Consider all important candidates if candidates in
5736 set IVS don't give any result. */
b1b02be2
ZD
5737
5738static void
5739iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
f22ae1ec 5740 struct iv_use *use)
b1b02be2
ZD
5741{
5742 struct cost_pair *best_cp = NULL, *cp;
5743 bitmap_iterator bi;
5744 unsigned i;
f22ae1ec 5745 struct iv_cand *cand;
8b11a64c 5746
b1b02be2 5747 gcc_assert (ivs->upto >= use->id);
f22ae1ec
BC
5748 ivs->upto++;
5749 ivs->bad_uses++;
b1b02be2 5750
f22ae1ec 5751 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
b1b02be2 5752 {
f22ae1ec 5753 cand = iv_cand (data, i);
18081149 5754 cp = get_use_iv_cost (data, use, cand);
b1b02be2
ZD
5755 if (cheaper_cost_pair (cp, best_cp))
5756 best_cp = cp;
5757 }
f22ae1ec
BC
5758
5759 if (best_cp == NULL)
5760 {
5761 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5762 {
5763 cand = iv_cand (data, i);
5764 cp = get_use_iv_cost (data, use, cand);
5765 if (cheaper_cost_pair (cp, best_cp))
5766 best_cp = cp;
5767 }
5768 }
8b11a64c 5769
b1b02be2 5770 iv_ca_set_cp (data, ivs, use, best_cp);
8b11a64c
ZD
5771}
5772
b1b02be2 5773/* Get cost for assignment IVS. */
8b11a64c 5774
6e8c65f6 5775static comp_cost
b1b02be2
ZD
5776iv_ca_cost (struct iv_ca *ivs)
5777{
c4e93e28
AH
5778 /* This was a conditional expression but it triggered a bug in
5779 Sun C 5.5. */
cb4ad180
AH
5780 if (ivs->bad_uses)
5781 return infinite_cost;
5782 else
5783 return ivs->cost;
b1b02be2
ZD
5784}
5785
5786/* Returns true if all dependences of CP are among invariants in IVS. */
5787
5788static bool
5789iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
8b11a64c
ZD
5790{
5791 unsigned i;
87c476a2 5792 bitmap_iterator bi;
8b11a64c 5793
b1b02be2
ZD
5794 if (!cp->depends_on)
5795 return true;
5796
5797 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
8b11a64c 5798 {
b1b02be2
ZD
5799 if (ivs->n_invariant_uses[i] == 0)
5800 return false;
5801 }
5802
5803 return true;
5804}
5805
5806/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5807 it before NEXT_CHANGE. */
5808
5809static struct iv_ca_delta *
5810iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5811 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5812{
5ed6ace5 5813 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
b1b02be2
ZD
5814
5815 change->use = use;
5816 change->old_cp = old_cp;
5817 change->new_cp = new_cp;
5818 change->next_change = next_change;
5819
5820 return change;
5821}
5822
36f5ada1 5823/* Joins two lists of changes L1 and L2. Destructive -- old lists
6c6cfbfd 5824 are rewritten. */
36f5ada1
ZD
5825
5826static struct iv_ca_delta *
5827iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5828{
5829 struct iv_ca_delta *last;
5830
5831 if (!l2)
5832 return l1;
5833
5834 if (!l1)
5835 return l2;
5836
5837 for (last = l1; last->next_change; last = last->next_change)
5838 continue;
5839 last->next_change = l2;
5840
5841 return l1;
5842}
5843
36f5ada1
ZD
5844/* Reverse the list of changes DELTA, forming the inverse to it. */
5845
5846static struct iv_ca_delta *
5847iv_ca_delta_reverse (struct iv_ca_delta *delta)
5848{
5849 struct iv_ca_delta *act, *next, *prev = NULL;
36f5ada1
ZD
5850
5851 for (act = delta; act; act = next)
5852 {
5853 next = act->next_change;
5854 act->next_change = prev;
5855 prev = act;
5856
fab27f52 5857 std::swap (act->old_cp, act->new_cp);
36f5ada1
ZD
5858 }
5859
5860 return prev;
5861}
5862
b1b02be2
ZD
5863/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5864 reverted instead. */
5865
5866static void
5867iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5868 struct iv_ca_delta *delta, bool forward)
5869{
5870 struct cost_pair *from, *to;
36f5ada1 5871 struct iv_ca_delta *act;
b1b02be2 5872
36f5ada1
ZD
5873 if (!forward)
5874 delta = iv_ca_delta_reverse (delta);
b1b02be2 5875
36f5ada1
ZD
5876 for (act = delta; act; act = act->next_change)
5877 {
5878 from = act->old_cp;
5879 to = act->new_cp;
5880 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5881 iv_ca_set_cp (data, ivs, act->use, to);
8b11a64c 5882 }
36f5ada1
ZD
5883
5884 if (!forward)
5885 iv_ca_delta_reverse (delta);
b1b02be2 5886}
8b11a64c 5887
b1b02be2 5888/* Returns true if CAND is used in IVS. */
8b11a64c 5889
b1b02be2
ZD
5890static bool
5891iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5892{
5893 return ivs->n_cand_uses[cand->id] > 0;
5894}
8b11a64c 5895
36f5ada1
ZD
5896/* Returns number of induction variable candidates in the set IVS. */
5897
5898static unsigned
5899iv_ca_n_cands (struct iv_ca *ivs)
5900{
5901 return ivs->n_cands;
5902}
5903
b1b02be2
ZD
5904/* Free the list of changes DELTA. */
5905
5906static void
5907iv_ca_delta_free (struct iv_ca_delta **delta)
5908{
5909 struct iv_ca_delta *act, *next;
5910
5911 for (act = *delta; act; act = next)
87c476a2 5912 {
b1b02be2
ZD
5913 next = act->next_change;
5914 free (act);
87c476a2 5915 }
8b11a64c 5916
b1b02be2
ZD
5917 *delta = NULL;
5918}
5919
5920/* Allocates new iv candidates assignment. */
5921
5922static struct iv_ca *
5923iv_ca_new (struct ivopts_data *data)
5924{
5ed6ace5 5925 struct iv_ca *nw = XNEW (struct iv_ca);
8b11a64c 5926
b1b02be2
ZD
5927 nw->upto = 0;
5928 nw->bad_uses = 0;
5ed6ace5
MD
5929 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5930 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
8bdbfff5 5931 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 5932 nw->n_cands = 0;
b1b02be2 5933 nw->n_regs = 0;
7735d6c7 5934 nw->cand_use_cost = no_cost;
b1b02be2 5935 nw->cand_cost = 0;
5ed6ace5 5936 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
7735d6c7 5937 nw->cost = no_cost;
f06e400f
XDL
5938 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5939 nw->num_used_inv_expr = 0;
b1b02be2
ZD
5940
5941 return nw;
5942}
5943
5944/* Free memory occupied by the set IVS. */
5945
5946static void
5947iv_ca_free (struct iv_ca **ivs)
5948{
5949 free ((*ivs)->cand_for_use);
5950 free ((*ivs)->n_cand_uses);
8bdbfff5 5951 BITMAP_FREE ((*ivs)->cands);
b1b02be2 5952 free ((*ivs)->n_invariant_uses);
f06e400f 5953 free ((*ivs)->used_inv_expr);
b1b02be2
ZD
5954 free (*ivs);
5955 *ivs = NULL;
5956}
5957
5958/* Dumps IVS to FILE. */
5959
5960static void
5961iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5962{
5963 const char *pref = " invariants ";
5964 unsigned i;
6e8c65f6 5965 comp_cost cost = iv_ca_cost (ivs);
b1b02be2 5966
18081149
XDL
5967 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5968 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5969 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5970 bitmap_print (file, ivs->cands, " candidates: ","\n");
5971
5972 for (i = 0; i < ivs->upto; i++)
5973 {
5974 struct iv_use *use = iv_use (data, i);
5975 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5976 if (cp)
5977 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5978 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5979 else
5980 fprintf (file, " use:%d --> ??\n", use->id);
5981 }
b1b02be2
ZD
5982
5983 for (i = 1; i <= data->max_inv_id; i++)
5984 if (ivs->n_invariant_uses[i])
5985 {
5986 fprintf (file, "%s%d", pref, i);
5987 pref = ", ";
5988 }
18081149 5989 fprintf (file, "\n\n");
b1b02be2
ZD
5990}
5991
5992/* Try changing candidate in IVS to CAND for each use. Return cost of the
36f5ada1 5993 new set, and store differences in DELTA. Number of induction variables
18081149
XDL
5994 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5995 the function will try to find a solution with mimimal iv candidates. */
b1b02be2 5996
6e8c65f6 5997static comp_cost
b1b02be2 5998iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
36f5ada1 5999 struct iv_cand *cand, struct iv_ca_delta **delta,
18081149 6000 unsigned *n_ivs, bool min_ncand)
b1b02be2 6001{
6e8c65f6
ZD
6002 unsigned i;
6003 comp_cost cost;
b1b02be2
ZD
6004 struct iv_use *use;
6005 struct cost_pair *old_cp, *new_cp;
6006
6007 *delta = NULL;
6008 for (i = 0; i < ivs->upto; i++)
6009 {
6010 use = iv_use (data, i);
6011 old_cp = iv_ca_cand_for_use (ivs, use);
6012
6013 if (old_cp
6014 && old_cp->cand == cand)
6015 continue;
6016
6017 new_cp = get_use_iv_cost (data, use, cand);
6018 if (!new_cp)
6019 continue;
6020
18081149 6021 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
b1b02be2 6022 continue;
b8698a0f 6023
18081149
XDL
6024 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6025 continue;
b1b02be2
ZD
6026
6027 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6028 }
6029
6030 iv_ca_delta_commit (data, ivs, *delta, true);
6031 cost = iv_ca_cost (ivs);
36f5ada1
ZD
6032 if (n_ivs)
6033 *n_ivs = iv_ca_n_cands (ivs);
b1b02be2 6034 iv_ca_delta_commit (data, ivs, *delta, false);
8b11a64c
ZD
6035
6036 return cost;
6037}
6038
a0eca485 6039/* Try narrowing set IVS by removing CAND. Return the cost of
2c407426
BC
6040 the new set and store the differences in DELTA. START is
6041 the candidate with which we start narrowing. */
8b11a64c 6042
6e8c65f6 6043static comp_cost
b1b02be2 6044iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
2c407426
BC
6045 struct iv_cand *cand, struct iv_cand *start,
6046 struct iv_ca_delta **delta)
8b11a64c 6047{
b1b02be2
ZD
6048 unsigned i, ci;
6049 struct iv_use *use;
6050 struct cost_pair *old_cp, *new_cp, *cp;
6051 bitmap_iterator bi;
6052 struct iv_cand *cnd;
2c407426 6053 comp_cost cost, best_cost, acost;
b1b02be2
ZD
6054
6055 *delta = NULL;
6056 for (i = 0; i < n_iv_uses (data); i++)
6057 {
6058 use = iv_use (data, i);
6059
6060 old_cp = iv_ca_cand_for_use (ivs, use);
6061 if (old_cp->cand != cand)
6062 continue;
6063
2c407426
BC
6064 best_cost = iv_ca_cost (ivs);
6065 /* Start narrowing with START. */
6066 new_cp = get_use_iv_cost (data, use, start);
b1b02be2
ZD
6067
6068 if (data->consider_all_candidates)
6069 {
6070 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6071 {
2c407426 6072 if (ci == cand->id || (start && ci == start->id))
b1b02be2
ZD
6073 continue;
6074
6075 cnd = iv_cand (data, ci);
6076
6077 cp = get_use_iv_cost (data, use, cnd);
6078 if (!cp)
6079 continue;
18081149 6080
2c407426
BC
6081 iv_ca_set_cp (data, ivs, use, cp);
6082 acost = iv_ca_cost (ivs);
b1b02be2 6083
2c407426
BC
6084 if (compare_costs (acost, best_cost) < 0)
6085 {
6086 best_cost = acost;
6087 new_cp = cp;
6088 }
b1b02be2
ZD
6089 }
6090 }
6091 else
6092 {
6093 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6094 {
2c407426 6095 if (ci == cand->id || (start && ci == start->id))
b1b02be2
ZD
6096 continue;
6097
6098 cnd = iv_cand (data, ci);
6099
6100 cp = get_use_iv_cost (data, use, cnd);
6101 if (!cp)
6102 continue;
b8698a0f 6103
2c407426
BC
6104 iv_ca_set_cp (data, ivs, use, cp);
6105 acost = iv_ca_cost (ivs);
b1b02be2 6106
2c407426
BC
6107 if (compare_costs (acost, best_cost) < 0)
6108 {
6109 best_cost = acost;
6110 new_cp = cp;
6111 }
b1b02be2
ZD
6112 }
6113 }
2c407426
BC
6114 /* Restore to old cp for use. */
6115 iv_ca_set_cp (data, ivs, use, old_cp);
b1b02be2
ZD
6116
6117 if (!new_cp)
6118 {
6119 iv_ca_delta_free (delta);
6e8c65f6 6120 return infinite_cost;
b1b02be2
ZD
6121 }
6122
6123 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6124 }
6125
6126 iv_ca_delta_commit (data, ivs, *delta, true);
6127 cost = iv_ca_cost (ivs);
6128 iv_ca_delta_commit (data, ivs, *delta, false);
6129
6130 return cost;
8b11a64c
ZD
6131}
6132
36f5ada1
ZD
6133/* Try optimizing the set of candidates IVS by removing candidates different
6134 from to EXCEPT_CAND from it. Return cost of the new set, and store
6135 differences in DELTA. */
6136
6e8c65f6 6137static comp_cost
36f5ada1
ZD
6138iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6139 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6140{
6141 bitmap_iterator bi;
6142 struct iv_ca_delta *act_delta, *best_delta;
6e8c65f6
ZD
6143 unsigned i;
6144 comp_cost best_cost, acost;
36f5ada1
ZD
6145 struct iv_cand *cand;
6146
6147 best_delta = NULL;
6148 best_cost = iv_ca_cost (ivs);
6149
6150 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6151 {
6152 cand = iv_cand (data, i);
6153
6154 if (cand == except_cand)
6155 continue;
6156
2c407426 6157 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
36f5ada1 6158
6e8c65f6 6159 if (compare_costs (acost, best_cost) < 0)
36f5ada1
ZD
6160 {
6161 best_cost = acost;
6162 iv_ca_delta_free (&best_delta);
6163 best_delta = act_delta;
6164 }
6165 else
6166 iv_ca_delta_free (&act_delta);
6167 }
6168
6169 if (!best_delta)
6170 {
6171 *delta = NULL;
6172 return best_cost;
6173 }
6174
6175 /* Recurse to possibly remove other unnecessary ivs. */
6176 iv_ca_delta_commit (data, ivs, best_delta, true);
6177 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6178 iv_ca_delta_commit (data, ivs, best_delta, false);
6179 *delta = iv_ca_delta_join (best_delta, *delta);
6180 return best_cost;
6181}
6182
6326a5f5
BC
6183/* Check if CAND_IDX is a candidate other than OLD_CAND and has
6184 cheaper local cost for USE than BEST_CP. Return pointer to
6185 the corresponding cost_pair, otherwise just return BEST_CP. */
6186
6187static struct cost_pair*
6188cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6189 unsigned int cand_idx, struct iv_cand *old_cand,
6190 struct cost_pair *best_cp)
6191{
6192 struct iv_cand *cand;
6193 struct cost_pair *cp;
6194
6195 gcc_assert (old_cand != NULL && best_cp != NULL);
6196 if (cand_idx == old_cand->id)
6197 return best_cp;
6198
6199 cand = iv_cand (data, cand_idx);
6200 cp = get_use_iv_cost (data, use, cand);
6201 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6202 return cp;
6203
6204 return best_cp;
6205}
6206
6207/* Try breaking local optimal fixed-point for IVS by replacing candidates
6208 which are used by more than one iv uses. For each of those candidates,
6209 this function tries to represent iv uses under that candidate using
6210 other ones with lower local cost, then tries to prune the new set.
6211 If the new set has lower cost, It returns the new cost after recording
6212 candidate replacement in list DELTA. */
6213
6214static comp_cost
6215iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6216 struct iv_ca_delta **delta)
6217{
6218 bitmap_iterator bi, bj;
6219 unsigned int i, j, k;
6220 struct iv_use *use;
6221 struct iv_cand *cand;
6222 comp_cost orig_cost, acost;
6223 struct iv_ca_delta *act_delta, *tmp_delta;
6224 struct cost_pair *old_cp, *best_cp = NULL;
6225
6226 *delta = NULL;
6227 orig_cost = iv_ca_cost (ivs);
6228
6229 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6230 {
6231 if (ivs->n_cand_uses[i] == 1
6232 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6233 continue;
6234
6235 cand = iv_cand (data, i);
6236
6237 act_delta = NULL;
6238 /* Represent uses under current candidate using other ones with
6239 lower local cost. */
6240 for (j = 0; j < ivs->upto; j++)
6241 {
6242 use = iv_use (data, j);
6243 old_cp = iv_ca_cand_for_use (ivs, use);
6244
6245 if (old_cp->cand != cand)
6246 continue;
6247
6248 best_cp = old_cp;
6249 if (data->consider_all_candidates)
6250 for (k = 0; k < n_iv_cands (data); k++)
6251 best_cp = cheaper_cost_with_cand (data, use, k,
6252 old_cp->cand, best_cp);
6253 else
6254 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6255 best_cp = cheaper_cost_with_cand (data, use, k,
6256 old_cp->cand, best_cp);
6257
6258 if (best_cp == old_cp)
6259 continue;
6260
6261 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6262 }
6263 /* No need for further prune. */
6264 if (!act_delta)
6265 continue;
6266
6267 /* Prune the new candidate set. */
6268 iv_ca_delta_commit (data, ivs, act_delta, true);
6269 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6270 iv_ca_delta_commit (data, ivs, act_delta, false);
6271 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6272
6273 if (compare_costs (acost, orig_cost) < 0)
6274 {
6275 *delta = act_delta;
6276 return acost;
6277 }
6278 else
6279 iv_ca_delta_free (&act_delta);
6280 }
6281
6282 return orig_cost;
6283}
6284
b1b02be2 6285/* Tries to extend the sets IVS in the best possible way in order
16ad8025
SL
6286 to express the USE. If ORIGINALP is true, prefer candidates from
6287 the original set of IVs, otherwise favor important candidates not
6288 based on any memory object. */
8b11a64c
ZD
6289
6290static bool
b1b02be2 6291try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
16ad8025 6292 struct iv_use *use, bool originalp)
8b11a64c 6293{
6e8c65f6 6294 comp_cost best_cost, act_cost;
8b11a64c 6295 unsigned i;
38b0dcb8
ZD
6296 bitmap_iterator bi;
6297 struct iv_cand *cand;
b1b02be2
ZD
6298 struct iv_ca_delta *best_delta = NULL, *act_delta;
6299 struct cost_pair *cp;
6300
f22ae1ec 6301 iv_ca_add_use (data, ivs, use);
b1b02be2 6302 best_cost = iv_ca_cost (ivs);
b1b02be2
ZD
6303 cp = iv_ca_cand_for_use (ivs, use);
6304 if (cp)
6305 {
6306 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6307 iv_ca_set_no_cp (data, ivs, use);
6308 }
8b11a64c 6309
16ad8025
SL
6310 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6311 first try important candidates not based on any memory object. Only if
6e8c65f6
ZD
6312 this fails, try the specific ones. Rationale -- in loops with many
6313 variables the best choice often is to use just one generic biv. If we
6314 added here many ivs specific to the uses, the optimization algorithm later
6315 would be likely to get stuck in a local minimum, thus causing us to create
6316 too many ivs. The approach from few ivs to more seems more likely to be
6317 successful -- starting from few ivs, replacing an expensive use by a
6318 specific iv should always be a win. */
38b0dcb8 6319 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
8b11a64c 6320 {
38b0dcb8
ZD
6321 cand = iv_cand (data, i);
6322
16ad8025
SL
6323 if (originalp && cand->pos !=IP_ORIGINAL)
6324 continue;
6325
6326 if (!originalp && cand->iv->base_object != NULL_TREE)
6e8c65f6
ZD
6327 continue;
6328
b1b02be2 6329 if (iv_ca_cand_used_p (ivs, cand))
18081149 6330 continue;
8b11a64c 6331
b1b02be2
ZD
6332 cp = get_use_iv_cost (data, use, cand);
6333 if (!cp)
6334 continue;
6335
6336 iv_ca_set_cp (data, ivs, use, cp);
18081149
XDL
6337 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6338 true);
b1b02be2
ZD
6339 iv_ca_set_no_cp (data, ivs, use);
6340 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
8b11a64c 6341
6e8c65f6 6342 if (compare_costs (act_cost, best_cost) < 0)
8b11a64c
ZD
6343 {
6344 best_cost = act_cost;
b1b02be2
ZD
6345
6346 iv_ca_delta_free (&best_delta);
6347 best_delta = act_delta;
8b11a64c 6348 }
b1b02be2
ZD
6349 else
6350 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
6351 }
6352
6e8c65f6 6353 if (infinite_cost_p (best_cost))
38b0dcb8
ZD
6354 {
6355 for (i = 0; i < use->n_map_members; i++)
6356 {
6357 cp = use->cost_map + i;
b1b02be2
ZD
6358 cand = cp->cand;
6359 if (!cand)
38b0dcb8
ZD
6360 continue;
6361
6362 /* Already tried this. */
16ad8025
SL
6363 if (cand->important)
6364 {
6365 if (originalp && cand->pos == IP_ORIGINAL)
6366 continue;
6367 if (!originalp && cand->iv->base_object == NULL_TREE)
6368 continue;
6369 }
b8698a0f 6370
b1b02be2 6371 if (iv_ca_cand_used_p (ivs, cand))
38b0dcb8
ZD
6372 continue;
6373
b1b02be2
ZD
6374 act_delta = NULL;
6375 iv_ca_set_cp (data, ivs, use, cp);
18081149 6376 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
b1b02be2
ZD
6377 iv_ca_set_no_cp (data, ivs, use);
6378 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6379 cp, act_delta);
38b0dcb8 6380
6e8c65f6 6381 if (compare_costs (act_cost, best_cost) < 0)
38b0dcb8
ZD
6382 {
6383 best_cost = act_cost;
b1b02be2
ZD
6384
6385 if (best_delta)
6386 iv_ca_delta_free (&best_delta);
6387 best_delta = act_delta;
38b0dcb8 6388 }
b1b02be2
ZD
6389 else
6390 iv_ca_delta_free (&act_delta);
38b0dcb8
ZD
6391 }
6392 }
6393
b1b02be2
ZD
6394 iv_ca_delta_commit (data, ivs, best_delta, true);
6395 iv_ca_delta_free (&best_delta);
8b11a64c 6396
6e8c65f6 6397 return !infinite_cost_p (best_cost);
8b11a64c
ZD
6398}
6399
b1b02be2 6400/* Finds an initial assignment of candidates to uses. */
8b11a64c 6401
b1b02be2 6402static struct iv_ca *
16ad8025 6403get_initial_solution (struct ivopts_data *data, bool originalp)
8b11a64c 6404{
b1b02be2 6405 struct iv_ca *ivs = iv_ca_new (data);
8b11a64c
ZD
6406 unsigned i;
6407
6408 for (i = 0; i < n_iv_uses (data); i++)
16ad8025 6409 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
b1b02be2
ZD
6410 {
6411 iv_ca_free (&ivs);
6412 return NULL;
6413 }
8b11a64c 6414
b1b02be2 6415 return ivs;
8b11a64c
ZD
6416}
6417
6326a5f5
BC
6418/* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6419 points to a bool variable, this function tries to break local
6420 optimal fixed-point by replacing candidates in IVS if it's true. */
8b11a64c
ZD
6421
6422static bool
6326a5f5
BC
6423try_improve_iv_set (struct ivopts_data *data,
6424 struct iv_ca *ivs, bool *try_replace_p)
8b11a64c 6425{
6e8c65f6
ZD
6426 unsigned i, n_ivs;
6427 comp_cost acost, best_cost = iv_ca_cost (ivs);
36f5ada1 6428 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
b1b02be2 6429 struct iv_cand *cand;
8b11a64c 6430
36f5ada1 6431 /* Try extending the set of induction variables by one. */
8b11a64c
ZD
6432 for (i = 0; i < n_iv_cands (data); i++)
6433 {
b1b02be2 6434 cand = iv_cand (data, i);
b8698a0f 6435
b1b02be2 6436 if (iv_ca_cand_used_p (ivs, cand))
36f5ada1
ZD
6437 continue;
6438
18081149 6439 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
36f5ada1
ZD
6440 if (!act_delta)
6441 continue;
6442
6443 /* If we successfully added the candidate and the set is small enough,
6444 try optimizing it by removing other candidates. */
6445 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6446 {
6447 iv_ca_delta_commit (data, ivs, act_delta, true);
6448 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6449 iv_ca_delta_commit (data, ivs, act_delta, false);
6450 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6451 }
8b11a64c 6452
6e8c65f6 6453 if (compare_costs (acost, best_cost) < 0)
8b11a64c 6454 {
b1b02be2 6455 best_cost = acost;
36f5ada1 6456 iv_ca_delta_free (&best_delta);
b1b02be2 6457 best_delta = act_delta;
8b11a64c 6458 }
8b11a64c 6459 else
b1b02be2 6460 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
6461 }
6462
b1b02be2 6463 if (!best_delta)
36f5ada1
ZD
6464 {
6465 /* Try removing the candidates from the set instead. */
6466 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6467
6326a5f5
BC
6468 if (!best_delta && *try_replace_p)
6469 {
6470 *try_replace_p = false;
6471 /* So far candidate selecting algorithm tends to choose fewer IVs
6472 so that it can handle cases in which loops have many variables
6473 but the best choice is often to use only one general biv. One
6474 weakness is it can't handle opposite cases, in which different
6475 candidates should be chosen with respect to each use. To solve
6476 the problem, we replace candidates in a manner described by the
6477 comments of iv_ca_replace, thus give general algorithm a chance
6478 to break local optimal fixed-point in these cases. */
6479 best_cost = iv_ca_replace (data, ivs, &best_delta);
6480 }
6481
36f5ada1
ZD
6482 if (!best_delta)
6483 return false;
6484 }
8b11a64c 6485
b1b02be2 6486 iv_ca_delta_commit (data, ivs, best_delta, true);
6e8c65f6 6487 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
b1b02be2 6488 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
6489 return true;
6490}
6491
6492/* Attempts to find the optimal set of induction variables. We do simple
6493 greedy heuristic -- we try to replace at most one candidate in the selected
6494 solution and remove the unused ivs while this improves the cost. */
6495
b1b02be2 6496static struct iv_ca *
16ad8025 6497find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
8b11a64c 6498{
b1b02be2 6499 struct iv_ca *set;
6326a5f5 6500 bool try_replace_p = true;
8b11a64c 6501
b1b02be2 6502 /* Get the initial solution. */
16ad8025 6503 set = get_initial_solution (data, originalp);
b1b02be2 6504 if (!set)
8b11a64c
ZD
6505 {
6506 if (dump_file && (dump_flags & TDF_DETAILS))
6507 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
8b11a64c
ZD
6508 return NULL;
6509 }
6510
6511 if (dump_file && (dump_flags & TDF_DETAILS))
6512 {
b1b02be2
ZD
6513 fprintf (dump_file, "Initial set of candidates:\n");
6514 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
6515 }
6516
6326a5f5 6517 while (try_improve_iv_set (data, set, &try_replace_p))
8b11a64c
ZD
6518 {
6519 if (dump_file && (dump_flags & TDF_DETAILS))
6520 {
b1b02be2
ZD
6521 fprintf (dump_file, "Improved to:\n");
6522 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
6523 }
6524 }
6525
16ad8025
SL
6526 return set;
6527}
6528
6529static struct iv_ca *
6530find_optimal_iv_set (struct ivopts_data *data)
6531{
6532 unsigned i;
6533 struct iv_ca *set, *origset;
6534 struct iv_use *use;
6535 comp_cost cost, origcost;
6536
6537 /* Determine the cost based on a strategy that starts with original IVs,
6538 and try again using a strategy that prefers candidates not based
6539 on any IVs. */
6540 origset = find_optimal_iv_set_1 (data, true);
6541 set = find_optimal_iv_set_1 (data, false);
6542
6543 if (!origset && !set)
6544 return NULL;
6545
6546 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6547 cost = set ? iv_ca_cost (set) : infinite_cost;
6548
8b11a64c 6549 if (dump_file && (dump_flags & TDF_DETAILS))
6e8c65f6 6550 {
16ad8025
SL
6551 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6552 origcost.cost, origcost.complexity);
6553 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6554 cost.cost, cost.complexity);
6555 }
6556
6557 /* Choose the one with the best cost. */
6558 if (compare_costs (origcost, cost) <= 0)
6559 {
6560 if (set)
6561 iv_ca_free (&set);
6562 set = origset;
6e8c65f6 6563 }
16ad8025
SL
6564 else if (origset)
6565 iv_ca_free (&origset);
8b11a64c
ZD
6566
6567 for (i = 0; i < n_iv_uses (data); i++)
6568 {
6569 use = iv_use (data, i);
b1b02be2 6570 use->selected = iv_ca_cand_for_use (set, use)->cand;
8b11a64c
ZD
6571 }
6572
8b11a64c
ZD
6573 return set;
6574}
6575
6576/* Creates a new induction variable corresponding to CAND. */
6577
6578static void
6579create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6580{
726a989a 6581 gimple_stmt_iterator incr_pos;
8b11a64c
ZD
6582 tree base;
6583 bool after = false;
6584
6585 if (!cand->iv)
6586 return;
6587
6588 switch (cand->pos)
6589 {
6590 case IP_NORMAL:
726a989a 6591 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
8b11a64c
ZD
6592 break;
6593
6594 case IP_END:
726a989a 6595 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
8b11a64c
ZD
6596 after = true;
6597 break;
6598
2c08497a
BS
6599 case IP_AFTER_USE:
6600 after = true;
6601 /* fall through */
6602 case IP_BEFORE_USE:
6603 incr_pos = gsi_for_stmt (cand->incremented_at);
6604 break;
6605
8b11a64c
ZD
6606 case IP_ORIGINAL:
6607 /* Mark that the iv is preserved. */
6608 name_info (data, cand->var_before)->preserve_biv = true;
6609 name_info (data, cand->var_after)->preserve_biv = true;
6610
6611 /* Rewrite the increment so that it uses var_before directly. */
6612 find_interesting_uses_op (data, cand->var_after)->selected = cand;
8b11a64c
ZD
6613 return;
6614 }
b8698a0f 6615
8b11a64c 6616 gimple_add_tmp_var (cand->var_before);
8b11a64c
ZD
6617
6618 base = unshare_expr (cand->iv->base);
6619
9be872b7
ZD
6620 create_iv (base, unshare_expr (cand->iv->step),
6621 cand->var_before, data->current_loop,
8b11a64c
ZD
6622 &incr_pos, after, &cand->var_before, &cand->var_after);
6623}
6624
6625/* Creates new induction variables described in SET. */
6626
6627static void
b1b02be2 6628create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
8b11a64c
ZD
6629{
6630 unsigned i;
6631 struct iv_cand *cand;
87c476a2 6632 bitmap_iterator bi;
8b11a64c 6633
b1b02be2 6634 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
8b11a64c
ZD
6635 {
6636 cand = iv_cand (data, i);
6637 create_new_iv (data, cand);
87c476a2 6638 }
8b11a64c 6639
18081149
XDL
6640 if (dump_file && (dump_flags & TDF_DETAILS))
6641 {
28002f1a
RB
6642 fprintf (dump_file, "Selected IV set for loop %d",
6643 data->current_loop->num);
6644 if (data->loop_loc != UNKNOWN_LOCATION)
6645 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6646 LOCATION_LINE (data->loop_loc));
6647 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
18081149
XDL
6648 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6649 {
6650 cand = iv_cand (data, i);
6651 dump_cand (dump_file, cand);
6652 }
6653 fprintf (dump_file, "\n");
6654 }
6655}
8b11a64c
ZD
6656
6657/* Rewrites USE (definition of iv used in a nonlinear expression)
6658 using candidate CAND. */
6659
6660static void
6661rewrite_use_nonlinear_expr (struct ivopts_data *data,
6662 struct iv_use *use, struct iv_cand *cand)
6663{
3520b745 6664 tree comp;
726a989a 6665 tree op, tgt;
538dd0b7 6666 gassign *ass;
726a989a 6667 gimple_stmt_iterator bsi;
3520b745
ZD
6668
6669 /* An important special case -- if we are asked to express value of
6670 the original iv by itself, just exit; there is no need to
6671 introduce a new computation (that might also need casting the
6672 variable to unsigned and back). */
6673 if (cand->pos == IP_ORIGINAL
7b9d4f70 6674 && cand->incremented_at == use->stmt)
3520b745 6675 {
d06a01bf 6676 enum tree_code stmt_code;
7b9d4f70 6677
726a989a
RB
6678 gcc_assert (is_gimple_assign (use->stmt));
6679 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
7b9d4f70 6680
7b9d4f70
ZD
6681 /* Check whether we may leave the computation unchanged.
6682 This is the case only if it does not rely on other
6683 computations in the loop -- otherwise, the computation
6684 we rely upon may be removed in remove_unused_ivs,
6685 thus leading to ICE. */
d06a01bf
ZD
6686 stmt_code = gimple_assign_rhs_code (use->stmt);
6687 if (stmt_code == PLUS_EXPR
6688 || stmt_code == MINUS_EXPR
6689 || stmt_code == POINTER_PLUS_EXPR)
7b9d4f70 6690 {
726a989a
RB
6691 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6692 op = gimple_assign_rhs2 (use->stmt);
d06a01bf 6693 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
726a989a 6694 op = gimple_assign_rhs1 (use->stmt);
7b9d4f70
ZD
6695 else
6696 op = NULL_TREE;
6697 }
6698 else
6699 op = NULL_TREE;
3520b745 6700
d06a01bf 6701 if (op && expr_invariant_in_loop_p (data->current_loop, op))
3520b745 6702 return;
73f30c63 6703 }
3520b745 6704
d06a01bf
ZD
6705 comp = get_computation (data->current_loop, use, cand);
6706 gcc_assert (comp != NULL_TREE);
6707
726a989a 6708 switch (gimple_code (use->stmt))
8b11a64c 6709 {
726a989a 6710 case GIMPLE_PHI:
8b11a64c
ZD
6711 tgt = PHI_RESULT (use->stmt);
6712
6713 /* If we should keep the biv, do not replace it. */
6714 if (name_info (data, tgt)->preserve_biv)
6715 return;
6716
726a989a 6717 bsi = gsi_after_labels (gimple_bb (use->stmt));
1e128c5f
GB
6718 break;
6719
726a989a
RB
6720 case GIMPLE_ASSIGN:
6721 tgt = gimple_assign_lhs (use->stmt);
6722 bsi = gsi_for_stmt (use->stmt);
1e128c5f
GB
6723 break;
6724
6725 default:
6726 gcc_unreachable ();
8b11a64c 6727 }
8b11a64c 6728
17fc049f
RG
6729 if (!valid_gimple_rhs_p (comp)
6730 || (gimple_code (use->stmt) != GIMPLE_PHI
6731 /* We can't allow re-allocating the stmt as it might be pointed
6732 to still. */
6733 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6734 >= gimple_num_ops (gsi_stmt (bsi)))))
bdf0f819
RG
6735 {
6736 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6737 true, GSI_SAME_STMT);
6738 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
b5c878a5
RG
6739 {
6740 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6741 /* As this isn't a plain copy we have to reset alignment
6742 information. */
6743 if (SSA_NAME_PTR_INFO (comp))
644ffefd 6744 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
b5c878a5 6745 }
bdf0f819 6746 }
8b11a64c 6747
726a989a 6748 if (gimple_code (use->stmt) == GIMPLE_PHI)
8b11a64c 6749 {
17fc049f 6750 ass = gimple_build_assign (tgt, comp);
726a989a 6751 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
ae0a4449
AO
6752
6753 bsi = gsi_for_stmt (use->stmt);
6754 remove_phi_node (&bsi, false);
8b11a64c
ZD
6755 }
6756 else
726a989a 6757 {
17fc049f 6758 gimple_assign_set_rhs_from_tree (&bsi, comp);
726a989a
RB
6759 use->stmt = gsi_stmt (bsi);
6760 }
8b11a64c
ZD
6761}
6762
55791fcd
XDL
6763/* Performs a peephole optimization to reorder the iv update statement with
6764 a mem ref to enable instruction combining in later phases. The mem ref uses
6765 the iv value before the update, so the reordering transformation requires
6766 adjustment of the offset. CAND is the selected IV_CAND.
6767
6768 Example:
6769
6770 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6771 iv2 = iv1 + 1;
6772
6773 if (t < val) (1)
6774 goto L;
6775 goto Head;
6776
6777
6778 directly propagating t over to (1) will introduce overlapping live range
6779 thus increase register pressure. This peephole transform it into:
6780
6781
6782 iv2 = iv1 + 1;
6783 t = MEM_REF (base, iv2, 8, 8);
6784 if (t < val)
6785 goto L;
6786 goto Head;
6787*/
6788
6789static void
6790adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6791{
6792 tree var_after;
6793 gimple iv_update, stmt;
6794 basic_block bb;
6795 gimple_stmt_iterator gsi, gsi_iv;
6796
6797 if (cand->pos != IP_NORMAL)
6798 return;
6799
6800 var_after = cand->var_after;
6801 iv_update = SSA_NAME_DEF_STMT (var_after);
6802
6803 bb = gimple_bb (iv_update);
6804 gsi = gsi_last_nondebug_bb (bb);
6805 stmt = gsi_stmt (gsi);
6806
6807 /* Only handle conditional statement for now. */
6808 if (gimple_code (stmt) != GIMPLE_COND)
6809 return;
6810
6811 gsi_prev_nondebug (&gsi);
6812 stmt = gsi_stmt (gsi);
6813 if (stmt != iv_update)
6814 return;
6815
6816 gsi_prev_nondebug (&gsi);
6817 if (gsi_end_p (gsi))
6818 return;
6819
6820 stmt = gsi_stmt (gsi);
6821 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6822 return;
6823
6824 if (stmt != use->stmt)
6825 return;
6826
6827 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6828 return;
6829
6830 if (dump_file && (dump_flags & TDF_DETAILS))
6831 {
6832 fprintf (dump_file, "Reordering \n");
6833 print_gimple_stmt (dump_file, iv_update, 0, 0);
6834 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6835 fprintf (dump_file, "\n");
6836 }
6837
6838 gsi = gsi_for_stmt (use->stmt);
6839 gsi_iv = gsi_for_stmt (iv_update);
6840 gsi_move_before (&gsi_iv, &gsi);
6841
6842 cand->pos = IP_BEFORE_USE;
6843 cand->incremented_at = use->stmt;
6844}
6845
8b11a64c
ZD
6846/* Rewrites USE (address that is an iv) using candidate CAND. */
6847
6848static void
a7e43c57
BC
6849rewrite_use_address_1 (struct ivopts_data *data,
6850 struct iv_use *use, struct iv_cand *cand)
8b11a64c 6851{
73f30c63 6852 aff_tree aff;
726a989a 6853 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
d7c0c068 6854 tree base_hint = NULL_TREE;
880a1451 6855 tree ref, iv;
73f30c63 6856 bool ok;
8b11a64c 6857
55791fcd 6858 adjust_iv_update_pos (cand, use);
73f30c63
ZD
6859 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6860 gcc_assert (ok);
ac182688 6861 unshare_aff_combination (&aff);
8b11a64c 6862
d7c0c068
UW
6863 /* To avoid undefined overflow problems, all IV candidates use unsigned
6864 integer types. The drawback is that this makes it impossible for
6865 create_mem_ref to distinguish an IV that is based on a memory object
6866 from one that represents simply an offset.
6867
6868 To work around this problem, we pass a hint to create_mem_ref that
6869 indicates which variable (if any) in aff is an IV based on a memory
6870 object. Note that we only consider the candidate. If this is not
6871 based on an object, the base of the reference is in some subexpression
6872 of the use -- but these will use pointer types, so they are recognized
6873 by the create_mem_ref heuristics anyway. */
6874 if (cand->iv->base_object)
6875 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6876
880a1451
XDL
6877 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6878 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
a41e5e86 6879 reference_alias_ptr_type (*use->op_p),
880a1451 6880 iv, base_hint, data->speed);
ac182688
ZD
6881 copy_ref_info (ref, *use->op_p);
6882 *use->op_p = ref;
8b11a64c
ZD
6883}
6884
a7e43c57
BC
6885/* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6886 first use of a group, rewrites sub uses in the group too. */
6887
6888static void
6889rewrite_use_address (struct ivopts_data *data,
6890 struct iv_use *use, struct iv_cand *cand)
6891{
6892 struct iv_use *next;
6893
6894 gcc_assert (use->sub_id == 0);
6895 rewrite_use_address_1 (data, use, cand);
6896 update_stmt (use->stmt);
6897
6898 for (next = use->next; next != NULL; next = next->next)
6899 {
6900 rewrite_use_address_1 (data, next, cand);
6901 update_stmt (next->stmt);
6902 }
6903
6904 return;
6905}
6906
8b11a64c
ZD
6907/* Rewrites USE (the condition such that one of the arguments is an iv) using
6908 candidate CAND. */
6909
6910static void
6911rewrite_use_compare (struct ivopts_data *data,
6912 struct iv_use *use, struct iv_cand *cand)
6913{
b697aed4 6914 tree comp, *var_p, op, bound;
726a989a 6915 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
8b11a64c 6916 enum tree_code compare;
f5f12961 6917 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
b697aed4
ZD
6918 bool ok;
6919
f5f12961
ZD
6920 bound = cp->value;
6921 if (bound)
8b11a64c 6922 {
9e7376e5
ZD
6923 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6924 tree var_type = TREE_TYPE (var);
dc5b3407 6925 gimple_seq stmts;
9e7376e5 6926
18081149
XDL
6927 if (dump_file && (dump_flags & TDF_DETAILS))
6928 {
6929 fprintf (dump_file, "Replacing exit test: ");
6930 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6931 }
d8af4ba3 6932 compare = cp->comp;
b697aed4 6933 bound = unshare_expr (fold_convert (var_type, bound));
dc5b3407
ZD
6934 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6935 if (stmts)
6936 gsi_insert_seq_on_edge_immediate (
6937 loop_preheader_edge (data->current_loop),
6938 stmts);
8b11a64c 6939
538dd0b7
DM
6940 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6941 gimple_cond_set_lhs (cond_stmt, var);
6942 gimple_cond_set_code (cond_stmt, compare);
6943 gimple_cond_set_rhs (cond_stmt, op);
8b11a64c
ZD
6944 return;
6945 }
6946
6947 /* The induction variable elimination failed; just express the original
6948 giv. */
ac182688 6949 comp = get_computation (data->current_loop, use, cand);
73f30c63 6950 gcc_assert (comp != NULL_TREE);
8b11a64c 6951
726a989a 6952 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
b697aed4 6953 gcc_assert (ok);
8b11a64c 6954
726a989a
RB
6955 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6956 true, GSI_SAME_STMT);
8b11a64c
ZD
6957}
6958
8b11a64c
ZD
6959/* Rewrites USE using candidate CAND. */
6960
6961static void
cfaab3a9 6962rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
8b11a64c
ZD
6963{
6964 switch (use->type)
6965 {
6966 case USE_NONLINEAR_EXPR:
6967 rewrite_use_nonlinear_expr (data, use, cand);
6968 break;
6969
8b11a64c
ZD
6970 case USE_ADDRESS:
6971 rewrite_use_address (data, use, cand);
6972 break;
6973
6974 case USE_COMPARE:
6975 rewrite_use_compare (data, use, cand);
6976 break;
6977
6978 default:
1e128c5f 6979 gcc_unreachable ();
8b11a64c 6980 }
b8698a0f 6981
cff4e50d 6982 update_stmt (use->stmt);
8b11a64c
ZD
6983}
6984
6985/* Rewrite the uses using the selected induction variables. */
6986
6987static void
6988rewrite_uses (struct ivopts_data *data)
6989{
6990 unsigned i;
6991 struct iv_cand *cand;
6992 struct iv_use *use;
6993
6994 for (i = 0; i < n_iv_uses (data); i++)
6995 {
6996 use = iv_use (data, i);
6997 cand = use->selected;
1e128c5f 6998 gcc_assert (cand);
8b11a64c
ZD
6999
7000 rewrite_use (data, use, cand);
7001 }
7002}
7003
7004/* Removes the ivs that are not used after rewriting. */
7005
7006static void
7007remove_unused_ivs (struct ivopts_data *data)
7008{
7009 unsigned j;
87c476a2 7010 bitmap_iterator bi;
ae0a4449 7011 bitmap toremove = BITMAP_ALLOC (NULL);
8b11a64c 7012
ae0a4449
AO
7013 /* Figure out an order in which to release SSA DEFs so that we don't
7014 release something that we'd have to propagate into a debug stmt
7015 afterwards. */
87c476a2 7016 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
7017 {
7018 struct version_info *info;
7019
7020 info = ver_info (data, j);
7021 if (info->iv
6e42ce54 7022 && !integer_zerop (info->iv->step)
8b11a64c
ZD
7023 && !info->inv_id
7024 && !info->iv->have_use_for
7025 && !info->preserve_biv)
e1066560
AO
7026 {
7027 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7028
7029 tree def = info->iv->ssa_name;
7030
7031 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7032 {
7033 imm_use_iterator imm_iter;
7034 use_operand_p use_p;
7035 gimple stmt;
7036 int count = 0;
7037
7038 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7039 {
7040 if (!gimple_debug_bind_p (stmt))
7041 continue;
7042
7043 /* We just want to determine whether to do nothing
7044 (count == 0), to substitute the computed
7045 expression into a single use of the SSA DEF by
7046 itself (count == 1), or to use a debug temp
7047 because the SSA DEF is used multiple times or as
7048 part of a larger expression (count > 1). */
7049 count++;
7050 if (gimple_debug_bind_get_value (stmt) != def)
7051 count++;
7052
7053 if (count > 1)
7054 BREAK_FROM_IMM_USE_STMT (imm_iter);
7055 }
7056
7057 if (!count)
7058 continue;
7059
7060 struct iv_use dummy_use;
7061 struct iv_cand *best_cand = NULL, *cand;
7062 unsigned i, best_pref = 0, cand_pref;
7063
7064 memset (&dummy_use, 0, sizeof (dummy_use));
7065 dummy_use.iv = info->iv;
7066 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7067 {
7068 cand = iv_use (data, i)->selected;
7069 if (cand == best_cand)
7070 continue;
7071 cand_pref = operand_equal_p (cand->iv->step,
7072 info->iv->step, 0)
7073 ? 4 : 0;
7074 cand_pref
7075 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7076 == TYPE_MODE (TREE_TYPE (info->iv->base))
7077 ? 2 : 0;
7078 cand_pref
7079 += TREE_CODE (cand->iv->base) == INTEGER_CST
7080 ? 1 : 0;
7081 if (best_cand == NULL || best_pref < cand_pref)
7082 {
7083 best_cand = cand;
7084 best_pref = cand_pref;
7085 }
7086 }
7087
7088 if (!best_cand)
7089 continue;
7090
7091 tree comp = get_computation_at (data->current_loop,
7092 &dummy_use, best_cand,
7093 SSA_NAME_DEF_STMT (def));
7094 if (!comp)
7095 continue;
7096
7097 if (count > 1)
7098 {
7099 tree vexpr = make_node (DEBUG_EXPR_DECL);
7100 DECL_ARTIFICIAL (vexpr) = 1;
7101 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7102 if (SSA_NAME_VAR (def))
7103 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7104 else
7105 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
538dd0b7
DM
7106 gdebug *def_temp
7107 = gimple_build_debug_bind (vexpr, comp, NULL);
e1066560
AO
7108 gimple_stmt_iterator gsi;
7109
7110 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7111 gsi = gsi_after_labels (gimple_bb
7112 (SSA_NAME_DEF_STMT (def)));
7113 else
7114 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7115
7116 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7117 comp = vexpr;
7118 }
7119
7120 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7121 {
7122 if (!gimple_debug_bind_p (stmt))
7123 continue;
7124
7125 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7126 SET_USE (use_p, comp);
7127
7128 update_stmt (stmt);
7129 }
7130 }
7131 }
87c476a2 7132 }
ae0a4449
AO
7133
7134 release_defs_bitset (toremove);
7135
7136 BITMAP_FREE (toremove);
8b11a64c
ZD
7137}
7138
e2102efc 7139/* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
b787e7a2 7140 for hash_map::traverse. */
e2102efc 7141
b787e7a2
TS
7142bool
7143free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
e2102efc 7144{
b787e7a2 7145 free (value);
e2102efc
XDL
7146 return true;
7147}
7148
8b11a64c
ZD
7149/* Frees data allocated by the optimization of a single loop. */
7150
7151static void
7152free_loop_data (struct ivopts_data *data)
7153{
7154 unsigned i, j;
87c476a2 7155 bitmap_iterator bi;
69ebd99d 7156 tree obj;
8b11a64c 7157
15814ba0
PB
7158 if (data->niters)
7159 {
b787e7a2
TS
7160 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7161 delete data->niters;
15814ba0
PB
7162 data->niters = NULL;
7163 }
ca4c3169 7164
87c476a2 7165 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
7166 {
7167 struct version_info *info;
7168
7169 info = ver_info (data, i);
8b11a64c
ZD
7170 info->iv = NULL;
7171 info->has_nonlin_use = false;
7172 info->preserve_biv = false;
7173 info->inv_id = 0;
87c476a2 7174 }
8b11a64c 7175 bitmap_clear (data->relevant);
b1b02be2 7176 bitmap_clear (data->important_candidates);
8b11a64c
ZD
7177
7178 for (i = 0; i < n_iv_uses (data); i++)
7179 {
7180 struct iv_use *use = iv_use (data, i);
a7e43c57
BC
7181 struct iv_use *pre = use, *sub = use->next;
7182
7183 while (sub)
7184 {
7185 gcc_assert (sub->related_cands == NULL);
7186 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7187
a7e43c57
BC
7188 pre = sub;
7189 sub = sub->next;
7190 free (pre);
7191 }
8b11a64c 7192
8bdbfff5 7193 BITMAP_FREE (use->related_cands);
8b11a64c
ZD
7194 for (j = 0; j < use->n_map_members; j++)
7195 if (use->cost_map[j].depends_on)
8bdbfff5 7196 BITMAP_FREE (use->cost_map[j].depends_on);
8b11a64c
ZD
7197 free (use->cost_map);
7198 free (use);
7199 }
9771b263 7200 data->iv_uses.truncate (0);
8b11a64c
ZD
7201
7202 for (i = 0; i < n_iv_cands (data); i++)
7203 {
7204 struct iv_cand *cand = iv_cand (data, i);
7205
9be872b7
ZD
7206 if (cand->depends_on)
7207 BITMAP_FREE (cand->depends_on);
8b11a64c
ZD
7208 free (cand);
7209 }
9771b263 7210 data->iv_candidates.truncate (0);
8b11a64c
ZD
7211
7212 if (data->version_info_size < num_ssa_names)
7213 {
7214 data->version_info_size = 2 * num_ssa_names;
7215 free (data->version_info);
5ed6ace5 7216 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8b11a64c
ZD
7217 }
7218
7219 data->max_inv_id = 0;
7220
9771b263 7221 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
69ebd99d 7222 SET_DECL_RTL (obj, NULL_RTX);
8b11a64c 7223
9771b263 7224 decl_rtl_to_reset.truncate (0);
18081149 7225
c203e8a7 7226 data->inv_expr_tab->empty ();
18081149 7227 data->inv_expr_id = 0;
8b11a64c
ZD
7228}
7229
7230/* Finalizes data structures used by the iv optimization pass. LOOPS is the
7231 loop tree. */
7232
7233static void
9a2ef6b8 7234tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
8b11a64c 7235{
8b11a64c
ZD
7236 free_loop_data (data);
7237 free (data->version_info);
8bdbfff5
NS
7238 BITMAP_FREE (data->relevant);
7239 BITMAP_FREE (data->important_candidates);
8b11a64c 7240
9771b263
DN
7241 decl_rtl_to_reset.release ();
7242 data->iv_uses.release ();
7243 data->iv_candidates.release ();
c203e8a7
TS
7244 delete data->inv_expr_tab;
7245 data->inv_expr_tab = NULL;
3230c614 7246 free_affine_expand_cache (&data->name_expansion_cache);
6f929985 7247 obstack_free (&data->iv_obstack, NULL);
8b11a64c
ZD
7248}
7249
bec922f0
SL
7250/* Returns true if the loop body BODY includes any function calls. */
7251
7252static bool
7253loop_body_includes_call (basic_block *body, unsigned num_nodes)
7254{
7255 gimple_stmt_iterator gsi;
7256 unsigned i;
7257
7258 for (i = 0; i < num_nodes; i++)
7259 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7260 {
7261 gimple stmt = gsi_stmt (gsi);
7262 if (is_gimple_call (stmt)
7263 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7264 return true;
7265 }
7266 return false;
7267}
7268
8b11a64c
ZD
7269/* Optimizes the LOOP. Returns true if anything changed. */
7270
7271static bool
7272tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7273{
7274 bool changed = false;
b1b02be2 7275 struct iv_ca *iv_ca;
d8af4ba3 7276 edge exit = single_dom_exit (loop);
2c08497a 7277 basic_block *body;
8b11a64c 7278
15814ba0 7279 gcc_assert (!data->niters);
8b11a64c 7280 data->current_loop = loop;
28002f1a 7281 data->loop_loc = find_loop_location (loop);
f40751dd 7282 data->speed = optimize_loop_for_speed_p (loop);
8b11a64c
ZD
7283
7284 if (dump_file && (dump_flags & TDF_DETAILS))
7285 {
28002f1a
RB
7286 fprintf (dump_file, "Processing loop %d", loop->num);
7287 if (data->loop_loc != UNKNOWN_LOCATION)
7288 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7289 LOCATION_LINE (data->loop_loc));
7290 fprintf (dump_file, "\n");
b8698a0f 7291
8b11a64c
ZD
7292 if (exit)
7293 {
7294 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7295 exit->src->index, exit->dest->index);
726a989a 7296 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
8b11a64c
ZD
7297 fprintf (dump_file, "\n");
7298 }
7299
7300 fprintf (dump_file, "\n");
7301 }
7302
2c08497a 7303 body = get_loop_body (loop);
bec922f0 7304 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
2c08497a
BS
7305 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7306 free (body);
7307
d8af4ba3
ZD
7308 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7309
8b11a64c
ZD
7310 /* For each ssa name determines whether it behaves as an induction variable
7311 in some loop. */
7312 if (!find_induction_variables (data))
7313 goto finish;
7314
7315 /* Finds interesting uses (item 1). */
7316 find_interesting_uses (data);
a7e43c57 7317 group_address_uses (data);
8b11a64c
ZD
7318 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7319 goto finish;
7320
7321 /* Finds candidates for the induction variables (item 2). */
7322 find_iv_candidates (data);
7323
7324 /* Calculates the costs (item 3, part 1). */
8b11a64c 7325 determine_iv_costs (data);
2c08497a 7326 determine_use_iv_costs (data);
8b11a64c
ZD
7327 determine_set_costs (data);
7328
7329 /* Find the optimal set of induction variables (item 3, part 2). */
b1b02be2
ZD
7330 iv_ca = find_optimal_iv_set (data);
7331 if (!iv_ca)
8b11a64c
ZD
7332 goto finish;
7333 changed = true;
7334
7335 /* Create the new induction variables (item 4, part 1). */
b1b02be2
ZD
7336 create_new_ivs (data, iv_ca);
7337 iv_ca_free (&iv_ca);
b8698a0f 7338
8b11a64c
ZD
7339 /* Rewrite the uses (item 4, part 2). */
7340 rewrite_uses (data);
7341
7342 /* Remove the ivs that are unused after rewriting. */
7343 remove_unused_ivs (data);
7344
8b11a64c
ZD
7345 /* We have changed the structure of induction variables; it might happen
7346 that definitions in the scev database refer to some of them that were
7347 eliminated. */
7348 scev_reset ();
7349
7350finish:
7351 free_loop_data (data);
7352
7353 return changed;
7354}
7355
d73be268 7356/* Main entry point. Optimizes induction variables in loops. */
8b11a64c
ZD
7357
7358void
d73be268 7359tree_ssa_iv_optimize (void)
8b11a64c
ZD
7360{
7361 struct loop *loop;
7362 struct ivopts_data data;
7363
9a2ef6b8 7364 tree_ssa_iv_optimize_init (&data);
8b11a64c
ZD
7365
7366 /* Optimize the loops starting with the innermost ones. */
f0bd40b1 7367 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
8b11a64c 7368 {
8679c649
JH
7369 if (dump_file && (dump_flags & TDF_DETAILS))
7370 flow_loop_dump (loop, dump_file, NULL, 1);
e9472263
ZD
7371
7372 tree_ssa_iv_optimize_loop (&data, loop);
8b11a64c
ZD
7373 }
7374
9a2ef6b8 7375 tree_ssa_iv_optimize_finalize (&data);
8b11a64c 7376}