/* Induction variable optimizations.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */
/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in a constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction
	 variable's value to be usable for the given purpose (adding base
	 and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
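/* As an illustrative sketch of the above (not itself part of the
   implementation): given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address a + 4 * i (assuming 4-byte elements) is an interesting
   address use, and once a pointer candidate is selected the loop may end
   up looking roughly like

     for (p = a; p < a + n; p++)
       *p = 0;

   with the multiplication strength-reduced and the original counter i
   eliminated, its exit test replaced by the pointer comparison.  */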
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"
/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
/* The infinite cost.  */
#define INFTY 10000000

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
    }

  return niter;
}
struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable
			   points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The nonlinear use it belongs to, if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};
/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts cost COST2 from COST1.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);

bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
	 && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
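/* A small usage sketch (illustrative only): costs add component-wise,
   infinity absorbs, and comparison is lexicographic, with the runtime cost
   dominating and the complexity field breaking ties:

     comp_cost a (4, 1), b (4, 2);
     comp_cost c = a + b;	// c.cost == 8, c.complexity == 3
     gcc_assert (a < b);	// equal cost, lower complexity wins
     gcc_assert ((a + infinite_cost).infinite_cost_p ());  */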
struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};
/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};
/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};
/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}
/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}
struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};
/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};
/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);
/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_ADDRESS)
	fprintf (file, "  Type:\tADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}
/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, "  Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }
  if (cand->inv_exprs)
    {
      fprintf (file, "  Depend on inv.exprs: ");
      dump_bitmap (file, cand->inv_exprs);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}
/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}
/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}
/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}
/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}
/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}
/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}
/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred by iv step.
	 Ideally we should stop at any ssa var, but since that's expensive
	 and such cases are unusual, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}
/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred by iv step.
     Ideally we should stop at any ssa var, but since that's expensive
     and such cases are unusual, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}
/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n<Induction Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}
/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
   For address type uses, ADDR_BASE is the stripped IV base and ADDR_OFFSET
   is the const offset stripped from the IV base; for uses of other types,
   both are zero by default.  */

static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree addr_base,
	    unsigned HOST_WIDE_INT addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}
/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_var_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}
static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a group of TYPE.  */

static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);

  data->vgroups.safe_push (group);
  return group;
}

/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   A new group will be created if there is no existing group for the use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  unsigned HOST_WIDE_INT addr_offset = 0;

  /* Record non address type use in a new group.  */
  if (type == USE_ADDRESS && iv->base_object)
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (use->type != USE_ADDRESS || !use->iv->base_object)
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
}
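/* An illustrative sketch of the grouping above (not itself part of the
   implementation): for a loop body accessing both a[i] and a[i + 1], the
   two address uses share the same base object, the same step and the same
   stripped base, and differ only in addr_offset (0 vs. 4, assuming 4-byte
   elements), so record_group_use puts them into a single ADDRESS group to
   be costed and rewritten against one candidate.  */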
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->nonlin_use)
    {
      gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
      return iv->nonlin_use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));

  use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->nonlin_use = use;
  return use;
}
/* Indicates how a compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};
/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and the kind of rewrite possible is
   returned.  If this is not the case, CONTROL_VAR and BOUND are set to
   the arguments of the condition and COMP_IV_NA is returned.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of the comparison are IVs, we can express the ivs on
     both ends.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If neither side of the comparison is an IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* The control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is an IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is an IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}
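/* An illustrative example (not part of the implementation): for a loop
   with the exit test i < n, where i is an IV and n is loop invariant,
   extract_cond_operands returns COMP_IV_ELIM, so the test may either be
   rewritten in terms of the chosen candidate's value or be eliminated in
   favor of an equivalent bound on another candidate, e.g. a pointer
   comparison against a precomputed end address.  */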
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *bound_iv;
  enum comp_iv_rewrite ret;

  ret = extract_cond_operands (data, stmt,
			       &var_p, &bound_p, &var_iv, &bound_iv);
  if (ret == COMP_IV_NA)
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
  /* Record compare type iv_use for iv on the other side of comparison.  */
  if (ret == COMP_IV_EXPR_2)
    record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
}
/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

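/* Illustrative example (not part of the original sources): given

     n = ...;			(defined before loop L1)
     for (i = ...)		(loop L1)
       {
	 m = n + 4;		(defined inside L1)
	 for (j = ...)		(loop L2, nested in L1)
	   ... use of m + j ...
       }

   expr_invariant_in_loop_p (L2, m + 4) is true, while
   expr_invariant_in_loop_p (L1, m + 4) is false; similarly,
   outermost_invariant_loop_for_expr (L2, m + 4) returns L2, the
   outermost loop in which M is still invariant.  */
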
/* Given expression EXPR which computes inductive values with respect
   to the loop recorded in DATA, this function returns the biv from
   which EXPR is derived, by tracing definition chains of ssa variables
   in EXPR.  */

static struct iv *
find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
{
  struct iv *iv;
  unsigned i, n;
  tree e2, e1;
  enum tree_code code;
  gimple *stmt;

  if (expr == NULL_TREE)
    return NULL;

  if (is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
	  if (iv)
	    return iv;
	}
    }

  /* Stop if it's not an SSA name.  */
  if (code != SSA_NAME)
    return NULL;

  iv = get_iv (data, expr);
  if (!iv || integer_zerop (iv->step))
    return NULL;
  else if (iv->biv_p)
    return iv;

  stmt = SSA_NAME_DEF_STMT (expr);
  if (gphi *phi = dyn_cast <gphi *> (stmt))
    {
      ssa_op_iter iter;
      use_operand_p use_p;
      basic_block phi_bb = gimple_bb (phi);

      /* Skip loop header PHI that doesn't define biv.  */
      if (phi_bb->loop_father == data->current_loop)
	return NULL;

      if (virtual_operand_p (gimple_phi_result (phi)))
	return NULL;

      FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
	{
	  tree use = USE_FROM_PTR (use_p);
	  iv = find_deriving_biv_for_expr (data, use);
	  if (iv)
	    return iv;
	}
      return NULL;
    }
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return NULL;

  e1 = gimple_assign_rhs1 (stmt);
  code = gimple_assign_rhs_code (stmt);
  if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
    return find_deriving_biv_for_expr (data, e1);

  switch (code)
    {
    case MULT_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case POINTER_PLUS_EXPR:
      /* Increments, decrements and multiplications by a constant
	 are simple.  */
      e2 = gimple_assign_rhs2 (stmt);
      iv = find_deriving_biv_for_expr (data, e2);
      if (iv)
	return iv;
      gcc_fallthrough ();

    CASE_CONVERT:
      /* Casts are simple.  */
      return find_deriving_biv_for_expr (data, e1);

    default:
      break;
    }

  return NULL;
}

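/* Illustrative example (not part of the original sources): for

     i_1 = PHI <0, i_2>	   (the biv)
     j_3 = i_1 * 4;
     k_5 = j_3 + 8;

   find_deriving_biv_for_expr (data, k_5) walks the definitions of k_5
   and j_3 and returns the iv describing the biv i_1.  */
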
/* Record that BIV is used in an address type use, and do the same for
   the bivs whose base differs from BIV's base by exactly one step
   (BIV's predecessor and successor).  */

static void
record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
{
  unsigned i;
  tree type, base_1, base_2;
  bitmap_iterator bi;

  if (!biv || !biv->biv_p || integer_zerop (biv->step)
      || biv->have_address_use || !biv->no_overflow)
    return;

  type = TREE_TYPE (biv->base);
  if (!INTEGRAL_TYPE_P (type))
    return;

  biv->have_address_use = true;
  data->bivs_not_used_in_addr--;
  base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct iv *iv = ver_info (data, i)->iv;

      if (!iv || !iv->biv_p || integer_zerop (iv->step)
	  || iv->have_address_use || !iv->no_overflow)
	continue;

      if (type != TREE_TYPE (iv->base)
	  || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
	continue;

      if (!operand_equal_p (biv->step, iv->step, 0))
	continue;

      base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
      if (operand_equal_p (base_1, iv->base, 0)
	  || operand_equal_p (base_2, biv->base, 0))
	{
	  iv->have_address_use = true;
	  data->bivs_not_used_in_addr--;
	}
    }
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple *stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  if (dta->ivopts_data->bivs_not_used_in_addr)
    {
      if (!iv->biv_p)
	iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);

      record_biv_for_address_use (dta->ivopts_data, iv);
    }
  return true;
}

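/* Illustrative example (not part of the original sources): for a
   reference a[i_1], where A is an array of 4-byte elements and i_1 is
   an iv with base 0 and step 1, idx_find_step replaces the index with
   its initial value 0 and accumulates 4 * 1 = 4 bytes, expressed in
   sizetype, into DTA->step.  */
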
/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}

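/* Illustrative example (not part of the original sources): for
   TOP = n_1 * 8 + n_1 * 4 and BOT = n_1, the recursion derives the
   constants 8 and 4 and stores 12 to *MUL; for TOP = 13 and BOT = 4 it
   returns false because the truncating division leaves a remainder.  */
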
/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

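/* Illustrative example (not part of the original sources): a 4-byte
   reference that must be 32-bit aligned but whose STEP is 2 bytes may
   become misaligned after one iteration, so the function returns true;
   a step that is a multiple of 4 bytes keeps every access as aligned
   as the first one, so only the alignment of the initial access
   matters.  */
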
/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case MEM_REF:
      /* Likewise for MEM_REFs, modulo the storage order.  */
      return REF_REVERSE_STORAGE_ORDER (expr);

    case BIT_FIELD_REF:
      if (REF_REVERSE_STORAGE_ORDER (expr))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case COMPONENT_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
			       tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (data, base, step);
  /* Fail if base object of this memory reference is unknown.  */
  if (civ->base_object == NULL_TREE)
    goto fail;

  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
	find_interesting_uses_op (data, def);
    }
}

/* Return TRUE if OFFSET is within the range of [base + offset] addressing
   mode for memory reference represented by USE.  */

static GTY (()) vec<rtx, va_gc> *addr_list;

static bool
addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
{
  rtx reg, addr;
  unsigned list_index;
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
  machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));

  list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (list_index >= vec_safe_length (addr_list))
    vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);

  addr = (*addr_list)[list_index];
  if (!addr)
    {
      addr_mode = targetm.addr_space.address_mode (as);
      reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
      addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
      (*addr_list)[list_index] = addr;
    }
  else
    addr_mode = GET_MODE (addr);

  XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
  return (memory_address_addr_space_p (mem_mode, addr, as));
}

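/* Illustrative example (not part of the original sources): the check
   builds the RTL pattern (plus (reg) (const_int OFFSET)) once per
   address space and memory mode, caches it in ADDR_LIST, and asks the
   target whether it is a valid address; e.g. a target whose loads only
   accept immediate offsets in [-4096, 4095] would reject
   OFFSET = 8192.  */
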
/* Comparison function to sort group in ascending order of addr_offset.  */

static int
group_compare_offset (const void *a, const void *b)
{
  const struct iv_use *const *u1 = (const struct iv_use *const *) a;
  const struct iv_use *const *u2 = (const struct iv_use *const *) b;

  if ((*u1)->addr_offset != (*u2)->addr_offset)
    return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
  else
    return 0;
}

/* Check if small groups should be split.  Return true if no group
   contains more than two uses with distinct addr_offsets.  Return
   false otherwise.  We want to split such groups because:

     1) Small groups don't have much benefit and may interfere with
	general candidate selection.
     2) A problem consisting only of small groups is usually small, and
	the general algorithm can handle it well.

   TODO -- The above claim may not hold when we want to merge memory
   accesses with consecutive addresses.  */

static bool
split_small_address_groups_p (struct ivopts_data *data)
{
  unsigned int i, j, distinct = 1;
  struct iv_use *pre;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      if (group->vuses.length () == 1)
	continue;

      gcc_assert (group->type == USE_ADDRESS);
      if (group->vuses.length () == 2)
	{
	  if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
	    std::swap (group->vuses[0], group->vuses[1]);
	}
      else
	group->vuses.qsort (group_compare_offset);

      if (distinct > 2)
	continue;

      distinct = 1;
      for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
	{
	  if (group->vuses[j]->addr_offset != pre->addr_offset)
	    {
	      pre = group->vuses[j];
	      distinct++;
	    }

	  if (distinct > 2)
	    break;
	}
    }

  return (distinct <= 2);
}

/* For each group of address type uses, this function further groups
   these uses according to the maximum offset supported by target's
   [base + offset] addressing mode.  */

static void
split_address_groups (struct ivopts_data *data)
{
  unsigned int i, j;
  /* True if all small groups should be split regardless of whether the
     offsets are valid for the addressing mode.  */
  bool split_p = split_small_address_groups_p (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *new_group = NULL;
      struct iv_group *group = data->vgroups[i];
      struct iv_use *use = group->vuses[0];

      use->id = 0;
      use->group_id = group->id;
      if (group->vuses.length () == 1)
	continue;

      gcc_assert (group->type == USE_ADDRESS);

      for (j = 1; j < group->vuses.length ();)
	{
	  struct iv_use *next = group->vuses[j];
	  HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;

	  /* Split the group if asked to, or if the offset against the
	     first use can't fit in the offset part of the addressing
	     mode.  IV uses having the same offset are still kept in
	     one group.  */
	  if (offset != 0
	      && (split_p || !addr_offset_valid_p (use, offset)))
	    {
	      if (!new_group)
		new_group = record_group (data, group->type);
	      group->vuses.ordered_remove (j);
	      new_group->vuses.safe_push (next);
	      continue;
	    }

	  next->id = j;
	  next->group_id = group->id;
	  j++;
	}
    }
}

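/* Illustrative example (not part of the original sources): given a
   group with address uses at offsets 0, 8 and 65536 from the same
   base, on a target whose [base + offset] mode only accepts 16-bit
   signed offsets the third use is moved into a new group, since
   addr_offset_valid_p rejects the offset 65536 against the first
   use.  */
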
/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  edge e;

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
	if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
	    && !flow_bb_inside_loop_p (data->current_loop, e->dest))
	  find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
	if (!is_gimple_debug (gsi_stmt (bsi)))
	  find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }
  free (body);

  split_address_groups (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "\n<IV Groups>:\n");
      dump_groups (dump_file, data);
      fprintf (dump_file, "\n");
    }
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
		HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
	  || integer_zerop (expr))
	return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
	  && op1 == TREE_OPERAND (expr, 1))
	return orig_expr;

      if (integer_zerop (op1))
	expr = op0;
      else if (integer_zerop (op0))
	{
	  if (code == MINUS_EXPR)
	    expr = fold_build1 (NEGATE_EXPR, type, op1);
	  else
	    expr = op1;
	}
      else
	expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
	return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
	expr = op0;
      else
	expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
	return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
	break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
	  && integer_zerop (op1))
	{
	  /* Strip the component reference completely.  */
	  op0 = TREE_OPERAND (expr, 0);
	  op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	  *offset += off0;
	  return op0;
	}
      break;

    case COMPONENT_REF:
      {
	tree field;

	if (!inside_addr)
	  return orig_expr;

	tmp = component_ref_field_offset (expr);
	field = TREE_OPERAND (expr, 1);
	if (top_compref
	    && cst_and_fits_in_hwi (tmp)
	    && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
	  {
	    HOST_WIDE_INT boffset, abs_off;

	    /* Strip the component reference completely.  */
	    op0 = TREE_OPERAND (expr, 0);
	    op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
	    boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
	    abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
	    if (boffset < 0)
	      abs_off = -abs_off;

	    *offset = off0 + int_cst_value (tmp) + abs_off;
	    return op0;
	  }
      }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
	return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  HOST_WIDE_INT off;
  tree core = strip_offset_1 (expr, false, false, &off);
  *offset = off;
  return core;
}

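/* Illustrative example (not part of the original sources): for
   EXPR = &a[4], where A is an array of 8-byte elements, strip_offset
   returns the address of A and stores 4 * 8 = 32 to *OFFSET; for
   EXPR = p_1 + 16 it returns p_1 and stores 16.  */
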
/* Returns variant of TYPE that can be used as base for different uses.
   We return unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Private data for walk_tree.  */

struct walk_tree_data
{
  bitmap *inv_vars;
  struct ivopts_data *idata;
};

/* Callback function for walk_tree, it records invariants and symbol
   references in *EXPR_P.  DATA is the structure storing result info.  */

static tree
find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  tree op = *expr_p;
  struct version_info *info;
  struct walk_tree_data *wdata = (struct walk_tree_data*) data;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL_TREE;

  info = name_info (wdata->idata, op);
  /* Because we expand simple operations when finding IVs, a loop
     invariant variable that is not referred to by the original loop
     could be used now.  Record such invariant variables here.  */
  if (!info->iv)
    {
      struct ivopts_data *idata = wdata->idata;
      basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));

      if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
	{
	  set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
	  record_invariant (idata, op, false);
	}
    }
  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*wdata->inv_vars)
    *wdata->inv_vars = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*wdata->inv_vars, info->inv_id);

  return NULL_TREE;
}

/* Records invariants in *EXPR_P.  INV_VARS is the bitmap in which we
   should store them.  */

static inline void
find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
{
  struct walk_tree_data wdata;

  if (!inv_vars)
    return;

  wdata.idata = data;
  wdata.inv_vars = inv_vars;
  walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
}

/* Get entry from invariant expr hash table for INV_EXPR.  A new entry
   will be recorded if it doesn't exist yet.  Given the two exprs:
     inv_expr + cst1, inv_expr + cst2
   it's hard to decide whether the constant part should be stripped or
   not.  We choose not to strip it, based on the following facts:
     1) We need to count an ADD cost for the constant part if it's
	stripped, which isn't always trivial where this function is
	called.
     2) Stripping the constant away may conflict with the following
	loop invariant hoisting pass.
     3) Not stripping the constant away results in more invariant
	exprs, which usually leads to decisions preferring lower
	register pressure.  */

static iv_inv_expr_ent *
get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
{
  STRIP_NOPS (inv_expr);

  if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
    return NULL;

  /* Don't strip constant part away as we used to.  */

  /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent.  */
  struct iv_inv_expr_ent ent;
  ent.expr = inv_expr;
  ent.hash = iterative_hash_expr (inv_expr, 0);
  struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);

  if (!*slot)
    {
      *slot = XNEW (struct iv_inv_expr_ent);
      (*slot)->expr = inv_expr;
      (*slot)->hash = ent.hash;
      (*slot)->id = ++data->max_inv_expr_id;
    }

  return *slot;
}

/* Adds a candidate BASE + STEP * i.  Important field is set to IMPORTANT and
   position to POS.  If USE is not NULL, the candidate is set as related to
   it.  If both BASE and STEP are NULL, we add a pseudocandidate for the
   replacement of the final value of the iv by a direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
		 tree base, tree step, bool important, enum iv_position pos,
		 struct iv_use *use, gimple *incremented_at,
		 struct iv *orig_iv = NULL)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  gcc_assert (base && step);

  /* -fkeep-gc-roots-live means that we have to keep a real pointer
     live, but the ivopts code may replace a real pointer with one
     pointing before or after the memory block that is then adjusted
     into the memory block during the loop.  FIXME: It would likely be
     better to actually force the pointer live and still use ivopts;
     for example, it would be enough to write the pointer into memory
     and keep it there until after the loop.  */
  if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
    return NULL;

  /* For non-original variables, make sure their values are computed in a type
     that does not invoke undefined behavior on overflows (since in general,
     we cannot prove that these induction variables are non-wrapping).  */
  if (pos != IP_ORIGINAL)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
	{
	  base = fold_convert (type, base);
	  step = fold_convert (type, step);
	}
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (cand->pos != pos)
	continue;

      if (cand->incremented_at != incremented_at
	  || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	      && cand->ainc_use != use))
	continue;

      if (operand_equal_p (base, cand->iv->base, 0)
	  && operand_equal_p (step, cand->iv->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (base))
	      == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
	break;
    }

  if (i == data->vcands.length ())
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;
      cand->iv = alloc_iv (data, base, step);
      cand->pos = pos;
      if (pos != IP_ORIGINAL)
	{
	  cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
	  cand->var_after = cand->var_before;
	}
      cand->important = important;
      cand->incremented_at = incremented_at;
      data->vcands.safe_push (cand);

      if (TREE_CODE (step) != INTEGER_CST)
	{
	  find_inv_vars (data, &step, &cand->inv_vars);

	  iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
	  /* Share bitmap between inv_vars and inv_exprs for cand.  */
	  if (inv_expr != NULL)
	    {
	      cand->inv_exprs = cand->inv_vars;
	      cand->inv_vars = NULL;
	      if (cand->inv_exprs)
		bitmap_clear (cand->inv_exprs);
	      else
		cand->inv_exprs = BITMAP_ALLOC (NULL);

	      bitmap_set_bit (cand->inv_exprs, inv_expr->id);
	    }
	}

      if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
	cand->ainc_use = use;
      else
	cand->ainc_use = NULL;

      cand->orig_iv = orig_iv;
      if (dump_file && (dump_flags & TDF_DETAILS))
	dump_cand (dump_file, cand);
    }

  cand->important |= important;

  /* Relate candidate to the group for which it is added.  */
  if (use)
    bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment,
   thus creating a jump, possibly confusing other optimization passes and
   leaving less freedom to the scheduler.  So we allow IP_END only if
   IP_NORMAL is not available (so we do not have a better alternative), or
   if the latch edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
   Important field is set to IMPORTANT.  */

static void
add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
			bool important, struct iv_use *use)
{
  basic_block use_bb = gimple_bb (use->stmt);
  machine_mode mem_mode;
  unsigned HOST_WIDE_INT cstepi;

  /* If we insert the increment in any position other than the standard
     ones, we must ensure that it is incremented once per iteration.
     It must not be in an inner nested loop, or one side of an if
     statement.  */
  if (use_bb->loop_father != data->current_loop
      || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
      || stmt_could_throw_p (use->stmt)
      || !cst_and_fits_in_hwi (step))
    return;

  cstepi = int_cst_value (step);

  mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  if (((USE_LOAD_PRE_INCREMENT (mem_mode)
	|| USE_STORE_PRE_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_PRE_DECREMENT (mem_mode)
	   || USE_STORE_PRE_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      enum tree_code code = MINUS_EXPR;
      tree new_base;
      tree new_step = step;

      if (POINTER_TYPE_P (TREE_TYPE (base)))
	{
	  new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
	  code = POINTER_PLUS_EXPR;
	}
      else
	new_step = fold_convert (TREE_TYPE (base), new_step);
      new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
      add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
		       use->stmt);
    }
  if (((USE_LOAD_POST_INCREMENT (mem_mode)
	|| USE_STORE_POST_INCREMENT (mem_mode))
       && GET_MODE_SIZE (mem_mode) == cstepi)
      || ((USE_LOAD_POST_DECREMENT (mem_mode)
	   || USE_STORE_POST_DECREMENT (mem_mode))
	  && GET_MODE_SIZE (mem_mode) == -cstepi))
    {
      add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
		       use->stmt);
    }
}

3183
8b11a64c
ZD
3184/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3185 position to POS. If USE is not NULL, the candidate is set as related to
4c3b378b
BC
3186 it. The candidate computation is scheduled before exit condition and at
3187 the end of loop. */
8b11a64c
ZD
3188
3189static void
b8698a0f 3190add_candidate (struct ivopts_data *data,
e4142529
BC
3191 tree base, tree step, bool important, struct iv_use *use,
3192 struct iv *orig_iv = NULL)
8b11a64c
ZD
3193{
3194 if (ip_normal_pos (data->current_loop))
e4142529
BC
3195 add_candidate_1 (data, base, step, important,
3196 IP_NORMAL, use, NULL, orig_iv);
4366cf6d
ZD
3197 if (ip_end_pos (data->current_loop)
3198 && allow_ip_end_pos_p (data->current_loop))
e4142529 3199 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
8b11a64c
ZD
3200}
3201
/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_candidate (data, integer_zero_node, integer_one_node, true, NULL);

  /* The same for the long type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_integer_type_node) > TYPE_PRECISION (integer_type_node)
      && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_integer_type_node, 0),
		   build_int_cst (long_integer_type_node, 1), true, NULL);

  /* The same for the long long type if it is still fast enough.  */
  if (TYPE_PRECISION
	(long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
      && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
    add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
		   build_int_cst (long_long_integer_type_node, 1), true, NULL);
}


/* Adds candidates based on the old induction variable IV.  */

static void
add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
{
  gimple *phi;
  tree def;
  struct iv_cand *cand;

  /* Check if this biv is used in address type use.  */
  if (iv->no_overflow && iv->have_address_use
      && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
      && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
    {
      tree base = fold_convert (sizetype, iv->base);
      tree step = fold_convert (sizetype, iv->step);

      /* Add iv cand of same precision as index part in TARGET_MEM_REF.  */
      add_candidate (data, base, step, true, NULL, iv);
      /* Add iv cand of the original type only if it has nonlinear use.  */
      if (iv->nonlin_use)
	add_candidate (data, iv->base, iv->step, true, NULL);
    }
  else
    add_candidate (data, iv->base, iv->step, true, NULL);

  /* The same, but with initial value zero.  */
  if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
    add_candidate (data, size_int (0), iv->step, true, NULL);
  else
    add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
		   iv->step, true, NULL);

  phi = SSA_NAME_DEF_STMT (iv->ssa_name);
  if (gimple_code (phi) == GIMPLE_PHI)
    {
      /* Additionally record the possibility of leaving the original iv
	 untouched.  */
      def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
      /* Don't add candidate if it's from another PHI node because
	 it's an affine iv appearing in the form of PEELED_CHREC.  */
      phi = SSA_NAME_DEF_STMT (def);
      if (gimple_code (phi) != GIMPLE_PHI)
	{
	  cand = add_candidate_1 (data,
				  iv->base, iv->step, true, IP_ORIGINAL, NULL,
				  SSA_NAME_DEF_STMT (def));
	  if (cand)
	    {
	      cand->var_before = iv->ssa_name;
	      cand->var_after = def;
	    }
	}
      else
	gcc_assert (gimple_bb (phi) == data->current_loop->header);
    }
}

/* Adds candidates based on the old induction variables.  */

static void
add_iv_candidate_for_bivs (struct ivopts_data *data)
{
  unsigned i;
  struct iv *iv;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      iv = ver_info (data, i)->iv;
      if (iv && iv->biv_p && !integer_zerop (iv->step))
	add_iv_candidate_for_biv (data, iv);
    }
}

/* Record common candidate {BASE, STEP} derived from USE in hashtable.  */

static void
record_common_cand (struct ivopts_data *data, tree base,
		    tree step, struct iv_use *use)
{
  struct iv_common_cand ent;
  struct iv_common_cand **slot;

  ent.base = base;
  ent.step = step;
  ent.hash = iterative_hash_expr (base, 0);
  ent.hash = iterative_hash_expr (step, ent.hash);

  slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
  if (*slot == NULL)
    {
      *slot = new iv_common_cand ();
      (*slot)->base = base;
      (*slot)->step = step;
      (*slot)->uses.create (8);
      (*slot)->hash = ent.hash;
      data->iv_common_cands.safe_push ((*slot));
    }

  gcc_assert (use != NULL);
  (*slot)->uses.safe_push (use);
  return;
}

/* Comparison function used to sort common candidates.  */

static int
common_cand_cmp (const void *p1, const void *p2)
{
  unsigned n1, n2;
  const struct iv_common_cand *const *const ccand1
    = (const struct iv_common_cand *const *)p1;
  const struct iv_common_cand *const *const ccand2
    = (const struct iv_common_cand *const *)p2;

  n1 = (*ccand1)->uses.length ();
  n2 = (*ccand2)->uses.length ();
  return n2 - n1;
}

/* Adds IV candidates based on the common candidates recorded.  */

static void
add_iv_candidate_derived_from_uses (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand_1, *cand_2;

  data->iv_common_cands.qsort (common_cand_cmp);
  for (i = 0; i < data->iv_common_cands.length (); i++)
    {
      struct iv_common_cand *ptr = data->iv_common_cands[i];

      /* Only add IV candidate if it's derived from multiple uses.  */
      if (ptr->uses.length () <= 1)
	break;

      cand_1 = NULL;
      cand_2 = NULL;
      if (ip_normal_pos (data->current_loop))
	cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_NORMAL, NULL, NULL);

      if (ip_end_pos (data->current_loop)
	  && allow_ip_end_pos_p (data->current_loop))
	cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
				  false, IP_END, NULL, NULL);

      /* Bind deriving uses and the new candidates.  */
      for (j = 0; j < ptr->uses.length (); j++)
	{
	  struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
	  if (cand_1)
	    bitmap_set_bit (group->related_cands, cand_1->id);
	  if (cand_2)
	    bitmap_set_bit (group->related_cands, cand_2->id);
	}
    }

  /* Release data since it is useless from this point.  */
  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Adds candidates based on the value of USE's iv.  */

static void
add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
{
  unsigned HOST_WIDE_INT offset;
  tree base;
  tree basetype;
  struct iv *iv = use->iv;

  add_candidate (data, iv->base, iv->step, false, use);

  /* Record common candidate for use in case it can be shared by others.  */
  record_common_cand (data, iv->base, iv->step, use);

  /* Record common candidate with initial value zero.  */
  basetype = TREE_TYPE (iv->base);
  if (POINTER_TYPE_P (basetype))
    basetype = sizetype;
  record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);

  /* Record common candidate with constant offset stripped in base.
     Like the use itself, we also add candidate directly for it.  */
  base = strip_offset (iv->base, &offset);
  if (offset || base != iv->base)
    {
      record_common_cand (data, base, iv->step, use);
      add_candidate (data, base, iv->step, false, use);
    }

  /* Record common candidate with base_object removed in base.  */
  base = iv->base;
  STRIP_NOPS (base);
  if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
    {
      tree step = iv->step;

      STRIP_NOPS (step);
      base = TREE_OPERAND (base, 1);
      step = fold_convert (sizetype, step);
      record_common_cand (data, base, step, use);
      /* Also record common candidate with offset stripped.  */
      base = strip_offset (base, &offset);
      if (offset)
	record_common_cand (data, base, step, use);
    }

  /* At last, add auto-incremental candidates.  Make such variables
     important since other iv uses with same base object may be based
     on it.  */
  if (use != NULL && use->type == USE_ADDRESS)
    add_autoinc_candidates (data, iv->base, iv->step, true, use);
}

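/* Illustrative example (not part of the original sources): two address
   uses p_1 + 4 and p_1 + 8 both record the common candidate with base
   p_1 once their constant offsets are stripped; since that candidate
   is then derived from more than one use,
   add_iv_candidate_derived_from_uses turns it into a real candidate
   that both groups can share.  */
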
/* Adds candidates based on the uses.  */

static void
add_iv_candidate_for_groups (struct ivopts_data *data)
{
  unsigned i;

  /* Only add candidate for the first use in group.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      gcc_assert (group->vuses[0] != NULL);
      add_iv_candidate_for_use (data, group->vuses[0]);
    }
  add_iv_candidate_derived_from_uses (data);
}

/* Record important candidates and add them to related_cands bitmaps.  */

static void
record_important_candidates (struct ivopts_data *data)
{
  unsigned i;
  struct iv_group *group;

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->important)
	bitmap_set_bit (data->important_candidates, i);
    }

  data->consider_all_candidates = (data->vcands.length ()
				   <= CONSIDER_ALL_CANDIDATES_BOUND);

  /* Add important candidates to groups' related_cands bitmaps.  */
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      bitmap_ior_into (group->related_cands, data->important_candidates);
    }
}

8b11a64c
ZD
3489/* Allocates the data structure mapping the (use, candidate) pairs to costs.
3490 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3491 we allocate a simple list to every use. */
3492
3493static void
3494alloc_use_cost_map (struct ivopts_data *data)
3495{
79836a12 3496 unsigned i, size, s;
8b11a64c 3497
309a0cf6 3498 for (i = 0; i < data->vgroups.length (); i++)
8b11a64c 3499 {
309a0cf6 3500 struct iv_group *group = data->vgroups[i];
8b11a64c
ZD
3501
3502 if (data->consider_all_candidates)
309a0cf6 3503 size = data->vcands.length ();
8b11a64c
ZD
3504 else
3505 {
309a0cf6 3506 s = bitmap_count_bits (group->related_cands);
b1b02be2
ZD
3507
3508 /* Round up to the power of two, so that moduling by it is fast. */
79836a12 3509 size = s ? (1 << ceil_log2 (s)) : 1;
8b11a64c
ZD
3510 }
3511
309a0cf6
BC
3512 group->n_map_members = size;
3513 group->cost_map = XCNEWVEC (struct cost_pair, size);
8b11a64c
ZD
3514 }
3515}
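
/* As an illustration with made-up numbers: a group with s == 5 related
   candidates gets a cost map of size 8, and the lookup code below finds
   the slot for candidate ID 13 at 13 & (8 - 1) == 5, probing linearly
   from there -- a single AND instead of a division.  */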

/* Sets the cost of the (GROUP, CAND) pair to COST, records that it depends
   on the invariants INV_VARS and INV_EXPRS, that the value used in
   expressing it is VALUE, and, in case of iv elimination, that the
   comparison operator is COMP.  */

static void
set_group_iv_cost (struct ivopts_data *data,
		   struct iv_group *group, struct iv_cand *cand,
		   comp_cost cost, bitmap inv_vars, tree value,
		   enum tree_code comp, bitmap inv_exprs)
{
  unsigned i, s;

  if (cost.infinite_cost_p ())
    {
      BITMAP_FREE (inv_vars);
      BITMAP_FREE (inv_exprs);
      return;
    }

  if (data->consider_all_candidates)
    {
      group->cost_map[cand->id].cand = cand;
      group->cost_map[cand->id].cost = cost;
      group->cost_map[cand->id].inv_vars = inv_vars;
      group->cost_map[cand->id].inv_exprs = inv_exprs;
      group->cost_map[cand->id].value = value;
      group->cost_map[cand->id].comp = comp;
      return;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (group->n_map_members - 1);
  for (i = s; i < group->n_map_members; i++)
    if (!group->cost_map[i].cand)
      goto found;
  for (i = 0; i < s; i++)
    if (!group->cost_map[i].cand)
      goto found;

  gcc_unreachable ();

found:
  group->cost_map[i].cand = cand;
  group->cost_map[i].cost = cost;
  group->cost_map[i].inv_vars = inv_vars;
  group->cost_map[i].inv_exprs = inv_exprs;
  group->cost_map[i].value = value;
  group->cost_map[i].comp = comp;
}

/* Gets cost of (GROUP, CAND) pair.  */

static struct cost_pair *
get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
		   struct iv_cand *cand)
{
  unsigned i, s;
  struct cost_pair *ret;

  if (!cand)
    return NULL;

  if (data->consider_all_candidates)
    {
      ret = group->cost_map + cand->id;
      if (!ret->cand)
	return NULL;

      return ret;
    }

  /* n_map_members is a power of two, so this computes modulo.  */
  s = cand->id & (group->n_map_members - 1);
  for (i = s; i < group->n_map_members; i++)
    if (group->cost_map[i].cand == cand)
      return group->cost_map + i;
    else if (group->cost_map[i].cand == NULL)
      return NULL;
  for (i = 0; i < s; i++)
    if (group->cost_map[i].cand == cand)
      return group->cost_map + i;
    else if (group->cost_map[i].cand == NULL)
      return NULL;

  return NULL;
}

/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */

static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision, at least as wide as the precision of TYPE, stores BA to A
   and BB to B, and returns the type of BA.  Otherwise, returns the type of
   A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The expression is stored in two
   parts in a decomposed form: the invariant part in AFF_INV, the variant
   part in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if
   it is non-null.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
		       struct iv_cand *cand, struct aff_tree *aff_inv,
		       struct aff_tree *aff_var, widest_int *prat = NULL)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase = cand->iv->base, cstep = cand->iv->step;
  tree common_type, uutype, var, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  aff_tree aff_cbase;
  widest_int rat;

  /* We must have enough precision to express the values of the use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return false;

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If the candidate is added from a biv whose type is smaller than
	     ctype, we know both the candidate and the biv won't overflow.
	     In this case, it's safe to skip the conversion in the candidate.
	     As an example, (unsigned short)((unsigned long)A) equals
	     (unsigned short)A, if A has a type no larger than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  if (prat)
    *prat = rat;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry
     about overflows, as all the arithmetic will in the end be performed in
     UUTYPE anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff_inv);
  tree_to_aff_combination (cbase, common_type, &aff_cbase);
  tree_to_aff_combination (var, uutype, aff_var);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&aff_cbase, &cstep_aff);
    }

  aff_combination_scale (&aff_cbase, -rat);
  aff_combination_add (aff_inv, &aff_cbase);
  if (common_type != uutype)
    aff_combination_convert (aff_inv, uutype);

  aff_combination_scale (aff_var, rat);
  return true;
}

/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The expression is stored in a
   decomposed form into AFF.  Returns false if USE cannot be expressed
   using CAND.  */

static bool
get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
		     struct iv_cand *cand, struct aff_tree *aff)
{
  aff_tree aff_var;

  if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
    return false;

  aff_combination_add (aff, &aff_var);
  return true;
}

/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}

/* Determines the expression by which USE is expressed from induction
   variable CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop, gimple *at,
		    struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, at, use, cand, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}
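
/* A worked example of the decomposition above, with illustrative values:
   for a use with base 16 and step 4 and a candidate with base 0 and
   step 1, constant_multiple_of yields ratio == 4, so the use is expressed
   as 16 - 4 * 0 + 4 * var, i.e. aff_inv == 16 and aff_var == 4 * var.  */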

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the average number of
   loop iterations.  If ROUND_UP_P is true, the result is rounded up
   rather than truncated toward zero when optimizing for speed.  */

static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost,
		   bool round_up_p = false)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    {
      HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
      return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
    }
  else
    return cost;
}
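
/* For instance, with a setup cost of 10 and an average of 4 iterations,
   the amortized cost when optimizing for speed is 10 / 4 == 2, or
   (10 + 3) / 4 == 3 with ROUND_UP_P.  (Illustrative numbers.)  */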

/* Calculate the speed or size cost (depending on SPEED) of the shiftadd
   EXPR in MODE.  MULT is the EXPR operand holding the shift.  COST0 and
   COST1 are the costs for calculating the operands of EXPR.  Returns true
   if successful, and returns the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  if (!(m >= 0 && m < maxm))
    return false;

  STRIP_NOPS (op1);
  mult_in_op1 = operand_equal_p (op1, mult, 0);

  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = comp_cost (MIN (as_cost, sa_cost), 0);
  res += (mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res += force_expr_to_var_cost (multop, speed);

  *cost = res;
  return true;
}
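
/* For example, for a + b * 8 we have m == 3, and the result is the
   cheaper of an add plus a shift-by-3 and a single shift-and-add
   instruction, plus the cost of computing a.  (Illustrative expression.)  */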

/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;
  scalar_int_mode int_mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (VAR_P (obj)
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return comp_cost (symbol_cost [speed], 0);
	}

      return comp_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
    case BIT_NOT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && is_a <scalar_int_mode> (mode, &int_mode)
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
					TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
					      mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
					      mode, speed), 0);
      else
	return comp_cost (target_spill_cost [speed], 0);
      break;

    case TRUNC_DIV_EXPR:
      /* Division by power of two is usually cheap, so we allow it.  Forbid
	 anything else.  */
      if (integer_pow2p (TREE_OPERAND (expr, 1)))
	cost = comp_cost (add_cost (speed, mode), 0);
      else
	cost = comp_cost (target_spill_cost[speed], 0);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_NOT_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;
  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
{
  if (!expr)
    return no_cost;

  find_inv_vars (data, &expr, inv_vars);
  return force_expr_to_var_cost (expr, data->speed);
}

/* Returns the cost of an auto-modifying address expression of the shape
   base + offset.  AINC_STEP is the step size of the address IV.
   AINC_OFFSET is the offset of the address expression.  The address
   expression has ADDR_MODE in address space AS.  The memory access has
   MEM_MODE.  SPEED says whether we are optimizing for speed or size.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

struct ainc_cost_data
{
  unsigned costs[AINC_NONE];
};

static comp_cost
get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
		       machine_mode addr_mode, machine_mode mem_mode,
		       addr_space_t as, bool speed)
{
  if (!USE_LOAD_PRE_DECREMENT (mem_mode)
      && !USE_STORE_PRE_DECREMENT (mem_mode)
      && !USE_LOAD_POST_DECREMENT (mem_mode)
      && !USE_STORE_POST_DECREMENT (mem_mode)
      && !USE_LOAD_PRE_INCREMENT (mem_mode)
      && !USE_STORE_PRE_INCREMENT (mem_mode)
      && !USE_LOAD_POST_INCREMENT (mem_mode)
      && !USE_STORE_POST_INCREMENT (mem_mode))
    return infinite_cost;

  static vec<ainc_cost_data *> ainc_cost_data_list;
  unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (idx >= ainc_cost_data_list.length ())
    {
      unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;

      gcc_assert (nsize > idx);
      ainc_cost_data_list.safe_grow_cleared (nsize);
    }

  ainc_cost_data *data = ainc_cost_data_list[idx];
  if (data == NULL)
    {
      rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);

      data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
      data->costs[AINC_PRE_DEC] = INFTY;
      data->costs[AINC_POST_DEC] = INFTY;
      data->costs[AINC_PRE_INC] = INFTY;
      data->costs[AINC_POST_INC] = INFTY;
      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_PRE_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_POST_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_INCREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_PRE_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_POST_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      ainc_cost_data_list[idx] = data;
    }

  HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
  if (ainc_offset == 0 && msize == ainc_step)
    return comp_cost (data->costs[AINC_POST_INC], 0);
  if (ainc_offset == 0 && msize == -ainc_step)
    return comp_cost (data->costs[AINC_POST_DEC], 0);
  if (ainc_offset == msize && msize == ainc_step)
    return comp_cost (data->costs[AINC_PRE_INC], 0);
  if (ainc_offset == -msize && msize == -ainc_step)
    return comp_cost (data->costs[AINC_PRE_DEC], 0);

  return infinite_cost;
}
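
/* For a 4-byte access with AINC_STEP == 4, an offset of 0 corresponds
   to *p++ (post-increment) and an offset of 4 to *++p (pre-increment);
   with AINC_STEP == -4 the decrement forms apply analogously.  */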

/* Return cost of computing USE's address expression by using CAND.
   AFF_INV and AFF_VAR represent invariant and variant parts of the
   address expression, respectively.  If AFF_INV is simple, store the
   loop invariant variables it depends on in INV_VARS; if AFF_INV is
   complicated, handle it as a new invariant expression and record it
   in INV_EXPR.  RATIO is the ratio between the steps of USE and CAND.
   If CAN_AUTOINC is non-NULL, store in it a boolean indicating whether
   this is an auto-increment address.  */

static comp_cost
get_address_cost (struct ivopts_data *data, struct iv_use *use,
		  struct iv_cand *cand, aff_tree *aff_inv,
		  aff_tree *aff_var, HOST_WIDE_INT ratio,
		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
		  bool *can_autoinc, bool speed)
{
  rtx addr;
  bool simple_inv = true;
  tree comp_inv = NULL_TREE, type = aff_var->type;
  comp_cost var_cost = no_cost, cost = no_cost;
  struct mem_address parts = {NULL_TREE, integer_one_node,
			      NULL_TREE, NULL_TREE, NULL_TREE};
  machine_mode addr_mode = TYPE_MODE (type);
  machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));

  if (!aff_combination_const_p (aff_inv))
    {
      parts.index = integer_one_node;
      /* Addressing mode "base + index".  */
      if (valid_mem_ref_p (mem_mode, as, &parts))
	{
	  parts.step = wide_int_to_tree (type, ratio);
	  /* Addressing mode "base + index << scale".  */
	  if (ratio != 1 && !valid_mem_ref_p (mem_mode, as, &parts))
	    parts.step = NULL_TREE;

	  if (aff_inv->offset != 0)
	    {
	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
	      /* Addressing mode "base + index [<< scale] + offset".  */
	      if (!valid_mem_ref_p (mem_mode, as, &parts))
		parts.offset = NULL_TREE;
	      else
		aff_inv->offset = 0;
	    }

	  move_fixed_address_to_symbol (&parts, aff_inv);
	  /* Base is a fixed address and is moved to the symbol part.  */
	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
	    parts.base = NULL_TREE;

	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
	  if (parts.symbol != NULL_TREE
	      && !valid_mem_ref_p (mem_mode, as, &parts))
	    {
	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
	      parts.symbol = NULL_TREE;
	      /* Reset SIMPLE_INV since the symbol address needs to be
		 computed outside of the address expression in this case.  */
	      simple_inv = false;
	      /* The symbol part is moved back to the base part; it can't
		 be NULL.  */
	      parts.base = integer_one_node;
	    }
	}
      else
	parts.index = NULL_TREE;
    }
  else
    {
      if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
	{
	  HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
	  HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();

	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
	    ainc_offset += ainc_step;
	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
					addr_mode, mem_mode, as, speed);
	  if (!cost.infinite_cost_p ())
	    {
	      *can_autoinc = true;
	      return cost;
	    }
	  cost = no_cost;
	}
      if (!aff_combination_zero_p (aff_inv))
	{
	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
	  /* Addressing mode "base + offset".  */
	  if (!valid_mem_ref_p (mem_mode, as, &parts))
	    parts.offset = NULL_TREE;
	  else
	    aff_inv->offset = 0;
	}
    }

  if (simple_inv)
    simple_inv = (aff_inv == NULL
		  || aff_combination_const_p (aff_inv)
		  || aff_combination_singleton_var_p (aff_inv));
  if (!aff_combination_zero_p (aff_inv))
    comp_inv = aff_combination_to_tree (aff_inv);
  if (comp_inv != NULL_TREE)
    cost = force_var_cost (data, comp_inv, inv_vars);
  if (ratio != 1 && parts.step == NULL_TREE)
    var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
  if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
    var_cost += add_cost (speed, addr_mode);

  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
	bitmap_clear (*inv_vars);

      /* The cost of a small invariant expression, once adjusted against
	 the loop iteration count, is usually zero, which makes it hard to
	 distinguish from a candidate based on loop invariant variables.
	 Moreover, the generated invariant expression may not be hoisted
	 out of the loop by a following pass.  We penalize the cost by
	 rounding up in order to neutralize such effects.  */
      cost.cost = adjust_setup_cost (data, cost.cost, true);
      cost.scratch = cost.cost;
    }

  cost += var_cost;
  addr = addr_for_mem_ref (&parts, as, false);
  gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
  cost += address_cost (addr, mem_mode, as, speed);

  if (parts.symbol != NULL_TREE)
    cost.complexity += 1;
  if (parts.step != NULL_TREE && !integer_onep (parts.step))
    cost.complexity += 1;
  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
    cost.complexity += 1;
  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
    cost.complexity += 1;

  return cost;
}

/* Scale (multiply) the computed COST (except the scratch part, which
   should be hoisted out of the loop) by AT->frequency / header->frequency,
   which makes the expected cost more accurate.  */

static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
{
  int loop_freq = data->current_loop->header->frequency;
  int bb_freq = gimple_bb (at)->frequency;
  if (loop_freq != 0)
    {
      gcc_assert (cost.scratch <= cost.cost);
      int scaled_cost
	= cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Scaling cost based on bb prob "
		 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
		 1.0f * bb_freq / loop_freq, cost.cost,
		 cost.scratch, scaled_cost, bb_freq, loop_freq);

      cost.cost = scaled_cost;
    }

  return cost;
}
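
/* E.g. with cost == 10, scratch == 2, a loop header frequency of 1000
   and a use in a block with frequency 250, the scaled cost is
   2 + (10 - 2) * 250 / 1000 == 4.  (Illustrative frequencies.)  */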

/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in INV_VARS.
   If CAN_AUTOINC is non-NULL, use it to record whether autoinc
   addressing is likely.  If INV_EXPR is non-NULL, record the invariant
   expression entry in it.  */

static comp_cost
get_computation_cost (struct ivopts_data *data, struct iv_use *use,
		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
{
  gimple *at = use->stmt;
  tree ubase = use->iv->base, cbase = cand->iv->base;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree comp_inv = NULL_TREE;
  HOST_WIDE_INT ratio, aratio;
  comp_cost cost;
  widest_int rat;
  aff_tree aff_inv, aff_var;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  if (inv_vars)
    *inv_vars = NULL;
  if (can_autoinc)
    *can_autoinc = false;
  if (inv_expr)
    *inv_expr = NULL;

  /* Check if we have enough precision to express the values of the use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return infinite_cost;

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express the address of an object with a computation
	 based on the address of a different object.  This may cause
	 problems in rtl level alias analysis (that does not expect this to
	 be happening, as this is illegal in C), and would be unlikely to be
	 useful anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (!get_computation_aff_1 (data->current_loop, at, use,
			      cand, &aff_inv, &aff_var, &rat)
      || !wi::fits_shwi_p (rat))
    return infinite_cost;

  ratio = rat.to_shwi ();
  if (address_p)
    {
      cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
			       inv_vars, inv_expr, can_autoinc, speed);
      return get_scaled_computation_cost_at (data, at, cost);
    }

  bool simple_inv = (aff_combination_const_p (&aff_inv)
		     || aff_combination_singleton_var_p (&aff_inv));
  tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
  aff_combination_convert (&aff_inv, signed_type);
  if (!aff_combination_zero_p (&aff_inv))
    comp_inv = aff_combination_to_tree (&aff_inv);

  cost = force_var_cost (data, comp_inv, inv_vars);
  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
	bitmap_clear (*inv_vars);

      cost.cost = adjust_setup_cost (data, cost.cost);
      /* Record setup cost in scratch field.  */
      cost.scratch = cost.cost;
    }
  /* Cost of a constant integer can be covered when adding the invariant
     part to the variant part.  */
  else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
    cost = no_cost;

  /* Need type narrowing to represent use with cand.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      machine_mode outer_mode = TYPE_MODE (utype);
      machine_mode inner_mode = TYPE_MODE (ctype);
      cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
    }

  /* Turn a + i * (-c) into a - i * c.  */
  if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
    aratio = -ratio;
  else
    aratio = ratio;

  if (ratio != 1)
    cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);

  /* TODO: We may also need to check if we can compute a + i * 4 in one
     instruction.  */
  /* Need to add up the invariant and variant parts.  */
  if (comp_inv && !integer_zerop (comp_inv))
    cost += add_cost (speed, TYPE_MODE (utype));

  return get_scaled_computation_cost_at (data, at, cost);
}

/* Determines cost of computing the use in GROUP with CAND in a generic
   expression.  */

static bool
determine_group_iv_cost_generic (struct ivopts_data *data,
				 struct iv_group *group, struct iv_cand *cand)
{
  comp_cost cost;
  iv_inv_expr_ent *inv_expr = NULL;
  bitmap inv_vars = NULL, inv_exprs = NULL;
  struct iv_use *use = group->vuses[0];

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    cost = no_cost;
  else
    cost = get_computation_cost (data, use, cand, false,
				 &inv_vars, NULL, &inv_expr);

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  set_group_iv_cost (data, group, cand, cost, inv_vars,
		     NULL_TREE, ERROR_MARK, inv_exprs);
  return !cost.infinite_cost_p ();
}

/* Determines cost of computing uses in GROUP with CAND in addresses.  */

static bool
determine_group_iv_cost_address (struct ivopts_data *data,
				 struct iv_group *group, struct iv_cand *cand)
{
  unsigned i;
  bitmap inv_vars = NULL, inv_exprs = NULL;
  bool can_autoinc;
  iv_inv_expr_ent *inv_expr = NULL;
  struct iv_use *use = group->vuses[0];
  comp_cost sum_cost = no_cost, cost;

  cost = get_computation_cost (data, use, cand, true,
			       &inv_vars, &can_autoinc, &inv_expr);

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  sum_cost = cost;
  if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
    {
      if (can_autoinc)
	sum_cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	sum_cost = infinite_cost;
    }

  /* Uses in a group can share setup code, so only add setup cost once.  */
  cost -= cost.scratch;
  /* Compute and add costs for the rest of the uses in this group.  */
  for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
    {
      struct iv_use *next = group->vuses[i];

      /* TODO: We could skip computing cost for sub iv_use when it has the
	 same cost as the first iv_use, but the cost really depends on the
	 offset and where the iv_use is.  */
      cost = get_computation_cost (data, next, cand, true,
				   NULL, &can_autoinc, &inv_expr);
      if (inv_expr)
	{
	  if (!inv_exprs)
	    inv_exprs = BITMAP_ALLOC (NULL);

	  bitmap_set_bit (inv_exprs, inv_expr->id);
	}
      sum_cost += cost;
    }
  set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
		     NULL_TREE, ERROR_MARK, inv_exprs);

  return !sum_cost.infinite_cost_p ();
}
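
/* For instance, a group of address uses a[i] and a[i + 4] sharing the
   same candidate is meant to pay the invariant setup (the scratch part
   of the cost) only once for the whole group, not once per use.  */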

/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  else
    steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}
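
/* That is, VAL == BASE + STEP * NITER, plus one extra STEP if AT is
   after the increment: e.g. an iv with base 10 and step 4 reaches
   10 + 4 * 3 == 22 before the increment with NITER == 3.  */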

/* Returns period of induction variable iv.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* The period of the iv is lcm (step, type_range) / step - 1, i.e.,
     N * type_range / step - 1 where N = step / gcd (step, type_range).
     Since the type range is a power of two,
     N == step >> num_of_ending_zeros_binary (step), so the final
     result is

       (type_range >> num_of_ending_zeros_binary (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_to_uhwi (pow2div)));

  return period;
}
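
/* Worked example: for an 8-bit unsigned iv with step 6 (one trailing
   zero bit), the mask has 8 - 1 == 7 bits, so the period is 127: the
   iv first returns to its initial value after lcm (6, 256) / 6 == 128
   increments, so up to 127 iterations can be told apart.  */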

/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}
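
/* That is, if the loop is left on the exit test's true edge, the
   eliminated test compares the candidate for equality with its final
   value (EQ_EXPR); if the loop keeps running on the true edge, the
   continuation test uses NE_EXPR instead.  */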

/* Returns true if we can prove that BASE - OFFSET does not overflow.  For
   now, we only detect the situation that BASE = SOMETHING + OFFSET, where
   the calculation is performed in a non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
				  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
				  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
	return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
				      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}

/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loop.  If successful, the comparison in COMP_P is altered
   accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves the correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence
      there is no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a
      does not overflow.  To prove this, we use the fact that
      p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as
     otherwise the calculation of the BOUND could overflow, making the
     comparison invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether the candidate is increasing or
     decreasing in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}
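
/* A quick arithmetic check of the pattern above with illustrative values
   a == 2, b == 7: MAY_BE_ZERO is 2 + 1 > 7, NITER is 7 - 2 - 1 == 4, and
   indeed B - A - NITER == 7 - 2 - 4 == 1, so the rewrite to a <
   comparison is accepted.  */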
4965
/* Check whether it is possible to express the condition in USE by comparison
   of candidate CAND.  If so, store the value compared with to BOUND, and the
   comparison operator to COMP.  */

static bool
may_eliminate_iv (struct ivopts_data *data,
                  struct iv_use *use, struct iv_cand *cand, tree *bound,
                  enum tree_code *comp)
{
  basic_block ex_bb;
  edge exit;
  tree period;
  struct loop *loop = data->current_loop;
  aff_tree bnd;
  struct tree_niter_desc *desc = NULL;

  if (TREE_CODE (cand->iv->step) != INTEGER_CST)
    return false;

  /* For now works only for exits that dominate the loop latch.
     TODO: extend to other conditions inside loop body.  */
  ex_bb = gimple_bb (use->stmt);
  if (use->stmt != last_stmt (ex_bb)
      || gimple_code (use->stmt) != GIMPLE_COND
      || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
    return false;

  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    return false;

  desc = niter_for_exit (data, exit);
  if (!desc)
    return false;

  /* Determine whether we can use the variable to test the exit condition.
     This is the case iff the period of the induction variable is greater
     than the number of iterations for which the exit condition is true.  */
  period = iv_period (cand->iv);

  /* If the number of iterations is constant, compare against it directly.  */
  if (TREE_CODE (desc->niter) == INTEGER_CST)
    {
      /* See cand_value_at.  */
      if (stmt_after_increment (loop, cand, use->stmt))
        {
          if (!tree_int_cst_lt (desc->niter, period))
            return false;
        }
      else
        {
          if (tree_int_cst_lt (period, desc->niter))
            return false;
        }
    }

  /* If not, and if this is the only possible exit of the loop, see whether
     we can get a conservative estimate on the number of iterations of the
     entire loop and compare against that instead.  */
  else
    {
      widest_int period_value, max_niter;

      max_niter = desc->max;
      if (stmt_after_increment (loop, cand, use->stmt))
        max_niter += 1;
      period_value = wi::to_widest (period);
      if (wi::gtu_p (max_niter, period_value))
        {
          /* See if we can take advantage of inferred loop bound
             information.  */
          if (data->loop_single_exit_p)
            {
              if (!max_loop_iterations (loop, &max_niter))
                return false;
              /* The loop bound is already adjusted by adding 1.  */
              if (wi::gtu_p (max_niter, period_value))
                return false;
            }
          else
            return false;
        }
    }

  cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);

  *bound = fold_convert (TREE_TYPE (cand->iv->base),
                         aff_combination_to_tree (&bnd));
  *comp = iv_elimination_compare (data, use);

  /* It is unlikely that computing the number of iterations using division
     would be more profitable than keeping the original induction variable.  */
  if (expression_expensive_p (*bound))
    return false;

  /* Sometimes it is possible to handle the situation that the number of
     iterations may be zero unless additional assumptions hold, by using <
     instead of != in the exit condition.

     TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
           base the exit condition on it.  However, that is often too
           expensive.  */
  if (!integer_zerop (desc->may_be_zero))
    return iv_elimination_compare_lt (data, cand, comp, desc);

  return true;
}
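
/* Illustration (values invented for the example): for a loop exiting on
   'i < n' and a candidate 'p = base + 4 * i', may_eliminate_iv computes
   BOUND as the candidate's value at the number of iterations (roughly
   'base + 4 * n'; see cand_value_at for the exact definition), so the
   exit test can be rewritten to compare 'p' against BOUND and 'i' itself
   becomes eliminable.  */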

/* Calculates the cost of BOUND, if it is a PARM_DECL.  A PARM_DECL must
   be copied if it is used in the loop body and DATA->body_includes_call
   is set.  */

static int
parm_decl_cost (struct ivopts_data *data, tree bound)
{
  tree sbound = bound;
  STRIP_NOPS (sbound);

  if (TREE_CODE (sbound) == SSA_NAME
      && SSA_NAME_IS_DEFAULT_DEF (sbound)
      && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
      && data->body_includes_call)
    return COSTS_N_INSNS (1);

  return 0;
}

/* Determines cost of computing the use in GROUP with CAND in a condition.  */

static bool
determine_group_iv_cost_cond (struct ivopts_data *data,
                              struct iv_group *group, struct iv_cand *cand)
{
  tree bound = NULL_TREE;
  struct iv *cmp_iv;
  bitmap inv_exprs = NULL;
  bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
  comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
  enum comp_iv_rewrite rewrite_type;
  iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
  tree *control_var, *bound_cst;
  enum tree_code comp = ERROR_MARK;
  struct iv_use *use = group->vuses[0];

  /* Extract condition operands.  */
  rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
                                        &bound_cst, NULL, &cmp_iv);
  gcc_assert (rewrite_type != COMP_IV_NA);

  /* Try iv elimination.  */
  if (rewrite_type == COMP_IV_ELIM
      && may_eliminate_iv (data, use, cand, &bound, &comp))
    {
      elim_cost = force_var_cost (data, bound, &inv_vars_elim);
      if (elim_cost.cost == 0)
        elim_cost.cost = parm_decl_cost (data, bound);
      else if (TREE_CODE (bound) == INTEGER_CST)
        elim_cost.cost = 0;
      /* If we replace a loop condition 'i < n' with 'p < base + n',
         inv_vars_elim will have 'base' and 'n' set, which implies that both
         'base' and 'n' will be live during the loop.  More likely,
         'base + n' will be loop invariant, resulting in only one live value
         during the loop.  So in that case we clear inv_vars_elim and set
         inv_expr_elim instead.  */
      if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
        {
          inv_expr_elim = get_loop_invariant_expr (data, bound);
          bitmap_clear (inv_vars_elim);
        }
      /* The bound is a loop invariant, so it will be only computed
         once.  */
      elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
    }

  /* When the condition is a comparison of the candidate IV against
     zero, prefer this IV.

     TODO: The constant that we're subtracting from the cost should
     be target-dependent.  This information should be added to the
     target costs for each backend.  */
  if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite!  */
      && integer_zerop (*bound_cst)
      && (operand_equal_p (*control_var, cand->var_after, 0)
          || operand_equal_p (*control_var, cand->var_before, 0)))
    elim_cost -= 1;

  express_cost = get_computation_cost (data, use, cand, false,
                                       &inv_vars_express, NULL,
                                       &inv_expr_express);
  if (cmp_iv != NULL)
    find_inv_vars (data, &cmp_iv->base, &inv_vars_express);

  /* Count the cost of the original bound as well.  */
  bound_cost = force_var_cost (data, *bound_cst, NULL);
  if (bound_cost.cost == 0)
    bound_cost.cost = parm_decl_cost (data, *bound_cst);
  else if (TREE_CODE (*bound_cst) == INTEGER_CST)
    bound_cost.cost = 0;
  express_cost += bound_cost;

  /* Choose the better approach, preferring the eliminated IV.  */
  if (elim_cost <= express_cost)
    {
      cost = elim_cost;
      inv_vars = inv_vars_elim;
      inv_vars_elim = NULL;
      inv_expr = inv_expr_elim;
    }
  else
    {
      cost = express_cost;
      inv_vars = inv_vars_express;
      inv_vars_express = NULL;
      bound = NULL_TREE;
      comp = ERROR_MARK;
      inv_expr = inv_expr_express;
    }

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  set_group_iv_cost (data, group, cand, cost,
                     inv_vars, bound, comp, inv_exprs);

  if (inv_vars_elim)
    BITMAP_FREE (inv_vars_elim);
  if (inv_vars_express)
    BITMAP_FREE (inv_vars_express);

  return !cost.infinite_cost_p ();
}

/* Determines cost of computing uses in GROUP with CAND.  Returns false
   if the group cannot be represented with CAND.  */

static bool
determine_group_iv_cost (struct ivopts_data *data,
                         struct iv_group *group, struct iv_cand *cand)
{
  switch (group->type)
    {
    case USE_NONLINEAR_EXPR:
      return determine_group_iv_cost_generic (data, group, cand);

    case USE_ADDRESS:
      return determine_group_iv_cost_address (data, group, cand);

    case USE_COMPARE:
      return determine_group_iv_cost_cond (data, group, cand);

    default:
      gcc_unreachable ();
    }
}

/* Return true if get_computation_cost indicates that autoincrement is
   a possibility for the pair of USE and CAND, false otherwise.  */

static bool
autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
                           struct iv_cand *cand)
{
  if (use->type != USE_ADDRESS)
    return false;

  bool can_autoinc = false;
  get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
  return can_autoinc;
}

/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
   use that allows autoincrement, and set their AINC_USE if possible.  */

static void
set_autoinc_for_original_candidates (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];
      struct iv_use *closest_before = NULL;
      struct iv_use *closest_after = NULL;
      if (cand->pos != IP_ORIGINAL)
        continue;

      for (j = 0; j < data->vgroups.length (); j++)
        {
          struct iv_group *group = data->vgroups[j];
          struct iv_use *use = group->vuses[0];
          unsigned uid = gimple_uid (use->stmt);

          if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
            continue;

          if (uid < gimple_uid (cand->incremented_at)
              && (closest_before == NULL
                  || uid > gimple_uid (closest_before->stmt)))
            closest_before = use;

          if (uid > gimple_uid (cand->incremented_at)
              && (closest_after == NULL
                  || uid < gimple_uid (closest_after->stmt)))
            closest_after = use;
        }

      if (closest_before != NULL
          && autoinc_possible_for_pair (data, closest_before, cand))
        cand->ainc_use = closest_before;
      else if (closest_after != NULL
               && autoinc_possible_for_pair (data, closest_after, cand))
        cand->ainc_use = closest_after;
    }
}

/* Relate the compare use with all candidates.  */

static void
relate_compare_use_with_all_cands (struct ivopts_data *data)
{
  unsigned i, count = data->vcands.length ();
  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      if (group->type == USE_COMPARE)
        bitmap_set_range (group->related_cands, 0, count);
    }
}
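
/* A compare use can potentially be eliminated by any candidate whose
   period is long enough (see may_eliminate_iv), so when we do not
   consider all candidates anyway, the compare group is conservatively
   related to every candidate here.  */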

/* Finds the candidates for the induction variables.  */

static void
find_iv_candidates (struct ivopts_data *data)
{
  /* Add commonly used ivs.  */
  add_standard_iv_candidates (data);

  /* Add old induction variables.  */
  add_iv_candidate_for_bivs (data);

  /* Add induction variables derived from uses.  */
  add_iv_candidate_for_groups (data);

  set_autoinc_for_original_candidates (data);

  /* Record the important candidates.  */
  record_important_candidates (data);

  /* Relate compare iv_use with all candidates.  */
  if (!data->consider_all_candidates)
    relate_compare_use_with_all_cands (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      unsigned i;

      fprintf (dump_file, "\n<Important Candidates>:\t");
      for (i = 0; i < data->vcands.length (); i++)
        if (data->vcands[i]->important)
          fprintf (dump_file, " %d,", data->vcands[i]->id);
      fprintf (dump_file, "\n");

      fprintf (dump_file, "\n<Group, Cand> Related:\n");
      for (i = 0; i < data->vgroups.length (); i++)
        {
          struct iv_group *group = data->vgroups[i];

          if (group->related_cands)
            {
              fprintf (dump_file, "  Group %d:\t", group->id);
              dump_bitmap (dump_file, group->related_cands);
            }
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines the cost of computing each group of uses with each iv
   candidate.  */

static void
determine_group_iv_costs (struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_cand *cand;
  struct iv_group *group;
  bitmap to_clear = BITMAP_ALLOC (NULL);

  alloc_use_cost_map (data);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      if (data->consider_all_candidates)
        {
          for (j = 0; j < data->vcands.length (); j++)
            {
              cand = data->vcands[j];
              determine_group_iv_cost (data, group, cand);
            }
        }
      else
        {
          bitmap_iterator bi;

          EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
            {
              cand = data->vcands[j];
              if (!determine_group_iv_cost (data, group, cand))
                bitmap_set_bit (to_clear, j);
            }

          /* Remove the candidates for which the cost is infinite from
             the list of related candidates.  */
          bitmap_and_compl_into (group->related_cands, to_clear);
          bitmap_clear (to_clear);
        }
    }

  BITMAP_FREE (to_clear);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      /* Dump invariant variables.  */
      fprintf (dump_file, "\n<Invariant Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          struct version_info *info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "Inv %d:\t", info->inv_id);
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, "%s\n",
                       info->has_nonlin_use ? "" : "\t(eliminable)");
            }
        }

      /* Dump invariant expressions.  */
      fprintf (dump_file, "\n<Invariant Expressions>:\n");
      auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());

      for (hash_table<iv_inv_expr_hasher>::iterator it
           = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
           ++it)
        list.safe_push (*it);

      list.qsort (sort_iv_inv_expr_ent);

      for (i = 0; i < list.length (); ++i)
        {
          fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
          print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n<Group-candidate Costs>:\n");

      for (i = 0; i < data->vgroups.length (); i++)
        {
          group = data->vgroups[i];

          fprintf (dump_file, "Group %d:\n", i);
          fprintf (dump_file, "  cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
          for (j = 0; j < group->n_map_members; j++)
            {
              if (!group->cost_map[j].cand
                  || group->cost_map[j].cost.infinite_cost_p ())
                continue;

              fprintf (dump_file, "  %d\t%d\t%d\t",
                       group->cost_map[j].cand->id,
                       group->cost_map[j].cost.cost,
                       group->cost_map[j].cost.complexity);
              if (!group->cost_map[j].inv_exprs
                  || bitmap_empty_p (group->cost_map[j].inv_exprs))
                fprintf (dump_file, "NIL;\t");
              else
                bitmap_print (dump_file,
                              group->cost_map[j].inv_exprs, "", ";\t");
              if (!group->cost_map[j].inv_vars
                  || bitmap_empty_p (group->cost_map[j].inv_vars))
                fprintf (dump_file, "NIL;\n");
              else
                bitmap_print (dump_file,
                              group->cost_map[j].inv_vars, "", "\n");
            }

          fprintf (dump_file, "\n");
        }
      fprintf (dump_file, "\n");
    }
}

/* Determines cost of the candidate CAND.  */

static void
determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
{
  comp_cost cost_base;
  unsigned cost, cost_step;
  tree base;

  gcc_assert (cand->iv != NULL);

  /* There are two costs associated with the candidate -- its increment
     and its initialization.  The second is almost negligible for any loop
     that rolls enough, so we take it just very little into account.  */

  base = cand->iv->base;
  cost_base = force_var_cost (data, base, NULL);
  /* It will be exceptional that the iv register happens to be initialized
     with the proper value at no cost.  In general, there will at least be
     a regcopy or a const set.  */
  if (cost_base.cost == 0)
    cost_base.cost = COSTS_N_INSNS (1);
  cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));

  cost = cost_step + adjust_setup_cost (data, cost_base.cost);

  /* Prefer the original ivs unless we may gain something by replacing them.
     The reason is to make debugging simpler; so this is not relevant for
     artificial ivs created by other optimization passes.  */
  if (cand->pos != IP_ORIGINAL
      || !SSA_NAME_VAR (cand->var_before)
      || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
    cost++;

  /* Prefer not to insert statements into latch unless there are some
     already (so that we do not create unnecessary jumps).  */
  if (cand->pos == IP_END
      && empty_block_p (ip_end_pos (data->current_loop)))
    cost++;

  cand->cost = cost;
  cand->cost_step = cost_step;
}
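
/* Note that adjust_setup_cost spreads the one-off setup cost over the
   expected number of loop iterations (an approximation; see its
   definition earlier in this file), which is why initialization is
   almost negligible for loops that roll enough.  */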

/* Determines costs of computation of the candidates.  */

static void
determine_iv_costs (struct ivopts_data *data)
{
  unsigned i;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Candidate Costs>:\n");
      fprintf (dump_file, "  cand\tcost\n");
    }

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      determine_iv_cost (data, cand);

      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "  %d\t%d\n", i, cand->cost);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "\n");
}

/* Estimate register pressure for loop having N_INVS invariants and N_CANDS
   induction variables.  Note N_INVS includes both invariant variables and
   invariant expressions.  */

static unsigned
ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
                              unsigned n_cands)
{
  unsigned cost;
  unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
  unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
  bool speed = data->speed;

  /* If there is a call in the loop body, the call-clobbered registers
     are not available for loop invariants.  */
  if (data->body_includes_call)
    available_regs = available_regs - target_clobbered_regs;

  /* If we have enough registers.  */
  if (regs_needed + target_res_regs < available_regs)
    cost = n_new;
  /* If close to running out of registers, try to preserve them.  */
  else if (regs_needed <= available_regs)
    cost = target_reg_cost [speed] * regs_needed;
  /* If the total register need exceeds the available registers but the
     candidates alone still fit, penalize the extra registers using
     target_spill_cost.  */
  else if (n_cands <= available_regs)
    cost = target_reg_cost [speed] * available_regs
           + target_spill_cost [speed] * (regs_needed - available_regs);
  /* If even the candidates alone exceed the available registers, penalize
     the extra candidate registers using target_spill_cost * 2, because it
     is more expensive to spill an induction variable than an invariant.  */
  else
    cost = target_reg_cost [speed] * available_regs
           + target_spill_cost [speed] * (n_cands - available_regs) * 2
           + target_spill_cost [speed] * (regs_needed - n_cands);

  /* Finally, add the number of candidates, so that we prefer eliminating
     induction variables if possible.  */
  return cost + n_cands;
}
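
/* A worked example with invented numbers: with available_regs = 10,
   n_old = 6 values already live, n_invs = 2 and n_cands = 3, we get
   regs_needed = 11 > 10 while n_cands = 3 <= 10, so the third case
   applies and cost = target_reg_cost * 10 + target_spill_cost * (11 - 10),
   i.e. every register is charged for and one value is assumed spilled.  */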

/* For each size of the induction variable set determine the penalty.  */

static void
determine_set_costs (struct ivopts_data *data)
{
  unsigned j, n;
  gphi *phi;
  gphi_iterator psi;
  tree op;
  struct loop *loop = data->current_loop;
  bitmap_iterator bi;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "<Global Costs>:\n");
      fprintf (dump_file, "  target_avail_regs %d\n", target_avail_regs);
      fprintf (dump_file, "  target_clobbered_regs %d\n", target_clobbered_regs);
      fprintf (dump_file, "  target_reg_cost %d\n", target_reg_cost[data->speed]);
      fprintf (dump_file, "  target_spill_cost %d\n", target_spill_cost[data->speed]);
    }

  n = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();
      op = PHI_RESULT (phi);

      if (virtual_operand_p (op))
        continue;

      if (get_iv (data, op))
        continue;

      if (!POINTER_TYPE_P (TREE_TYPE (op))
          && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
        continue;

      n++;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info = ver_info (data, j);

      if (info->inv_id && info->has_nonlin_use)
        n++;
    }

  data->regs_used = n;
  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "  regs_used %d\n", n);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "  cost for size:\n");
      fprintf (dump_file, "  ivs\tcost\n");
      for (j = 0; j <= 2 * target_avail_regs; j++)
        fprintf (dump_file, "  %d\t%d\n", j,
                 ivopts_estimate_reg_pressure (data, 0, j));
      fprintf (dump_file, "\n");
    }
}

/* Returns true if A is a cheaper cost pair than B.  */

static bool
cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (!a)
    return false;

  if (!b)
    return true;

  if (a->cost < b->cost)
    return true;

  if (b->cost < a->cost)
    return false;

  /* In case the costs are the same, prefer the cheaper candidate.  */
  if (a->cand->cost < b->cand->cost)
    return true;

  return false;
}

/* Compare if A is a more expensive cost pair than B.  Return 1, 0 and -1
   for more expensive, equal and cheaper respectively.  */

static int
compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
{
  if (cheaper_cost_pair (a, b))
    return -1;
  if (cheaper_cost_pair (b, a))
    return 1;

  return 0;
}

/* Returns the candidate by which GROUP is expressed in IVS.  */

static struct cost_pair *
iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
{
  return ivs->cand_for_group[group->id];
}

/* Computes the cost field of IVS structure.  */

static void
iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
{
  comp_cost cost = ivs->cand_use_cost;

  cost += ivs->cand_cost;
  cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
  ivs->cost = cost;
}

/* Remove use of invariants in set INVS by decreasing the counters in
   N_INV_USES and IVS.  */

static void
iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
{
  bitmap_iterator bi;
  unsigned iid;

  if (!invs)
    return;

  gcc_assert (n_inv_uses != NULL);
  EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
    {
      n_inv_uses[iid]--;
      if (n_inv_uses[iid] == 0)
        ivs->n_invs--;
    }
}

/* Set GROUP not to be expressed by any candidate in IVS.  */

static void
iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_group *group)
{
  unsigned gid = group->id, cid;
  struct cost_pair *cp;

  cp = ivs->cand_for_group[gid];
  if (!cp)
    return;
  cid = cp->cand->id;

  ivs->bad_groups++;
  ivs->cand_for_group[gid] = NULL;
  ivs->n_cand_uses[cid]--;

  if (ivs->n_cand_uses[cid] == 0)
    {
      bitmap_clear_bit (ivs->cands, cid);
      ivs->n_cands--;
      ivs->cand_cost -= cp->cand->cost;
      iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
      iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
    }

  ivs->cand_use_cost -= cp->cost;
  iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
  iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
  iv_ca_recount_cost (data, ivs);
}
5754 IVS. */
80cad5fa 5755
9be872b7 5756static void
0ca91c77 5757iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
9be872b7
ZD
5758{
5759 bitmap_iterator bi;
5760 unsigned iid;
5761
5762 if (!invs)
5763 return;
5764
0ca91c77 5765 gcc_assert (n_inv_uses != NULL);
9be872b7 5766 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
b1b02be2 5767 {
0ca91c77
BC
5768 n_inv_uses[iid]++;
5769 if (n_inv_uses[iid] == 1)
1136cae4 5770 ivs->n_invs++;
8b11a64c 5771 }
b1b02be2
ZD
5772}
5773
309a0cf6 5774/* Set cost pair for GROUP in set IVS to CP. */
b1b02be2
ZD
5775
5776static void
5777iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
309a0cf6 5778 struct iv_group *group, struct cost_pair *cp)
b1b02be2 5779{
309a0cf6 5780 unsigned gid = group->id, cid;
b1b02be2 5781
309a0cf6 5782 if (ivs->cand_for_group[gid] == cp)
b1b02be2
ZD
5783 return;
5784
309a0cf6
BC
5785 if (ivs->cand_for_group[gid])
5786 iv_ca_set_no_cp (data, ivs, group);
b1b02be2
ZD
5787
5788 if (cp)
8b11a64c 5789 {
b1b02be2 5790 cid = cp->cand->id;
8b11a64c 5791
309a0cf6
BC
5792 ivs->bad_groups--;
5793 ivs->cand_for_group[gid] = cp;
b1b02be2
ZD
5794 ivs->n_cand_uses[cid]++;
5795 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 5796 {
b1b02be2 5797 bitmap_set_bit (ivs->cands, cid);
36f5ada1 5798 ivs->n_cands++;
b1b02be2 5799 ivs->cand_cost += cp->cand->cost;
0ca91c77 5800 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
4c11bdff 5801 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
8b11a64c
ZD
5802 }
5803
8d18b6df 5804 ivs->cand_use_cost += cp->cost;
0ca91c77
BC
5805 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5806 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
b1b02be2 5807 iv_ca_recount_cost (data, ivs);
87c476a2 5808 }
b1b02be2
ZD
5809}

/* Extend set IVS by expressing GROUP by some of the candidates in it
   if possible.  Consider all important candidates if candidates in
   set IVS don't give any result.  */

static void
iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
                 struct iv_group *group)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;
  struct iv_cand *cand;

  gcc_assert (ivs->upto >= group->id);
  ivs->upto++;
  ivs->bad_groups++;

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      cp = get_group_iv_cost (data, group, cand);
      if (cheaper_cost_pair (cp, best_cp))
        best_cp = cp;
    }

  if (best_cp == NULL)
    {
      EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
        {
          cand = data->vcands[i];
          cp = get_group_iv_cost (data, group, cand);
          if (cheaper_cost_pair (cp, best_cp))
            best_cp = cp;
        }
    }

  iv_ca_set_cp (data, ivs, group, best_cp);
}

/* Get cost for assignment IVS.  */

static comp_cost
iv_ca_cost (struct iv_ca *ivs)
{
  /* This was a conditional expression but it triggered a bug in
     Sun C 5.5.  */
  if (ivs->bad_groups)
    return infinite_cost;
  else
    return ivs->cost;
}

/* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
   than OLD_CP.  Return 1, 0 and -1 for more, equal and fewer invariants
   respectively.  */

static int
iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_group *group, struct cost_pair *old_cp,
                    struct cost_pair *new_cp)
{
  gcc_assert (old_cp && new_cp && old_cp != new_cp);
  unsigned old_n_invs = ivs->n_invs;
  iv_ca_set_cp (data, ivs, group, new_cp);
  unsigned new_n_invs = ivs->n_invs;
  iv_ca_set_cp (data, ivs, group, old_cp);

  return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
}

/* Creates a change of expressing GROUP by NEW_CP instead of OLD_CP and
   chains it before NEXT.  */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
                 struct cost_pair *new_cp, struct iv_ca_delta *next)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->group = group;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next = next;

  return change;
}

/* Joins two lists of changes L1 and L2.  Destructive -- old lists
   are rewritten.  */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next; last = last->next)
    continue;
  last->next = l2;

  return l1;
}

/* Reverse the list of changes DELTA, forming the inverse to it.  */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;

  for (act = delta; act; act = next)
    {
      next = act->next;
      act->next = prev;
      prev = act;

      std::swap (act->old_cp, act->new_cp);
    }

  return prev;
}

/* Commit changes in DELTA to IVS.  If FORWARD is false, the changes are
   reverted instead.  */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
                    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
      iv_ca_set_cp (data, ivs, act->group, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

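/* A typical use of deltas in the search below: compute a tentative
   change with iv_ca_extend or iv_ca_narrow, which returns the cost the
   set would have and records the change in a delta list; if the new
   cost is an improvement, apply it with iv_ca_delta_commit (..., true),
   otherwise just free the delta.  Committing with FORWARD equal to
   false exactly undoes a forward commit.  */
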
/* Returns true if CAND is used in IVS.  */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns the number of induction variable candidates in the set IVS.  */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}
5977}
5978
b1b02be2
ZD
5979/* Free the list of changes DELTA. */
5980
5981static void
5982iv_ca_delta_free (struct iv_ca_delta **delta)
5983{
5984 struct iv_ca_delta *act, *next;
5985
5986 for (act = *delta; act; act = next)
87c476a2 5987 {
309a0cf6 5988 next = act->next;
b1b02be2 5989 free (act);
87c476a2 5990 }
8b11a64c 5991
b1b02be2
ZD
5992 *delta = NULL;
5993}
5994
5995/* Allocates new iv candidates assignment. */
5996
5997static struct iv_ca *
5998iv_ca_new (struct ivopts_data *data)
5999{
5ed6ace5 6000 struct iv_ca *nw = XNEW (struct iv_ca);
8b11a64c 6001
b1b02be2 6002 nw->upto = 0;
309a0cf6
BC
6003 nw->bad_groups = 0;
6004 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6005 data->vgroups.length ());
6006 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
8bdbfff5 6007 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 6008 nw->n_cands = 0;
1136cae4 6009 nw->n_invs = 0;
7735d6c7 6010 nw->cand_use_cost = no_cost;
b1b02be2 6011 nw->cand_cost = 0;
0ca91c77
BC
6012 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6013 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
7735d6c7 6014 nw->cost = no_cost;
b1b02be2
ZD
6015
6016 return nw;
6017}

/* Free memory occupied by the set IVS.  */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_group);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_inv_var_uses);
  free ((*ivs)->n_inv_expr_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE.  */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  unsigned i;
  comp_cost cost = iv_ca_cost (ivs);

  fprintf (file, "  cost: %d (complexity %d)\n", cost.cost,
           cost.complexity);
  fprintf (file, "  cand_cost: %d\n  cand_group_cost: %d (complexity %d)\n",
           ivs->cand_cost, ivs->cand_use_cost.cost,
           ivs->cand_use_cost.complexity);
  bitmap_print (file, ivs->cands, "  candidates: ","\n");

  for (i = 0; i < ivs->upto; i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
      if (cp)
        fprintf (file, "   group:%d --> iv_cand:%d, cost=(%d,%d)\n",
                 group->id, cp->cand->id, cp->cost.cost,
                 cp->cost.complexity);
      else
        fprintf (file, "   group:%d --> ??\n", group->id);
    }

  const char *pref = "";
  fprintf (file, "  invariant variables: ");
  for (i = 1; i <= data->max_inv_var_id; i++)
    if (ivs->n_inv_var_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }

  pref = "";
  fprintf (file, "\n  invariant expressions: ");
  for (i = 1; i <= data->max_inv_expr_id; i++)
    if (ivs->n_inv_expr_uses[i])
      {
        fprintf (file, "%s%d", pref, i);
        pref = ", ";
      }

  fprintf (file, "\n\n");
}

/* Try changing candidate in IVS to CAND for each use.  Return cost of the
   new set, and store differences in DELTA.  Number of induction variables
   in the new set is stored to N_IVS.  MIN_NCAND is a flag; when it is true
   the function will try to find a solution with minimal iv candidates.  */

static comp_cost
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_ca_delta **delta,
              unsigned *n_ivs, bool min_ncand)
{
  unsigned i;
  comp_cost cost;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      group = data->vgroups[i];
      old_cp = iv_ca_cand_for_group (ivs, group);

      if (old_cp
          && old_cp->cand == cand)
        continue;

      new_cp = get_group_iv_cost (data, group, cand);
      if (!new_cp)
        continue;

      if (!min_ncand)
        {
          int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
          /* Skip if new_cp depends on more invariants.  */
          if (cmp_invs > 0)
            continue;

          int cmp_cost = compare_cost_pair (new_cp, old_cp);
          /* Skip if new_cp is not cheaper.  */
          if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
            continue;
        }

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND.  Return the cost of
   the new set and store the differences in DELTA.  START is
   the candidate with which we start narrowing.  */

static comp_cost
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
              struct iv_cand *cand, struct iv_cand *start,
              struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_group *group;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  comp_cost cost, best_cost, acost;

  *delta = NULL;
  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];

      old_cp = iv_ca_cand_for_group (ivs, group);
      if (old_cp->cand != cand)
        continue;

      best_cost = iv_ca_cost (ivs);
      /* Start narrowing with START.  */
      new_cp = get_group_iv_cost (data, group, start);

      if (data->consider_all_candidates)
        {
          EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = data->vcands[ci];

              cp = get_group_iv_cost (data, group, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, group, cp);
              acost = iv_ca_cost (ivs);

              if (acost < best_cost)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      else
        {
          EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
            {
              if (ci == cand->id || (start && ci == start->id))
                continue;

              cnd = data->vcands[ci];

              cp = get_group_iv_cost (data, group, cnd);
              if (!cp)
                continue;

              iv_ca_set_cp (data, ivs, group, cp);
              acost = iv_ca_cost (ivs);

              if (acost < best_cost)
                {
                  best_cost = acost;
                  new_cp = cp;
                }
            }
        }
      /* Restore the old cp for this use.  */
      iv_ca_set_cp (data, ivs, group, old_cp);

      if (!new_cp)
        {
          iv_ca_delta_free (delta);
          return infinite_cost;
        }

      *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}
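
/* Narrowing tries, for each group currently expressed by CAND, every
   other candidate remaining in the set (beginning with START), and
   keeps the per-group choice giving the best whole-set cost; if some
   group cannot be expressed by any other candidate, CAND cannot be
   removed at all.  */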

/* Try optimizing the set of candidates IVS by removing candidates different
   from EXCEPT_CAND from it.  Return cost of the new set, and store the
   differences in DELTA.  */

static comp_cost
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
             struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i;
  comp_cost best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (cand == except_cand)
        continue;

      acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs.  */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Check if CAND_IDX is a candidate other than OLD_CAND and has
   cheaper local cost for GROUP than BEST_CP.  Return pointer to
   the corresponding cost_pair, otherwise just return BEST_CP.  */

static struct cost_pair*
cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
                        unsigned int cand_idx, struct iv_cand *old_cand,
                        struct cost_pair *best_cp)
{
  struct iv_cand *cand;
  struct cost_pair *cp;

  gcc_assert (old_cand != NULL && best_cp != NULL);
  if (cand_idx == old_cand->id)
    return best_cp;

  cand = data->vcands[cand_idx];
  cp = get_group_iv_cost (data, group, cand);
  if (cp != NULL && cheaper_cost_pair (cp, best_cp))
    return cp;

  return best_cp;
}

/* Try breaking the local optimal fixed-point for IVS by replacing candidates
   which are used by more than one iv use.  For each of those candidates,
   this function tries to represent iv uses under that candidate using
   other ones with lower local cost, then tries to prune the new set.
   If the new set has lower cost, it returns the new cost after recording
   candidate replacement in the list DELTA.  */

static comp_cost
iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
               struct iv_ca_delta **delta)
{
  bitmap_iterator bi, bj;
  unsigned int i, j, k;
  struct iv_cand *cand;
  comp_cost orig_cost, acost;
  struct iv_ca_delta *act_delta, *tmp_delta;
  struct cost_pair *old_cp, *best_cp = NULL;

  *delta = NULL;
  orig_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      if (ivs->n_cand_uses[i] == 1
          || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
        continue;

      cand = data->vcands[i];

      act_delta = NULL;
      /* Represent uses under current candidate using other ones with
         lower local cost.  */
      for (j = 0; j < ivs->upto; j++)
        {
          struct iv_group *group = data->vgroups[j];
          old_cp = iv_ca_cand_for_group (ivs, group);

          if (old_cp->cand != cand)
            continue;

          best_cp = old_cp;
          if (data->consider_all_candidates)
            for (k = 0; k < data->vcands.length (); k++)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);
          else
            EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
              best_cp = cheaper_cost_with_cand (data, group, k,
                                                old_cp->cand, best_cp);

          if (best_cp == old_cp)
            continue;

          act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
        }
      /* No need for further pruning.  */
      if (!act_delta)
        continue;

      /* Prune the new candidate set.  */
      iv_ca_delta_commit (data, ivs, act_delta, true);
      acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
      iv_ca_delta_commit (data, ivs, act_delta, false);
      act_delta = iv_ca_delta_join (act_delta, tmp_delta);

      if (acost < orig_cost)
        {
          *delta = act_delta;
          return acost;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  return orig_cost;
}

/* Tries to extend the set IVS in the best possible way in order to
   express the GROUP.  If ORIGINALP is true, prefer candidates from
   the original set of IVs, otherwise favor important candidates not
   based on any memory object.  */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
                  struct iv_group *group, bool originalp)
{
  comp_cost best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_group (data, ivs, group);
  best_cost = iv_ca_cost (ivs);
  cp = iv_ca_cand_for_group (ivs, group);
  if (cp)
    {
      best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, group);
    }

  /* If ORIGINALP is true, try to find the original IV for the use.  Otherwise
     first try important candidates not based on any memory object.  Only if
     this fails, try the specific ones.  Rationale -- in loops with many
     variables the best choice often is to use just one generic biv.  If we
     added here many ivs specific to the uses, the optimization algorithm later
     would be likely to get stuck in a local minimum, thus causing us to create
     too many ivs.  The approach from few ivs to more seems more likely to be
     successful -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win.  */
  EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
    {
      cand = data->vcands[i];

      if (originalp && cand->pos != IP_ORIGINAL)
        continue;

      if (!originalp && cand->iv->base_object != NULL_TREE)
        continue;

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      cp = get_group_iv_cost (data, group, cand);
      if (!cp)
        continue;

      iv_ca_set_cp (data, ivs, group, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
                               true);
      iv_ca_set_no_cp (data, ivs, group);
      act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);

      if (act_cost < best_cost)
        {
          best_cost = act_cost;

          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (best_cost.infinite_cost_p ())
    {
      for (i = 0; i < group->n_map_members; i++)
        {
          cp = group->cost_map + i;
          cand = cp->cand;
          if (!cand)
            continue;

          /* Already tried this.  */
          if (cand->important)
            {
              if (originalp && cand->pos == IP_ORIGINAL)
                continue;
              if (!originalp && cand->iv->base_object == NULL_TREE)
                continue;
            }

          if (iv_ca_cand_used_p (ivs, cand))
            continue;

          act_delta = NULL;
          iv_ca_set_cp (data, ivs, group, cp);
          act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
          iv_ca_set_no_cp (data, ivs, group);
          act_delta = iv_ca_delta_add (group,
                                       iv_ca_cand_for_group (ivs, group),
                                       cp, act_delta);

          if (act_cost < best_cost)
            {
              best_cost = act_cost;

              if (best_delta)
                iv_ca_delta_free (&best_delta);
              best_delta = act_delta;
            }
          else
            iv_ca_delta_free (&act_delta);
        }
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return !best_cost.infinite_cost_p ();
}

/* Finds an initial assignment of candidates to uses.  */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data, bool originalp)
{
  unsigned i;
  struct iv_ca *ivs = iv_ca_new (data);

  for (i = 0; i < data->vgroups.length (); i++)
    if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
      {
        iv_ca_free (&ivs);
        return NULL;
      }

  return ivs;
}

/* Tries to improve set of induction variables IVS.  TRY_REPLACE_P
   points to a bool variable; if it is true, this function tries to break
   the local optimal fixed-point by replacing candidates in IVS.  */

static bool
try_improve_iv_set (struct ivopts_data *data,
                    struct iv_ca *ivs, bool *try_replace_p)
{
  unsigned i, n_ivs;
  comp_cost acost, best_cost = iv_ca_cost (ivs);
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one.  */
  for (i = 0; i < data->vcands.length (); i++)
    {
      cand = data->vcands[i];

      if (iv_ca_cand_used_p (ivs, cand))
        continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
      if (!act_delta)
        continue;

      /* If we successfully added the candidate and the set is small enough,
         try optimizing it by removing other candidates.  */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
        {
          iv_ca_delta_commit (data, ivs, act_delta, true);
          acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
          iv_ca_delta_commit (data, ivs, act_delta, false);
          act_delta = iv_ca_delta_join (act_delta, tmp_delta);
        }

      if (acost < best_cost)
        {
          best_cost = acost;
          iv_ca_delta_free (&best_delta);
          best_delta = act_delta;
        }
      else
        iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead.  */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      if (!best_delta && *try_replace_p)
        {
          *try_replace_p = false;
          /* So far the candidate selecting algorithm tends to choose fewer
             IVs so that it can handle cases in which loops have many
             variables but the best choice is often to use only one general
             biv.  One weakness is that it can't handle the opposite cases,
             in which different candidates should be chosen with respect to
             each use.  To solve the problem, we replace candidates in a
             manner described by the comments of iv_ca_replace, thus giving
             the general algorithm a chance to break the local optimal
             fixed-point in these cases.  */
          best_cost = iv_ca_replace (data, ivs, &best_delta);
        }

      if (!best_delta)
        return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables.  We do simple
   greedy heuristic -- we try to replace at most one candidate in the selected
   solution and remove the unused ivs while this improves the cost.  */

static struct iv_ca *
find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
{
  struct iv_ca *set;
  bool try_replace_p = true;

  /* Get the initial solution.  */
  set = get_initial_solution (data, originalp);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set, &try_replace_p))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Improved to:\n");
          iv_ca_dump (data, dump_file, set);
        }
    }

  return set;
}

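/* Finds the optimal set of induction variables, trying both the strategy
   that prefers the original IVs and the one that prefers candidates not
   based on any memory object, and keeping the cheaper result.  */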
static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  comp_cost cost, origcost;
  struct iv_ca *set, *origset;

  /* Determine the cost based on a strategy that starts with original IVs,
     and try again using a strategy that prefers candidates not based
     on any IVs.  */
  origset = find_optimal_iv_set_1 (data, true);
  set = find_optimal_iv_set_1 (data, false);

  if (!origset && !set)
    return NULL;

  origcost = origset ? iv_ca_cost (origset) : infinite_cost;
  cost = set ? iv_ca_cost (set) : infinite_cost;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
               origcost.cost, origcost.complexity);
      fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
               cost.cost, cost.complexity);
    }

  /* Choose the one with the best cost.  */
  if (origcost <= cost)
    {
      if (set)
        iv_ca_free (&set);
      set = origset;
    }
  else if (origset)
    iv_ca_free (&origset);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      group->selected = iv_ca_cand_for_group (set, group)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND.  */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  gimple_stmt_iterator incr_pos;
  tree base;
  struct iv_use *use;
  struct iv_group *group;
  bool after = false;

  gcc_assert (cand->iv != NULL);

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_AFTER_USE:
      after = true;
      /* fall through */
    case IP_BEFORE_USE:
      incr_pos = gsi_for_stmt (cand->incremented_at);
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved.  */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly.  */
      use = find_interesting_uses_op (data, cand->var_after);
      group = data->vgroups[use->group_id];
      group->selected = cand;
      return;
    }

  gimple_add_tmp_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
             cand->var_before, data->current_loop,
             &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
	       data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
	       avg_loop_niter (data->current_loop));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
	{
	  cand = data->vcands[i];
	  dump_cand (dump_file, cand);
	}
      fprintf (dump_file, "\n");
    }
}
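
/* For reference, a hypothetical -fdump-tree-ivopts-details line produced
   by the dump code above:

     Selected IV set for loop 1 at foo.c:12, 100 avg niters, 2 IVs:

   followed by one dump_cand entry per selected candidate.  */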

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
	  || stmt_code == MINUS_EXPR
	  || stmt_code == POINTER_PLUS_EXPR)
	{
	  if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs2 (use->stmt);
	  else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
	    op = gimple_assign_rhs1 (use->stmt);
	}

      if (op != NULL_TREE)
	{
	  if (expr_invariant_in_loop_p (data->current_loop, op))
	    return;
	  if (TREE_CODE (op) == SSA_NAME)
	    {
	      struct iv *iv = get_iv (data, op);
	      if (iv != NULL && integer_zerop (iv->step))
		return;
	    }
	}
    }

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data->current_loop, use->stmt,
			      use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer CSE opportunities over hoisting loop invariants by adding
     the constant offset last, so that iv_uses that differ only in
     constant offset can be CSEd.  */
  widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      comp = fold_build_pointer_plus (comp_op1,
				      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
				      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
			  fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
			  wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  comp = fold_convert (type, comp);
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
	  /* We can't allow re-allocating the stmt as it might be pointed
	     to still.  */
	  && (get_gimple_rhs_num_ops (TREE_CODE (comp))
	      >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
	{
	  duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
	  /* As this isn't a plain copy we have to reset alignment
	     information.  */
	  if (SSA_NAME_PTR_INFO (comp))
	    mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
	}
    }

  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}
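
/* A hypothetical example of the offset handling above: if two uses have
   affine forms (base + 4) + 4*iv and (base + 8) + 4*iv, stripping the
   constant offsets first makes both compute base + 4*iv, which can then
   be CSEd, with each use adding its own 4 or 8 at the end.  */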

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

   t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
   iv2 = iv1 + 1;

   if (t < val)      (1)
     goto L;
   goto Head;


   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms
   it into:


   iv2 = iv1 + 1;
   t = MEM_REF (base, iv2, 8, 8);
   if (t < val)
     goto L;
   goto Head;  */

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0);
      print_gimple_stmt (dump_file, use->stmt, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}
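
/* Note on the transformation above: once the update is moved before the
   use, the candidate is repositioned to IP_BEFORE_USE with
   incremented_at set to the use statement, so the subsequent address
   rewrite computes the reference from var_after with the offset reduced
   by one step, as in the MEM_REF example in the function comment.  */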

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
  tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree type = TREE_TYPE (*use->op_p);
  unsigned int align = get_object_alignment (*use->op_p);
  if (align != TYPE_ALIGN (type))
    type = build_aligned_type (type, align);

  tree ref = create_mem_ref (&bsi, type, &aff,
			     reference_alias_ptr_type (*use->op_p),
			     iv, base_hint, data->speed);

  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
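
/* For illustration only: for a use like a[i] rewritten against a
   suitable candidate, create_mem_ref typically produces a
   TARGET_MEM_REF such as MEM[base: a_1, index: ivtmp_2, step: 4], so
   the address arithmetic can be folded into the target's addressing
   modes where they exist.  */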

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct iv_group *group = data->vgroups[use->group_id];
  struct cost_pair *cp = get_group_iv_cost (data, group, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Replacing exit test: ");
	  print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
	}
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
	gsi_insert_seq_on_edge_immediate (
		loop_preheader_edge (data->current_loop),
		stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation_at (data->current_loop, use->stmt, use, cand);
  gcc_assert (comp != NULL_TREE);
  gcc_assert (use->op_p != NULL);
  *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
					 SSA_NAME_VAR (*use->op_p),
					 true, GSI_SAME_STMT);
}
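
/* A hypothetical example of the bound path above: for an exit test
   i_1 < n_2 eliminated in favor of a pointer candidate p advancing by
   4 bytes per iteration, cp->value would be an expression such as
   base_3 + (sizetype) n_2 * 4, computed once in the preheader, and the
   condition is rewritten to something like p_4 != bound_5 using the
   comparison code recorded in cp->comp.  */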

/* Rewrite the groups using the selected induction variables.  */

static void
rewrite_groups (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_cand *cand = group->selected;

      gcc_assert (cand);

      if (group->type == USE_NONLINEAR_EXPR)
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else if (group->type == USE_ADDRESS)
	{
	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_address (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
      else
	{
	  gcc_assert (group->type == USE_COMPARE);

	  for (j = 0; j < group->vuses.length (); j++)
	    {
	      rewrite_use_compare (data, group->vuses[j], cand);
	      update_stmt (group->vuses[j]->stmt);
	    }
	}
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->nonlin_use
	  && !info->preserve_biv)
	{
	  bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

	  tree def = info->iv->ssa_name;

	  if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
	    {
	      imm_use_iterator imm_iter;
	      use_operand_p use_p;
	      gimple *stmt;
	      int count = 0;

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  /* We just want to determine whether to do nothing
		     (count == 0), to substitute the computed
		     expression into a single use of the SSA DEF by
		     itself (count == 1), or to use a debug temp
		     because the SSA DEF is used multiple times or as
		     part of a larger expression (count > 1).  */
		  count++;
		  if (gimple_debug_bind_get_value (stmt) != def)
		    count++;

		  if (count > 1)
		    BREAK_FROM_IMM_USE_STMT (imm_iter);
		}

	      if (!count)
		continue;

	      struct iv_use dummy_use;
	      struct iv_cand *best_cand = NULL, *cand;
	      unsigned i, best_pref = 0, cand_pref;

	      memset (&dummy_use, 0, sizeof (dummy_use));
	      dummy_use.iv = info->iv;
	      for (i = 0; i < data->vgroups.length () && i < 64; i++)
		{
		  cand = data->vgroups[i]->selected;
		  if (cand == best_cand)
		    continue;
		  cand_pref = operand_equal_p (cand->iv->step,
					       info->iv->step, 0)
			      ? 4 : 0;
		  cand_pref
		    += TYPE_MODE (TREE_TYPE (cand->iv->base))
		       == TYPE_MODE (TREE_TYPE (info->iv->base))
		       ? 2 : 0;
		  cand_pref
		    += TREE_CODE (cand->iv->base) == INTEGER_CST
		       ? 1 : 0;
		  if (best_cand == NULL || best_pref < cand_pref)
		    {
		      best_cand = cand;
		      best_pref = cand_pref;
		    }
		}

	      if (!best_cand)
		continue;

	      tree comp = get_computation_at (data->current_loop,
					      SSA_NAME_DEF_STMT (def),
					      &dummy_use, best_cand);
	      if (!comp)
		continue;

	      if (count > 1)
		{
		  tree vexpr = make_node (DEBUG_EXPR_DECL);
		  DECL_ARTIFICIAL (vexpr) = 1;
		  TREE_TYPE (vexpr) = TREE_TYPE (comp);
		  if (SSA_NAME_VAR (def))
		    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
		  else
		    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
		  gdebug *def_temp
		    = gimple_build_debug_bind (vexpr, comp, NULL);
		  gimple_stmt_iterator gsi;

		  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
		    gsi = gsi_after_labels (gimple_bb
					    (SSA_NAME_DEF_STMT (def)));
		  else
		    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

		  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
		  comp = vexpr;
		}

	      FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
		{
		  if (!gimple_debug_bind_p (stmt))
		    continue;

		  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
		    SET_USE (use_p, comp);

		  update_stmt (stmt);
		}
	    }
	}
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
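
/* Illustration (hypothetical) of the debug handling above: when a
   removed iv i_1 is still referenced by debug binds, a single
   # DEBUG i => i_1 is rewritten to bind the expression of the best
   surviving candidate directly, while multiple or composite references
   go through a shared DEBUG_EXPR_DECL temp, so variable tracking at -g
   stays accurate without keeping the dead iv alive.  */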

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
	free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
	{
	  if (group->cost_map[j].inv_vars)
	    BITMAP_FREE (group->cost_map[j].inv_vars);
	  if (group->cost_map[j].inv_exprs)
	    BITMAP_FREE (group->cost_map[j].inv_exprs);
	}

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
	BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
	BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes the data structures used by the iv optimization pass,
   releasing the per-pass allocations in DATA.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY, consisting of NUM_NODES blocks,
   contains a function call other than an internal call or an
   inexpensive builtin.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
	gimple *stmt = gsi_stmt (gsi);
	if (is_gimple_call (stmt)
	    && !gimple_call_internal_p (stmt)
	    && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
	  return true;
      }
  return false;
}
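
/* Note: the result is stored in data->body_includes_call below and is
   consulted by the cost model elsewhere in this file when estimating
   register pressure; a body containing a real call leaves fewer
   registers free across it, making large IV sets more expensive.  */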

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
	fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
		 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name determines whether it behaves as an induction variable
     in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"