/* Induction variable optimizations.
   Copyright (C) 2003-2017 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

      Note the interesting uses are categorized and handled in groups.
      Generally, address type uses are grouped together if their iv bases
      differ only in constant offset.

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
	 groups/uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The group/use costs.  Each of the interesting groups/uses chooses
	 the best induction variable in the set and adds its cost to the sum.
	 The cost reflects the time spent on modifying the induction variables
	 value to be usable for the given purpose (adding base and offset for
	 arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
	 reflects the costs associated with incrementing the value of the
	 variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
	 added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

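/* As a motivating sketch (an assumed example, not taken from this file):
   given a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may decide that a pointer induction variable serves the address
   use more cheaply, effectively producing

     for (p = a; p < a + n; p++)
       *p = 0;

   which strength-reduces the multiply implied by a[i] and eliminates the
   integer iv in favor of a pointer compare in the exit test.  */
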
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "backend.h"
#include "rtl.h"
#include "tree.h"
#include "gimple.h"
#include "cfghooks.h"
#include "tree-pass.h"
#include "memmodel.h"
#include "tm_p.h"
#include "ssa.h"
#include "expmed.h"
#include "insn-config.h"
#include "emit-rtl.h"
#include "recog.h"
#include "cgraph.h"
#include "gimple-pretty-print.h"
#include "alias.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tree-eh.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "tree-cfg.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "explow.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "tree-affine.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */

/* The infinite cost.  */
#define INFTY 10000000

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    {
      niter = likely_max_stmt_executions_int (loop);

      if (niter == -1 || niter > PARAM_VALUE (PARAM_AVG_LOOP_NITER))
	return PARAM_VALUE (PARAM_AVG_LOOP_NITER);
    }

  return niter;
}
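
/* A note on the fallback (assuming PARAM_AVG_LOOP_NITER keeps its usual
   default of 10): with no profile estimate and no usable likely upper
   bound, the loop is simply assumed to run PARAM_AVG_LOOP_NITER
   iterations, so later cost computations scale per-iteration costs by
   that heuristic count rather than by real data.  */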

struct iv_use;

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  struct iv_use *nonlin_use;	/* The nonlinear-expression use of this iv, if any.  */
  bool biv_p;		/* Is it a biv?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
  bool have_address_use;/* For biv, indicate if it's used in any address
			   type use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
struct comp_cost
{
  comp_cost (): cost (0), complexity (0), scratch (0)
  {}

  comp_cost (int cost, unsigned complexity, int scratch = 0)
    : cost (cost), complexity (complexity), scratch (scratch)
  {}

  /* Returns true if COST is infinite.  */
  bool infinite_cost_p ();

  /* Adds costs COST1 and COST2.  */
  friend comp_cost operator+ (comp_cost cost1, comp_cost cost2);

  /* Adds COST to the comp_cost.  */
  comp_cost operator+= (comp_cost cost);

  /* Adds constant C to this comp_cost.  */
  comp_cost operator+= (HOST_WIDE_INT c);

  /* Subtracts constant C from this comp_cost.  */
  comp_cost operator-= (HOST_WIDE_INT c);

  /* Divides the comp_cost by constant C.  */
  comp_cost operator/= (HOST_WIDE_INT c);

  /* Multiplies the comp_cost by constant C.  */
  comp_cost operator*= (HOST_WIDE_INT c);

  /* Subtracts COST2 from COST1.  */
  friend comp_cost operator- (comp_cost cost1, comp_cost cost2);

  /* Subtracts COST from this comp_cost.  */
  comp_cost operator-= (comp_cost cost);

  /* Returns true if COST1 is smaller than COST2.  */
  friend bool operator< (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 and COST2 are equal.  */
  friend bool operator== (comp_cost cost1, comp_cost cost2);

  /* Returns true if COST1 is smaller than or equal to COST2.  */
  friend bool operator<= (comp_cost cost1, comp_cost cost2);

  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
  int scratch;		/* Scratch used during cost computation.  */
};

static const comp_cost no_cost;
static const comp_cost infinite_cost (INFTY, INFTY, INFTY);

bool
comp_cost::infinite_cost_p ()
{
  return cost == INFTY;
}

comp_cost
operator+ (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p () || cost2.infinite_cost_p ())
    return infinite_cost;

  cost1.cost += cost2.cost;
  cost1.complexity += cost2.complexity;

  return cost1;
}

comp_cost
operator- (comp_cost cost1, comp_cost cost2)
{
  if (cost1.infinite_cost_p ())
    return infinite_cost;

  gcc_assert (!cost2.infinite_cost_p ());

  cost1.cost -= cost2.cost;
  cost1.complexity -= cost2.complexity;

  return cost1;
}

comp_cost
comp_cost::operator+= (comp_cost cost)
{
  *this = *this + cost;
  return *this;
}

comp_cost
comp_cost::operator+= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost += c;

  return *this;
}

comp_cost
comp_cost::operator-= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost -= c;

  return *this;
}

comp_cost
comp_cost::operator/= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost /= c;

  return *this;
}

comp_cost
comp_cost::operator*= (HOST_WIDE_INT c)
{
  if (infinite_cost_p ())
    return *this;

  this->cost *= c;

  return *this;
}

comp_cost
comp_cost::operator-= (comp_cost cost)
{
  *this = *this - cost;
  return *this;
}

bool
operator< (comp_cost cost1, comp_cost cost2)
{
  if (cost1.cost == cost2.cost)
    return cost1.complexity < cost2.complexity;

  return cost1.cost < cost2.cost;
}

bool
operator== (comp_cost cost1, comp_cost cost2)
{
  return cost1.cost == cost2.cost
    && cost1.complexity == cost2.complexity;
}

bool
operator<= (comp_cost cost1, comp_cost cost2)
{
  return cost1 < cost2 || cost1 == cost2;
}
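
/* An illustrative note (hypothetical values): comparing {cost 4,
   complexity 1} with {cost 4, complexity 2}, operator< prefers the
   former, falling back to complexity only when the runtime costs tie;
   and since operator+ saturates at infinite_cost, any candidate touched
   by an infinite cost stays infinite through later arithmetic.  */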

struct iv_inv_expr_ent;

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars that have to be
			   preserved when representing iv_use with iv_cand.  */
  bitmap inv_exprs;	/* The list of newly created invariant expressions
			   when representing iv_use with iv_cand.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned group_id;	/* The group id the use belongs to.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple *stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */

  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};

/* Group of uses.  */
struct iv_group
{
  /* The id of the group.  */
  unsigned id;
  /* Uses of the group are of the same type.  */
  enum use_type type;
  /* The set of "related" IV candidates, plus the important ones.  */
  bitmap related_cands;
  /* Number of IV candidates in the cost_map.  */
  unsigned n_map_members;
  /* The costs w.r.t. the iv candidates.  */
  struct cost_pair *cost_map;
  /* The selected candidate for the group.  */
  struct iv_cand *selected;
  /* Uses in the group.  */
  vec<struct iv_use *> vuses;
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple *incremented_at;/* For original biv, the statement where it is
			   incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use;/* For IP_{BEFORE,AFTER}_USE candidates, the place
			     where it is incremented.  */
  bitmap inv_vars;	/* The list of invariant ssa_vars used in step of the
			   iv_cand.  */
  bitmap inv_exprs;	/* If step is more complicated than a single ssa_var,
			   handle it as a new invariant expression which will
			   be hoisted out of loop.  */
  struct iv *orig_iv;	/* The original iv if this cand is added from biv with
			   smaller type.  */
};

/* Hashtable entry for common candidate derived from iv uses.  */
struct iv_common_cand
{
  tree base;
  tree step;
  /* IV uses from which this common candidate is derived.  */
  auto_vec<struct iv_use *> uses;
  hashval_t hash;
};

/* Hashtable helpers.  */

struct iv_common_cand_hasher : delete_ptr_hash <iv_common_cand>
{
  static inline hashval_t hash (const iv_common_cand *);
  static inline bool equal (const iv_common_cand *, const iv_common_cand *);
};

/* Hash function for possible common candidates.  */

inline hashval_t
iv_common_cand_hasher::hash (const iv_common_cand *ccand)
{
  return ccand->hash;
}

/* Hash table equality function for common candidates.  */

inline bool
iv_common_cand_hasher::equal (const iv_common_cand *ccand1,
			      const iv_common_cand *ccand2)
{
  return (ccand1->hash == ccand2->hash
	  && operand_equal_p (ccand1->base, ccand2->base, 0)
	  && operand_equal_p (ccand1->step, ccand2->step, 0)
	  && (TYPE_PRECISION (TREE_TYPE (ccand1->base))
	      == TYPE_PRECISION (TREE_TYPE (ccand2->base))));
}

/* Loop invariant expression hashtable entry.  */

struct iv_inv_expr_ent
{
  /* Tree expression of the entry.  */
  tree expr;
  /* Unique identifier.  */
  int id;
  /* Hash value.  */
  hashval_t hash;
};

/* Sort iv_inv_expr_ent pair A and B by id field.  */

static int
sort_iv_inv_expr_ent (const void *a, const void *b)
{
  const iv_inv_expr_ent * const *e1 = (const iv_inv_expr_ent * const *) (a);
  const iv_inv_expr_ent * const *e2 = (const iv_inv_expr_ent * const *) (b);

  unsigned id1 = (*e1)->id;
  unsigned id2 = (*e2)->id;

  if (id1 < id2)
    return -1;
  else if (id1 > id2)
    return 1;
  else
    return 0;
}

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_group *> vgroups;

  /* The candidates.  */
  vec<iv_cand *> vcands;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The hashtable of common candidates derived from iv uses.  */
  hash_table<iv_common_cand_hasher> *iv_common_cand_tab;

  /* The common candidates.  */
  vec<iv_common_cand *> iv_common_cands;

  /* The maximum invariant variable id.  */
  unsigned max_inv_var_id;

  /* The maximum invariant expression id.  */
  unsigned max_inv_expr_id;

  /* Number of no_overflow BIVs which are not used in memory address.  */
  unsigned bivs_not_used_in_addr;

  /* Obstack for iv structure.  */
  struct obstack iv_obstack;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_groups;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_group;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* The number of invariants needed, including both invariant variables and
     invariant expressions.  */
  unsigned n_invs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant variable is used.  */
  unsigned *n_inv_var_uses;

  /* Number of times each invariant expression is used.  */
  unsigned *n_inv_expr_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed group.  */
  struct iv_group *group;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_GROUPS \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, try removing unnecessary
   ivs from the set always.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  Don't dump
   variable's name if DUMP_NAME is FALSE.  The information is dumped with
   preceding spaces indicated by INDENT_LEVEL.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name, unsigned indent_level)
{
  const char *p;
  const char spaces[9] = {' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ', '\0'};

  if (indent_level > 4)
    indent_level = 4;
  p = spaces + 8 - (indent_level << 1);

  fprintf (file, "%sIV struct:\n", p);
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "%s  SSA_NAME:\t", p);
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Type:\t", p);
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Base:\t", p);
  print_generic_expr (file, iv->base, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "%s  Step:\t", p);
  print_generic_expr (file, iv->step, TDF_SLIM);
  fprintf (file, "\n");

  if (iv->base_object)
    {
      fprintf (file, "%s  Object:\t", p);
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "%s  Biv:\t%c\n", p, iv->biv_p ? 'Y' : 'N');

  fprintf (file, "%s  Overflowness wrto loop niter:\t%s\n",
	   p, iv->no_overflow ? "No-overflow" : "Overflow");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "  Use %d.%d:\n", use->group_id, use->id);
  fprintf (file, "    At stmt:\t");
  print_gimple_stmt (file, use->stmt, 0);
  fprintf (file, "    At pos:\t");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");
  dump_iv (file, use->iv, false, 2);
}

/* Dumps information about the groups of uses to FILE.  */

void
dump_groups (FILE *file, struct ivopts_data *data)
{
  unsigned i, j;
  struct iv_group *group;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      group = data->vgroups[i];
      fprintf (file, "Group %d:\n", group->id);
      if (group->type == USE_NONLINEAR_EXPR)
	fprintf (file, "  Type:\tGENERIC\n");
      else if (group->type == USE_ADDRESS)
	fprintf (file, "  Type:\tADDRESS\n");
      else
	{
	  gcc_assert (group->type == USE_COMPARE);
	  fprintf (file, "  Type:\tCOMPARE\n");
	}
      for (j = 0; j < group->vuses.length (); j++)
	dump_use (file, group->vuses[j]);
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "Candidate %d:\n", cand->id);
  if (cand->inv_vars)
    {
      fprintf (file, "  Depend on inv.vars: ");
      dump_bitmap (file, cand->inv_vars);
    }
  if (cand->inv_exprs)
    {
      fprintf (file, "  Depend on inv.exprs: ");
      dump_bitmap (file, cand->inv_exprs);
    }

  if (cand->var_before)
    {
      fprintf (file, "  Var before: ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  Var after: ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  Incr POS: before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  Incr POS: before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  Incr POS: after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  Incr POS: at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  Incr POS: orig biv\n");
      break;
    }

  dump_iv (file, iv, false, 1);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple *stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple *stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
      || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;
  data->niters = NULL;
  data->vgroups.create (20);
  data->vcands.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->name_expansion_cache = NULL;
  data->iv_common_cand_tab = new hash_table<iv_common_cand_hasher> (10);
  data->iv_common_cands.create (20);
  decl_rtl_to_reset.create (20);
  gcc_obstack_init (&data->iv_obstack);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

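/* For illustration (a hypothetical input): for EXPR of the form
   POINTER_PLUS_EXPR (ADDR_EXPR <a>, i * 4), i.e. &a[i], the recursion
   walks through the POINTER_PLUS_EXPR to the ADDR_EXPR and yields &a as
   the base object, while a pointer of unknown provenance simply falls
   through to the fold_convert default.  */
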
/* Return true if address expression with non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}

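/* E.g. (a sketch, not exhaustive): &a[16] has an ARRAY_REF operand, so it
   is "complex" and gets lowered via tree-affine in alloc_iv below, whereas
   plain &a takes the ADDR_EXPR arm with a DECL_P operand and returns
   false.  */
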
/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (struct ivopts_data *data, tree base, tree step,
	  bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
					      sizeof (struct iv));
  gcc_assert (step != NULL_TREE);

  /* Lower address expression in base except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (expr), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->nonlin_use = NULL;
  iv->ssa_name = NULL_TREE;
  if (!no_overflow
      && !iv_can_overflow_p (data->current_loop, TREE_TYPE (base),
			     base, step))
    no_overflow = true;
  iv->no_overflow = no_overflow;
  iv->have_address_use = false;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (data, base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding iv base at the first ssa var referred to by iv step.
	 Ideally we should stop at any ssa var, but since that's expensive
	 and unlikely to matter, we just do it on the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple *def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  data->bivs_not_used_in_addr = 0;
  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark iv peeled from other one as biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
      if (iv->no_overflow)
	data->bivs_not_used_in_addr++;
      if (incr_iv->no_overflow)
	data->bivs_not_used_in_addr++;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple *stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding iv base at the first ssa var referred to by iv step.
     Ideally we should stop at any ssa var, but since that's expensive
     and unlikely to matter, we just do it on the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we can not safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple *stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n");
	};

      fprintf (dump_file, "\n<Induction Vars>:\n");
      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  struct version_info *info = ver_info (data, i);
	  if (info->iv && info->iv->step && !integer_zerop (info->iv->step))
	    dump_iv (dump_file, ver_info (data, i)->iv, true, 0);
	}
    }

  return true;
}

/* Records a use of TYPE at *USE_P in STMT whose value is IV in GROUP.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from IV base; for uses of other types,
   both are zero by default.  */

static struct iv_use *
record_use (struct iv_group *group, tree *use_p, struct iv *iv,
	    gimple *stmt, enum use_type type, tree addr_base,
	    unsigned HOST_WIDE_INT addr_offset)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = group->vuses.length ();
  use->group_id = group->id;
  use->type = type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  group->vuses.safe_push (use);
  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_var_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Record a group of TYPE.  */

static struct iv_group *
record_group (struct ivopts_data *data, enum use_type type)
{
  struct iv_group *group = XCNEW (struct iv_group);

  group->id = data->vgroups.length ();
  group->type = type;
  group->related_cands = BITMAP_ALLOC (NULL);
  group->vuses.create (1);

  data->vgroups.safe_push (group);
  return group;
}

/* Record a use of TYPE at *USE_P in STMT whose value is IV in a group.
   A new group will be created if there is no existing group for the use.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple *stmt, enum use_type type)
{
  tree addr_base = NULL;
  struct iv_group *group = NULL;
  unsigned HOST_WIDE_INT addr_offset = 0;

  /* Record non address type use in a new group.  */
  if (type == USE_ADDRESS && iv->base_object)
    {
      unsigned int i;

      addr_base = strip_offset (iv->base, &addr_offset);
      for (i = 0; i < data->vgroups.length (); i++)
	{
	  struct iv_use *use;

	  group = data->vgroups[i];
	  use = group->vuses[0];
	  if (use->type != USE_ADDRESS || !use->iv->base_object)
	    continue;

	  /* Check if it has the same stripped base and step.  */
	  if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	      && operand_equal_p (iv->step, use->iv->step, 0)
	      && operand_equal_p (addr_base, use->addr_base, 0))
	    break;
	}
      if (i == data->vgroups.length ())
	group = NULL;
    }

  if (!group)
    group = record_group (data, type);

  return record_use (group, use_p, iv, stmt, type, addr_base, addr_offset);
}

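/* A grouping sketch (hypothetical loop): two address uses a[i] and
   a[i + 1] share the stripped base &a and the same step, so they land in
   one ADDRESS group with addr_offset 0 and sizeof (a[0]) respectively;
   costs can then be computed once per group rather than once per use.  */
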
/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  gimple *stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->nonlin_use)
    {
      gcc_assert (iv->nonlin_use->type == USE_NONLINEAR_EXPR);
      return iv->nonlin_use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI || is_gimple_assign (stmt));

  use = record_group_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
  iv->nonlin_use = use;
  return use;
}

/* Indicate how compare type iv_use can be handled.  */
enum comp_iv_rewrite
{
  COMP_IV_NA,
  /* We may rewrite compare type iv_use by expressing value of the iv_use.  */
  COMP_IV_EXPR,
  /* We may rewrite compare type iv_uses on both sides of comparison by
     expressing value of each iv_use.  */
  COMP_IV_EXPR_2,
  /* We may rewrite compare type iv_use by expressing value of the iv_use
     or by eliminating it with other iv_cand.  */
  COMP_IV_ELIM
};

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and the way the condition can be
   rewritten is returned.  If this is not the case, CONTROL_VAR and
   BOUND are set to the arguments of the condition and COMP_IV_NA is
   returned.  */

static enum comp_iv_rewrite
extract_cond_operands (struct ivopts_data *data, gimple *stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  enum comp_iv_rewrite rewrite_type = COMP_IV_NA;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* If both sides of comparison are IVs, we can express ivs on both ends.  */
  if (iv0 && iv1 && !integer_zerop (iv0->step) && !integer_zerop (iv1->step))
    {
      rewrite_type = COMP_IV_EXPR_2;
      goto end;
    }

  /* If neither side of the comparison is an IV.  */
  if ((!iv0 || integer_zerop (iv0->step))
      && (!iv1 || integer_zerop (iv1->step)))
    goto end;

  /* Control variable may be on the other side.  */
  if (!iv0 || integer_zerop (iv0->step))
    {
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  /* If one side is an IV and the other side isn't loop invariant.  */
  if (!iv1)
    rewrite_type = COMP_IV_EXPR;
  /* If one side is an IV and the other side is loop invariant.  */
  else if (!integer_zerop (iv0->step) && integer_zerop (iv1->step))
    rewrite_type = COMP_IV_ELIM;

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return rewrite_type;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple *stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *bound_iv;
  enum comp_iv_rewrite ret;

  ret = extract_cond_operands (data, stmt,
			       &var_p, &bound_p, &var_iv, &bound_iv);
  if (ret == COMP_IV_NA)
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  record_group_use (data, var_p, var_iv, stmt, USE_COMPARE);
  /* Record compare type iv_use for iv on the other side of comparison.  */
  if (ret == COMP_IV_EXPR_2)
    record_group_use (data, bound_p, bound_iv, stmt, USE_COMPARE);
}

4ba5ea11
RB
1764/* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1765 obviously invariant, i.e. the outermost loop such that all of EXPR's
1766 operands are defined outside of the returned loop. Returns NULL if EXPR
1767 is not even obviously invariant in LOOP. */
1768
1769struct loop *
1770outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1771{
1772 basic_block def_bb;
1773 unsigned i, len;
1774
1775 if (is_gimple_min_invariant (expr))
1776 return current_loops->tree_root;
1777
1778 if (TREE_CODE (expr) == SSA_NAME)
1779 {
1780 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1781 if (def_bb)
1782 {
1783 if (flow_bb_inside_loop_p (loop, def_bb))
1784 return NULL;
1785 return superloop_at_depth (loop,
1786 loop_depth (def_bb->loop_father) + 1);
1787 }
1788
1789 return current_loops->tree_root;
1790 }
1791
1792 if (!EXPR_P (expr))
1793 return NULL;
1794
1795 unsigned maxdepth = 0;
1796 len = TREE_OPERAND_LENGTH (expr);
1797 for (i = 0; i < len; i++)
1798 {
1799 struct loop *ivloop;
1800 if (!TREE_OPERAND (expr, i))
1801 continue;
1802
1803 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1804 if (!ivloop)
1805 return NULL;
1806 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1807 }
1808
1809 return superloop_at_depth (loop, maxdepth);
1810}
1811
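/* An added example under an assumed loop nest (not from the original
   sources):

     loop_1:                     depth 1
       t = a + i;                "i" is defined in loop_1
       loop_2:                   depth 2
         ... use of t ...

   outermost_invariant_loop_for_expr (loop_2, "a + i") returns loop_2:
   all operands are defined outside of loop_2 but "i" is not defined
   outside of loop_1, so loop_2 is the outermost loop in which the
   expression is invariant.  */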
be35cf60 1812/* Returns true if expression EXPR is obviously invariant in LOOP,
6a732743
SP
1813 i.e. if all its operands are defined outside of LOOP. LOOP should
1814 not be the function body. */
be35cf60 1815
feb075f4 1816bool
be35cf60
ZD
1817expr_invariant_in_loop_p (struct loop *loop, tree expr)
1818{
1819 basic_block def_bb;
1820 unsigned i, len;
1821
6a732743
SP
1822 gcc_assert (loop_depth (loop) > 0);
1823
be35cf60
ZD
1824 if (is_gimple_min_invariant (expr))
1825 return true;
1826
1827 if (TREE_CODE (expr) == SSA_NAME)
1828 {
726a989a 1829 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
be35cf60
ZD
1830 if (def_bb
1831 && flow_bb_inside_loop_p (loop, def_bb))
1832 return false;
1833
1834 return true;
1835 }
1836
726a989a 1837 if (!EXPR_P (expr))
be35cf60
ZD
1838 return false;
1839
5039610b 1840 len = TREE_OPERAND_LENGTH (expr);
be35cf60 1841 for (i = 0; i < len; i++)
837a549b
JH
1842 if (TREE_OPERAND (expr, i)
1843 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
be35cf60
ZD
1844 return false;
1845
1846 return true;
1847}
1848
e4142529
BC
1849/* Given expression EXPR which computes inductive values with respect
1850 to the loop recorded in DATA, this function returns the biv from which
1851 EXPR is derived, by tracing definition chains of SSA variables in EXPR. */
1852
1853static struct iv*
1854find_deriving_biv_for_expr (struct ivopts_data *data, tree expr)
1855{
1856 struct iv *iv;
1857 unsigned i, n;
1858 tree e2, e1;
1859 enum tree_code code;
355fe088 1860 gimple *stmt;
e4142529
BC
1861
1862 if (expr == NULL_TREE)
1863 return NULL;
1864
1865 if (is_gimple_min_invariant (expr))
1866 return NULL;
1867
1868 code = TREE_CODE (expr);
1869 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1870 {
1871 n = TREE_OPERAND_LENGTH (expr);
1872 for (i = 0; i < n; i++)
1873 {
1874 iv = find_deriving_biv_for_expr (data, TREE_OPERAND (expr, i));
1875 if (iv)
1876 return iv;
1877 }
1878 }
1879
1880 /* Stop if it's not an SSA name. */
1881 if (code != SSA_NAME)
1882 return NULL;
1883
1884 iv = get_iv (data, expr);
1885 if (!iv || integer_zerop (iv->step))
1886 return NULL;
1887 else if (iv->biv_p)
1888 return iv;
1889
1890 stmt = SSA_NAME_DEF_STMT (expr);
1891 if (gphi *phi = dyn_cast <gphi *> (stmt))
1892 {
1893 ssa_op_iter iter;
1894 use_operand_p use_p;
aec0ee11
BC
1895 basic_block phi_bb = gimple_bb (phi);
1896
1897 /* Skip loop header PHI that doesn't define biv. */
1898 if (phi_bb->loop_father == data->current_loop)
1899 return NULL;
e4142529
BC
1900
1901 if (virtual_operand_p (gimple_phi_result (phi)))
1902 return NULL;
1903
1904 FOR_EACH_PHI_ARG (use_p, phi, iter, SSA_OP_USE)
1905 {
1906 tree use = USE_FROM_PTR (use_p);
1907 iv = find_deriving_biv_for_expr (data, use);
1908 if (iv)
1909 return iv;
1910 }
1911 return NULL;
1912 }
1913 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1914 return NULL;
1915
1916 e1 = gimple_assign_rhs1 (stmt);
1917 code = gimple_assign_rhs_code (stmt);
1918 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS)
1919 return find_deriving_biv_for_expr (data, e1);
1920
1921 switch (code)
1922 {
1923 case MULT_EXPR:
1924 case PLUS_EXPR:
1925 case MINUS_EXPR:
1926 case POINTER_PLUS_EXPR:
1927 /* Increments, decrements and multiplications by a constant
1928 are simple. */
1929 e2 = gimple_assign_rhs2 (stmt);
1930 iv = find_deriving_biv_for_expr (data, e2);
1931 if (iv)
1932 return iv;
81fea426 1933 gcc_fallthrough ();
e4142529 1934
e4142529
BC
1935 CASE_CONVERT:
1936 /* Casts are simple. */
1937 return find_deriving_biv_for_expr (data, e1);
1938
1939 default:
1940 break;
1941 }
1942
1943 return NULL;
1944}
1945
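/* An added example of the chain tracing above (hypothetical SSA names):

     i_1 = PHI <0, i_2>              biv of the current loop
     t_3 = (sizetype) i_1;
     p_4 = p_base + t_3 * 4;

   find_deriving_biv_for_expr (data, p_4) walks back through the
   addition, multiplication and conversion to i_1 and returns the iv
   record of the biv i_1.  */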
1946/* Record that BIV is used in address type uses, and likewise for any biv
1947 whose base is exactly one step before or after BIV's base. */
1948
1949static void
1950record_biv_for_address_use (struct ivopts_data *data, struct iv *biv)
1951{
1952 unsigned i;
1953 tree type, base_1, base_2;
1954 bitmap_iterator bi;
1955
1956 if (!biv || !biv->biv_p || integer_zerop (biv->step)
1957 || biv->have_address_use || !biv->no_overflow)
1958 return;
1959
1960 type = TREE_TYPE (biv->base);
1961 if (!INTEGRAL_TYPE_P (type))
1962 return;
1963
1964 biv->have_address_use = true;
1965 data->bivs_not_used_in_addr--;
1966 base_1 = fold_build2 (PLUS_EXPR, type, biv->base, biv->step);
1967 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1968 {
1969 struct iv *iv = ver_info (data, i)->iv;
1970
1971 if (!iv || !iv->biv_p || integer_zerop (iv->step)
1972 || iv->have_address_use || !iv->no_overflow)
1973 continue;
1974
1975 if (type != TREE_TYPE (iv->base)
1976 || !INTEGRAL_TYPE_P (TREE_TYPE (iv->base)))
1977 continue;
1978
1979 if (!operand_equal_p (biv->step, iv->step, 0))
1980 continue;
1981
1982 base_2 = fold_build2 (PLUS_EXPR, type, iv->base, iv->step);
1983 if (operand_equal_p (base_1, iv->base, 0)
1984 || operand_equal_p (base_2, biv->base, 0))
1985 {
1986 iv->have_address_use = true;
1987 data->bivs_not_used_in_addr--;
1988 }
1989 }
1990}
1991
8b11a64c
ZD
1992/* Accumulates the steps of indices into DATA and replaces their values with the
1993 initial ones. Returns false when the value of the index cannot be determined.
1994 Callback for for_each_index. */
1995
1996struct ifs_ivopts_data
1997{
1998 struct ivopts_data *ivopts_data;
355fe088 1999 gimple *stmt;
6e42ce54 2000 tree step;
8b11a64c
ZD
2001};
2002
2003static bool
2004idx_find_step (tree base, tree *idx, void *data)
2005{
c22940cd 2006 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
8b11a64c 2007 struct iv *iv;
c70ed622 2008 bool use_overflow_semantics = false;
d7f5de76 2009 tree step, iv_base, iv_step, lbound, off;
2f4675b4 2010 struct loop *loop = dta->ivopts_data->current_loop;
be35cf60 2011
be35cf60 2012 /* If base is a component ref, require that the offset of the reference
3a7c155d 2013 be invariant. */
be35cf60
ZD
2014 if (TREE_CODE (base) == COMPONENT_REF)
2015 {
2016 off = component_ref_field_offset (base);
2017 return expr_invariant_in_loop_p (loop, off);
2018 }
2019
2020 /* If base is an array, first check whether we will be able to move the
2021 reference out of the loop (in order to take its address in strength
2022 reduction). In order for this to work we need both lower bound
2023 and step to be loop invariants. */
9f7ccf69 2024 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
be35cf60 2025 {
9f7ccf69
EB
2026 /* Moreover, for a range, the size needs to be invariant as well. */
2027 if (TREE_CODE (base) == ARRAY_RANGE_REF
2028 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
2029 return false;
2030
be35cf60
ZD
2031 step = array_ref_element_size (base);
2032 lbound = array_ref_low_bound (base);
2033
2034 if (!expr_invariant_in_loop_p (loop, step)
2035 || !expr_invariant_in_loop_p (loop, lbound))
2036 return false;
2037 }
2038
8b11a64c
ZD
2039 if (TREE_CODE (*idx) != SSA_NAME)
2040 return true;
2041
2042 iv = get_iv (dta->ivopts_data, *idx);
2043 if (!iv)
2044 return false;
2045
ea643120
RG
2046 /* XXX For a base of *D42 with iv->base being &x[0], we produce
2047 *&x[0], which is not folded and does not trigger the
2048 ARRAY_REF path below. */
8b11a64c
ZD
2049 *idx = iv->base;
2050
6e42ce54 2051 if (integer_zerop (iv->step))
8b11a64c
ZD
2052 return true;
2053
9f7ccf69 2054 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
2055 {
2056 step = array_ref_element_size (base);
2f4675b4
ZD
2057
2058 /* We only handle addresses whose step is an integer constant. */
2059 if (TREE_CODE (step) != INTEGER_CST)
2060 return false;
2f4675b4 2061 }
8b11a64c 2062 else
5212068f 2063 /* The step for pointer arithmetic is already 1 byte. */
9a9d280e 2064 step = size_one_node;
8b11a64c 2065
d7f5de76
ZD
2066 iv_base = iv->base;
2067 iv_step = iv->step;
c70ed622
BC
2068 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
2069 use_overflow_semantics = true;
2070
d7f5de76
ZD
2071 if (!convert_affine_scev (dta->ivopts_data->current_loop,
2072 sizetype, &iv_base, &iv_step, dta->stmt,
c70ed622 2073 use_overflow_semantics))
8b11a64c
ZD
2074 {
2075 /* The index might wrap. */
2076 return false;
2077 }
2078
1c1205fb 2079 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
6e42ce54 2080 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
8b11a64c 2081
e4142529
BC
2082 if (dta->ivopts_data->bivs_not_used_in_addr)
2083 {
2084 if (!iv->biv_p)
2085 iv = find_deriving_biv_for_expr (dta->ivopts_data, iv->ssa_name);
2086
2087 record_biv_for_address_use (dta->ivopts_data, iv);
2088 }
8b11a64c
ZD
2089 return true;
2090}
2091
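/* Added illustration of the accumulation in idx_find_step: for a
   reference a[i] where "a" is an array of 4-byte elements and "i" is
   an iv with step 1, the callback replaces the index with the initial
   value of "i" and adds 4 * 1 = 4 to DTA->step; in a multi-index
   reference each index contributes its own element_size * iv_step.  */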
2092/* Records use in index IDX. Callback for for_each_index. Ivopts data
2093 object is passed to it in DATA. */
2094
2095static bool
2f4675b4 2096idx_record_use (tree base, tree *idx,
c22940cd 2097 void *vdata)
8b11a64c 2098{
c22940cd 2099 struct ivopts_data *data = (struct ivopts_data *) vdata;
8b11a64c 2100 find_interesting_uses_op (data, *idx);
9f7ccf69 2101 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
2f4675b4
ZD
2102 {
2103 find_interesting_uses_op (data, array_ref_element_size (base));
2104 find_interesting_uses_op (data, array_ref_low_bound (base));
2105 }
8b11a64c
ZD
2106 return true;
2107}
2108
32159434
CB
2109/* If we can prove that TOP = cst * BOT for some constant cst,
2110 store cst to MUL and return true. Otherwise return false.
2111 The returned value is always sign-extended, regardless of the
2112 signedness of TOP and BOT. */
2113
2114static bool
807e902e 2115constant_multiple_of (tree top, tree bot, widest_int *mul)
32159434
CB
2116{
2117 tree mby;
2118 enum tree_code code;
32159434 2119 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
807e902e 2120 widest_int res, p0, p1;
32159434
CB
2121
2122 STRIP_NOPS (top);
2123 STRIP_NOPS (bot);
2124
2125 if (operand_equal_p (top, bot, 0))
2126 {
807e902e 2127 *mul = 1;
32159434
CB
2128 return true;
2129 }
2130
2131 code = TREE_CODE (top);
2132 switch (code)
2133 {
2134 case MULT_EXPR:
2135 mby = TREE_OPERAND (top, 1);
2136 if (TREE_CODE (mby) != INTEGER_CST)
2137 return false;
2138
2139 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2140 return false;
2141
807e902e 2142 *mul = wi::sext (res * wi::to_widest (mby), precision);
32159434
CB
2143 return true;
2144
2145 case PLUS_EXPR:
2146 case MINUS_EXPR:
2147 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2148 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2149 return false;
2150
2151 if (code == MINUS_EXPR)
27bcd47c 2152 p1 = -p1;
807e902e 2153 *mul = wi::sext (p0 + p1, precision);
32159434
CB
2154 return true;
2155
2156 case INTEGER_CST:
2157 if (TREE_CODE (bot) != INTEGER_CST)
2158 return false;
2159
8e6cdc90
RS
2160 p0 = widest_int::from (wi::to_wide (top), SIGNED);
2161 p1 = widest_int::from (wi::to_wide (bot), SIGNED);
807e902e 2162 if (p1 == 0)
32159434 2163 return false;
807e902e
KZ
2164 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
2165 return res == 0;
32159434
CB
2166
2167 default:
2168 return false;
2169 }
2170}
2171
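/* Added worked examples for constant_multiple_of:

     constant_multiple_of (n * 8, n, &mul)          -> true, *mul = 8
     constant_multiple_of (n * 8 + n * 4, n, &mul)  -> true, *mul = 12
     constant_multiple_of (24, 8, &mul)             -> true, *mul = 3
     constant_multiple_of (25, 8, &mul)             -> false (remainder)

   Since the result is sign-extended, n * 4 - n * 8 against n yields
   *mul = -4.  */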
e75fde1a 2172/* Return true if memory reference REF with step STEP may be unaligned. */
0a915e3d
ZD
2173
2174static bool
32159434 2175may_be_unaligned_p (tree ref, tree step)
0a915e3d 2176{
ac182688 2177 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
607fb860 2178 thus they are not misaligned. */
ac182688
ZD
2179 if (TREE_CODE (ref) == TARGET_MEM_REF)
2180 return false;
2181
e75fde1a 2182 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
abf30454
RB
2183 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
2184 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
b8698a0f 2185
e75fde1a
EB
2186 unsigned HOST_WIDE_INT bitpos;
2187 unsigned int ref_align;
2188 get_object_alignment_1 (ref, &ref_align, &bitpos);
2189 if (ref_align < align
2190 || (bitpos % align) != 0
2191 || (bitpos % BITS_PER_UNIT) != 0)
2192 return true;
ce276b61 2193
e75fde1a
EB
2194 unsigned int trailing_zeros = tree_ctz (step);
2195 if (trailing_zeros < HOST_BITS_PER_INT
2196 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
2197 return true;
0a915e3d
ZD
2198
2199 return false;
2200}
2201
75715cf6
EB
2202/* Return true if EXPR may be non-addressable. */
2203
bc068a23 2204bool
75715cf6
EB
2205may_be_nonaddressable_p (tree expr)
2206{
2207 switch (TREE_CODE (expr))
2208 {
928bc34f
EB
2209 case TARGET_MEM_REF:
2210 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
2211 target, thus they are always addressable. */
2212 return false;
2213
ee45a32d
EB
2214 case MEM_REF:
2215 /* Likewise for MEM_REFs, modulo the storage order. */
2216 return REF_REVERSE_STORAGE_ORDER (expr);
2217
2218 case BIT_FIELD_REF:
2219 if (REF_REVERSE_STORAGE_ORDER (expr))
2220 return true;
2221 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2222
75715cf6 2223 case COMPONENT_REF:
ee45a32d
EB
2224 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2225 return true;
75715cf6
EB
2226 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
2227 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2228
ee45a32d
EB
2229 case ARRAY_REF:
2230 case ARRAY_RANGE_REF:
2231 if (TYPE_REVERSE_STORAGE_ORDER (TREE_TYPE (TREE_OPERAND (expr, 0))))
2232 return true;
2233 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
2234
75715cf6
EB
2235 case VIEW_CONVERT_EXPR:
2236 /* This kind of view-conversion may wrap non-addressable objects
2237 and make them look addressable. After some processing the
2238 non-addressability may be uncovered again, causing ADDR_EXPRs
2239 of inappropriate objects to be built. */
7a4fbffc
EB
2240 if (is_gimple_reg (TREE_OPERAND (expr, 0))
2241 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
2242 return true;
7a4fbffc 2243 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
928bc34f 2244
1043771b 2245 CASE_CONVERT:
928bc34f 2246 return true;
75715cf6
EB
2247
2248 default:
2249 break;
2250 }
2251
2252 return false;
2253}
2254
8b11a64c
ZD
2255/* Finds addresses in *OP_P inside STMT. */
2256
2257static void
355fe088
TS
2258find_interesting_uses_address (struct ivopts_data *data, gimple *stmt,
2259 tree *op_p)
8b11a64c 2260{
9a9d280e 2261 tree base = *op_p, step = size_zero_node;
8b11a64c
ZD
2262 struct iv *civ;
2263 struct ifs_ivopts_data ifs_ivopts_data;
2264
e3cc7254
ZD
2265 /* Do not play with volatile memory references. A bit too conservative,
2266 perhaps, but safe. */
726a989a 2267 if (gimple_has_volatile_ops (stmt))
e3cc7254
ZD
2268 goto fail;
2269
8b11a64c
ZD
2270 /* Ignore bitfields for now. Not really something terribly complicated
2271 to handle. TODO. */
75715cf6
EB
2272 if (TREE_CODE (base) == BIT_FIELD_REF)
2273 goto fail;
2274
ac182688
ZD
2275 base = unshare_expr (base);
2276
2277 if (TREE_CODE (base) == TARGET_MEM_REF)
2278 {
2279 tree type = build_pointer_type (TREE_TYPE (base));
2280 tree astep;
2281
2282 if (TMR_BASE (base)
2283 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
2284 {
2285 civ = get_iv (data, TMR_BASE (base));
2286 if (!civ)
2287 goto fail;
2288
2289 TMR_BASE (base) = civ->base;
2290 step = civ->step;
2291 }
4d948885
RG
2292 if (TMR_INDEX2 (base)
2293 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
2294 {
2295 civ = get_iv (data, TMR_INDEX2 (base));
2296 if (!civ)
2297 goto fail;
2298
2299 TMR_INDEX2 (base) = civ->base;
2300 step = civ->step;
2301 }
ac182688
ZD
2302 if (TMR_INDEX (base)
2303 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
2304 {
2305 civ = get_iv (data, TMR_INDEX (base));
2306 if (!civ)
2307 goto fail;
8b11a64c 2308
ac182688
ZD
2309 TMR_INDEX (base) = civ->base;
2310 astep = civ->step;
be35cf60 2311
ac182688
ZD
2312 if (astep)
2313 {
2314 if (TMR_STEP (base))
2315 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
2316
6e42ce54 2317 step = fold_build2 (PLUS_EXPR, type, step, astep);
ac182688
ZD
2318 }
2319 }
2320
6e42ce54 2321 if (integer_zerop (step))
ac182688
ZD
2322 goto fail;
2323 base = tree_mem_ref_addr (type, base);
2324 }
2325 else
2326 {
2327 ifs_ivopts_data.ivopts_data = data;
2328 ifs_ivopts_data.stmt = stmt;
9a9d280e 2329 ifs_ivopts_data.step = size_zero_node;
ac182688 2330 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
6e42ce54 2331 || integer_zerop (ifs_ivopts_data.step))
ac182688 2332 goto fail;
6e42ce54 2333 step = ifs_ivopts_data.step;
ac182688 2334
928bc34f
EB
2335 /* Check that the base expression is addressable. This needs
2336 to be done after substituting bases of IVs into it. */
2337 if (may_be_nonaddressable_p (base))
2338 goto fail;
2339
2340 /* Moreover, on strict alignment platforms, check that it is
2341 sufficiently aligned. */
32159434 2342 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
928bc34f
EB
2343 goto fail;
2344
ac182688 2345 base = build_fold_addr_expr (base);
ea643120
RG
2346
2347 /* Substituting bases of IVs into the base expression might
2348 have caused folding opportunities. */
2349 if (TREE_CODE (base) == ADDR_EXPR)
2350 {
2351 tree *ref = &TREE_OPERAND (base, 0);
2352 while (handled_component_p (*ref))
2353 ref = &TREE_OPERAND (*ref, 0);
70f34814 2354 if (TREE_CODE (*ref) == MEM_REF)
cb6b911a 2355 {
bcf71673
RG
2356 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2357 TREE_OPERAND (*ref, 0),
2358 TREE_OPERAND (*ref, 1));
cb6b911a
RG
2359 if (tem)
2360 *ref = tem;
2361 }
ea643120 2362 }
ac182688 2363 }
8b11a64c 2364
6f929985 2365 civ = alloc_iv (data, base, step);
653a4b32
BC
2366 /* Fail if base object of this memory reference is unknown. */
2367 if (civ->base_object == NULL_TREE)
2368 goto fail;
2369
a7e43c57 2370 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
8b11a64c
ZD
2371 return;
2372
2373fail:
2374 for_each_index (op_p, idx_record_use, data);
2375}
2376
2377/* Finds and records invariants used in STMT. */
2378
2379static void
355fe088 2380find_invariants_stmt (struct ivopts_data *data, gimple *stmt)
8b11a64c 2381{
f47c96aa
AM
2382 ssa_op_iter iter;
2383 use_operand_p use_p;
8b11a64c
ZD
2384 tree op;
2385
f47c96aa 2386 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 2387 {
f47c96aa 2388 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
2389 record_invariant (data, op, false);
2390 }
2391}
2392
2393/* Finds interesting uses of induction variables in the statement STMT. */
2394
2395static void
355fe088 2396find_interesting_uses_stmt (struct ivopts_data *data, gimple *stmt)
8b11a64c
ZD
2397{
2398 struct iv *iv;
726a989a 2399 tree op, *lhs, *rhs;
f47c96aa
AM
2400 ssa_op_iter iter;
2401 use_operand_p use_p;
726a989a 2402 enum tree_code code;
8b11a64c
ZD
2403
2404 find_invariants_stmt (data, stmt);
2405
726a989a 2406 if (gimple_code (stmt) == GIMPLE_COND)
8b11a64c 2407 {
726a989a 2408 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
2409 return;
2410 }
2411
726a989a 2412 if (is_gimple_assign (stmt))
8b11a64c 2413 {
726a989a
RB
2414 lhs = gimple_assign_lhs_ptr (stmt);
2415 rhs = gimple_assign_rhs1_ptr (stmt);
8b11a64c 2416
726a989a 2417 if (TREE_CODE (*lhs) == SSA_NAME)
8b11a64c
ZD
2418 {
2419 /* If the statement defines an induction variable, the uses are not
2420 interesting by themselves. */
2421
726a989a 2422 iv = get_iv (data, *lhs);
8b11a64c 2423
6e42ce54 2424 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
2425 return;
2426 }
2427
726a989a
RB
2428 code = gimple_assign_rhs_code (stmt);
2429 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2430 && (REFERENCE_CLASS_P (*rhs)
2431 || is_gimple_val (*rhs)))
8b11a64c 2432 {
726a989a
RB
2433 if (REFERENCE_CLASS_P (*rhs))
2434 find_interesting_uses_address (data, stmt, rhs);
2435 else
2436 find_interesting_uses_op (data, *rhs);
8b11a64c 2437
726a989a
RB
2438 if (REFERENCE_CLASS_P (*lhs))
2439 find_interesting_uses_address (data, stmt, lhs);
8b11a64c 2440 return;
8b11a64c 2441 }
726a989a 2442 else if (TREE_CODE_CLASS (code) == tcc_comparison)
8b11a64c 2443 {
726a989a 2444 find_interesting_uses_cond (data, stmt);
8b11a64c
ZD
2445 return;
2446 }
2f4675b4
ZD
2447
2448 /* TODO -- we should also handle address uses of type
2449
2450 memory = call (whatever);
2451
2452 and
2453
2454 call (memory). */
8b11a64c
ZD
2455 }
2456
726a989a
RB
2457 if (gimple_code (stmt) == GIMPLE_PHI
2458 && gimple_bb (stmt) == data->current_loop->header)
8b11a64c 2459 {
726a989a 2460 iv = get_iv (data, PHI_RESULT (stmt));
8b11a64c 2461
6e42ce54 2462 if (iv && !integer_zerop (iv->step))
8b11a64c
ZD
2463 return;
2464 }
2465
f47c96aa 2466 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 2467 {
f47c96aa 2468 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
2469
2470 if (TREE_CODE (op) != SSA_NAME)
2471 continue;
2472
2473 iv = get_iv (data, op);
2474 if (!iv)
2475 continue;
2476
2477 find_interesting_uses_op (data, op);
2478 }
2479}
2480
2481/* Finds interesting uses of induction variables outside of loops
2482 on loop exit edge EXIT. */
2483
2484static void
2485find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2486{
538dd0b7
DM
2487 gphi *phi;
2488 gphi_iterator psi;
726a989a 2489 tree def;
8b11a64c 2490
726a989a 2491 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 2492 {
538dd0b7 2493 phi = psi.phi ();
8b11a64c 2494 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
ea057359 2495 if (!virtual_operand_p (def))
623b8e0a 2496 find_interesting_uses_op (data, def);
8b11a64c
ZD
2497 }
2498}
2499
359b060e
BC
2500/* Return TRUE if OFFSET is within the range of [base + offset] addressing
2501 mode for memory reference represented by USE. */
a7e43c57 2502
359b060e
BC
2503static GTY (()) vec<rtx, va_gc> *addr_list;
2504
2505static bool
2506addr_offset_valid_p (struct iv_use *use, HOST_WIDE_INT offset)
a7e43c57 2507{
a7e43c57 2508 rtx reg, addr;
359b060e
BC
2509 unsigned list_index;
2510 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2511 machine_mode addr_mode, mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
a7e43c57 2512
a7e43c57 2513 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
359b060e
BC
2514 if (list_index >= vec_safe_length (addr_list))
2515 vec_safe_grow_cleared (addr_list, list_index + MAX_MACHINE_MODE);
a7e43c57 2516
359b060e
BC
2517 addr = (*addr_list)[list_index];
2518 if (!addr)
a7e43c57 2519 {
359b060e
BC
2520 addr_mode = targetm.addr_space.address_mode (as);
2521 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2522 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2523 (*addr_list)[list_index] = addr;
a7e43c57 2524 }
359b060e
BC
2525 else
2526 addr_mode = GET_MODE (addr);
a7e43c57 2527
359b060e
BC
2528 XEXP (addr, 1) = gen_int_mode (offset, addr_mode);
2529 return (memory_address_addr_space_p (mem_mode, addr, as));
a7e43c57
BC
2530}
2531
309a0cf6 2532/* Comparison function to sort group in ascending order of addr_offset. */
a7e43c57 2533
309a0cf6
BC
2534static int
2535group_compare_offset (const void *a, const void *b)
2536{
2537 const struct iv_use *const *u1 = (const struct iv_use *const *) a;
2538 const struct iv_use *const *u2 = (const struct iv_use *const *) b;
2539
2540 if ((*u1)->addr_offset != (*u2)->addr_offset)
2541 return (*u1)->addr_offset < (*u2)->addr_offset ? -1 : 1;
2542 else
2543 return 0;
2544}
a7e43c57 2545
309a0cf6
BC
2546/* Check if small groups should be split. Return true if no group
2547 contains more than two uses with distinct addr_offsets. Return
2548 false otherwise. We want to split such groups because:
a7e43c57
BC
2549
2550 1) Small groups don't have much benefit and may interfere with
2551 general candidate selection.
2552 2) The problem size with only small groups is usually small, and the
2553 general algorithm can handle it well.
2554
309a0cf6
BC
2555 TODO -- The above claim may not hold when we want to merge memory
2556 accesses with consecutive addresses. */
a7e43c57
BC
2557
2558static bool
309a0cf6 2559split_small_address_groups_p (struct ivopts_data *data)
a7e43c57 2560{
309a0cf6
BC
2561 unsigned int i, j, distinct = 1;
2562 struct iv_use *pre;
2563 struct iv_group *group;
a7e43c57 2564
309a0cf6 2565 for (i = 0; i < data->vgroups.length (); i++)
a7e43c57 2566 {
309a0cf6
BC
2567 group = data->vgroups[i];
2568 if (group->vuses.length () == 1)
2569 continue;
2570
2571 gcc_assert (group->type == USE_ADDRESS);
2572 if (group->vuses.length () == 2)
2573 {
2574 if (group->vuses[0]->addr_offset > group->vuses[1]->addr_offset)
2575 std::swap (group->vuses[0], group->vuses[1]);
2576 }
2577 else
2578 group->vuses.qsort (group_compare_offset);
2579
2580 if (distinct > 2)
a7e43c57
BC
2581 continue;
2582
2583 distinct = 1;
309a0cf6 2584 for (pre = group->vuses[0], j = 1; j < group->vuses.length (); j++)
a7e43c57 2585 {
309a0cf6
BC
2586 if (group->vuses[j]->addr_offset != pre->addr_offset)
2587 {
2588 pre = group->vuses[j];
2589 distinct++;
2590 }
a7e43c57
BC
2591
2592 if (distinct > 2)
309a0cf6 2593 break;
a7e43c57 2594 }
a7e43c57
BC
2595 }
2596
309a0cf6 2597 return (distinct <= 2);
a7e43c57
BC
2598}
2599
2600/* For each group of address type uses, this function further groups
2601 these uses according to the maximum offset supported by target's
2602 [base + offset] addressing mode. */
2603
2604static void
309a0cf6 2605split_address_groups (struct ivopts_data *data)
a7e43c57 2606{
309a0cf6 2607 unsigned int i, j;
359b060e
BC
2608 /* If true, split all groups at distinct offsets unconditionally. */
2609 bool split_p = split_small_address_groups_p (data);
a7e43c57 2610
309a0cf6 2611 for (i = 0; i < data->vgroups.length (); i++)
a7e43c57 2612 {
359b060e 2613 struct iv_group *new_group = NULL;
309a0cf6
BC
2614 struct iv_group *group = data->vgroups[i];
2615 struct iv_use *use = group->vuses[0];
2616
2617 use->id = 0;
2618 use->group_id = group->id;
2619 if (group->vuses.length () == 1)
a7e43c57
BC
2620 continue;
2621
359b060e 2622 gcc_assert (group->type == USE_ADDRESS);
a7e43c57 2623
359b060e 2624 for (j = 1; j < group->vuses.length ();)
a7e43c57 2625 {
309a0cf6 2626 struct iv_use *next = group->vuses[j];
359b060e 2627 HOST_WIDE_INT offset = next->addr_offset - use->addr_offset;
309a0cf6 2628
359b060e
BC
2629 /* Split the group if asked to, or if the offset against the first
2630 use can't fit in the offset part of the addressing mode. IV uses
2631 having the same offset are still kept in one group. */
2632 if (offset != 0
2633 && (split_p || !addr_offset_valid_p (use, offset)))
2634 {
2635 if (!new_group)
2636 new_group = record_group (data, group->type);
2637 group->vuses.ordered_remove (j);
2638 new_group->vuses.safe_push (next);
2639 continue;
2640 }
309a0cf6
BC
2641
2642 next->id = j;
2643 next->group_id = group->id;
359b060e 2644 j++;
309a0cf6
BC
2645 }
2646 }
2647}
2648
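/* Added example of the splitting above (hypothetical target): if a
   group accesses offsets 0, 4 and 70000 from the same base iv, and the
   target's [base + offset] form only accepts 16-bit signed offsets,
   addr_offset_valid_p rejects 70000, so that use is moved into a new
   group while the uses at offsets 0 and 4 stay together.  */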
2649/* Finds uses of the induction variables that are interesting. */
2650
2651static void
2652find_interesting_uses (struct ivopts_data *data)
2653{
2654 basic_block bb;
2655 gimple_stmt_iterator bsi;
2656 basic_block *body = get_loop_body (data->current_loop);
2657 unsigned i;
2658 edge e;
2659
2660 for (i = 0; i < data->current_loop->num_nodes; i++)
2661 {
2662 edge_iterator ei;
2663 bb = body[i];
2664
2665 FOR_EACH_EDGE (e, ei, bb->succs)
2666 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2667 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2668 find_interesting_uses_outside (data, e);
2669
2670 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2671 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2672 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2673 if (!is_gimple_debug (gsi_stmt (bsi)))
2674 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2675 }
e97cac02 2676 free (body);
309a0cf6
BC
2677
2678 split_address_groups (data);
2679
2680 if (dump_file && (dump_flags & TDF_DETAILS))
2681 {
309a0cf6
BC
2682 fprintf (dump_file, "\n<IV Groups>:\n");
2683 dump_groups (dump_file, data);
2684 fprintf (dump_file, "\n");
a7e43c57 2685 }
a7e43c57
BC
2686}
2687
f5e2738c 2688/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
9be872b7
ZD
2689 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2690 we are at the top-level of the processed address. */
f5e2738c
ZD
2691
2692static tree
9be872b7 2693strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
58fe50d5 2694 HOST_WIDE_INT *offset)
f5e2738c 2695{
9be872b7 2696 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
f5e2738c
ZD
2697 enum tree_code code;
2698 tree type, orig_type = TREE_TYPE (expr);
58fe50d5 2699 HOST_WIDE_INT off0, off1, st;
f5e2738c
ZD
2700 tree orig_expr = expr;
2701
2702 STRIP_NOPS (expr);
9be872b7 2703
f5e2738c
ZD
2704 type = TREE_TYPE (expr);
2705 code = TREE_CODE (expr);
2706 *offset = 0;
2707
2708 switch (code)
2709 {
2710 case INTEGER_CST:
2711 if (!cst_and_fits_in_hwi (expr)
6e682d7e 2712 || integer_zerop (expr))
f5e2738c
ZD
2713 return orig_expr;
2714
2715 *offset = int_cst_value (expr);
ff5e9a94 2716 return build_int_cst (orig_type, 0);
f5e2738c 2717
2d1a1007 2718 case POINTER_PLUS_EXPR:
f5e2738c
ZD
2719 case PLUS_EXPR:
2720 case MINUS_EXPR:
2721 op0 = TREE_OPERAND (expr, 0);
2722 op1 = TREE_OPERAND (expr, 1);
2723
9be872b7
ZD
2724 op0 = strip_offset_1 (op0, false, false, &off0);
2725 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 2726
2d1a1007 2727 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
f5e2738c
ZD
2728 if (op0 == TREE_OPERAND (expr, 0)
2729 && op1 == TREE_OPERAND (expr, 1))
2730 return orig_expr;
2731
6e682d7e 2732 if (integer_zerop (op1))
f5e2738c 2733 expr = op0;
6e682d7e 2734 else if (integer_zerop (op0))
f5e2738c 2735 {
2d1a1007 2736 if (code == MINUS_EXPR)
9be872b7 2737 expr = fold_build1 (NEGATE_EXPR, type, op1);
2d1a1007
AP
2738 else
2739 expr = op1;
f5e2738c
ZD
2740 }
2741 else
9be872b7 2742 expr = fold_build2 (code, type, op0, op1);
f5e2738c
ZD
2743
2744 return fold_convert (orig_type, expr);
2745
7a2faca1
EB
2746 case MULT_EXPR:
2747 op1 = TREE_OPERAND (expr, 1);
2748 if (!cst_and_fits_in_hwi (op1))
2749 return orig_expr;
2750
2751 op0 = TREE_OPERAND (expr, 0);
2752 op0 = strip_offset_1 (op0, false, false, &off0);
2753 if (op0 == TREE_OPERAND (expr, 0))
2754 return orig_expr;
2755
2756 *offset = off0 * int_cst_value (op1);
2757 if (integer_zerop (op0))
2758 expr = op0;
2759 else
2760 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2761
2762 return fold_convert (orig_type, expr);
2763
f5e2738c 2764 case ARRAY_REF:
9f7ccf69 2765 case ARRAY_RANGE_REF:
f5e2738c
ZD
2766 if (!inside_addr)
2767 return orig_expr;
2768
2769 step = array_ref_element_size (expr);
2770 if (!cst_and_fits_in_hwi (step))
2771 break;
2772
2773 st = int_cst_value (step);
2774 op1 = TREE_OPERAND (expr, 1);
9be872b7 2775 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 2776 *offset = off1 * st;
9be872b7
ZD
2777
2778 if (top_compref
6e682d7e 2779 && integer_zerop (op1))
9be872b7
ZD
2780 {
2781 /* Strip the component reference completely. */
2782 op0 = TREE_OPERAND (expr, 0);
2783 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2784 *offset += off0;
2785 return op0;
2786 }
f5e2738c
ZD
2787 break;
2788
2789 case COMPONENT_REF:
58fe50d5
BC
2790 {
2791 tree field;
2792
2793 if (!inside_addr)
2794 return orig_expr;
2795
2796 tmp = component_ref_field_offset (expr);
2797 field = TREE_OPERAND (expr, 1);
2798 if (top_compref
2799 && cst_and_fits_in_hwi (tmp)
2800 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2801 {
2802 HOST_WIDE_INT boffset, abs_off;
2803
2804 /* Strip the component reference completely. */
2805 op0 = TREE_OPERAND (expr, 0);
2806 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2807 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2808 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2809 if (boffset < 0)
2810 abs_off = -abs_off;
2811
2812 *offset = off0 + int_cst_value (tmp) + abs_off;
2813 return op0;
2814 }
2815 }
f5e2738c
ZD
2816 break;
2817
2818 case ADDR_EXPR:
9be872b7
ZD
2819 op0 = TREE_OPERAND (expr, 0);
2820 op0 = strip_offset_1 (op0, true, true, &off0);
2821 *offset += off0;
2822
2823 if (op0 == TREE_OPERAND (expr, 0))
2824 return orig_expr;
2825
d5dc1717 2826 expr = build_fold_addr_expr (op0);
9be872b7
ZD
2827 return fold_convert (orig_type, expr);
2828
70f34814
RG
2829 case MEM_REF:
2830 /* ??? Offset operand? */
9be872b7 2831 inside_addr = false;
f5e2738c
ZD
2832 break;
2833
2834 default:
2835 return orig_expr;
2836 }
2837
2838 /* Default handling of expressions for that we want to recurse into
2839 the first operand. */
2840 op0 = TREE_OPERAND (expr, 0);
9be872b7 2841 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
f5e2738c
ZD
2842 *offset += off0;
2843
2844 if (op0 == TREE_OPERAND (expr, 0)
2845 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2846 return orig_expr;
2847
2848 expr = copy_node (expr);
2849 TREE_OPERAND (expr, 0) = op0;
2850 if (op1)
2851 TREE_OPERAND (expr, 1) = op1;
2852
9be872b7 2853 /* Inside an address, we might strip the top level component references,
0fa2e4df 2854 thus changing the type of the expression. Handling of ADDR_EXPR
9be872b7
ZD
2855 will fix that. */
2856 expr = fold_convert (orig_type, expr);
2857
2858 return expr;
2859}
2860
2861/* Strips constant offsets from EXPR and stores them to OFFSET. */
2862
957f0d8f 2863tree
9be872b7
ZD
2864strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2865{
58fe50d5
BC
2866 HOST_WIDE_INT off;
2867 tree core = strip_offset_1 (expr, false, false, &off);
2868 *offset = off;
2869 return core;
f5e2738c
ZD
2870}
2871
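/* Added examples for strip_offset, assuming 4-byte ints and constant
   field offsets:

     strip_offset (&a[i + 3], &off)  -> &a[i], off = 12
     strip_offset (p + 16, &off)     -> p,     off = 16
     strip_offset (&x.f, &off)       -> &x,    off = byte offset of f  */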
d482f417 2872/* Returns a variant of TYPE that can be used as a base for different uses.
20527215
ZD
2873 We return an unsigned type with the same precision, which avoids problems
2874 with overflows. */
d482f417
ZD
2875
2876static tree
2877generic_type_for (tree type)
2878{
2879 if (POINTER_TYPE_P (type))
20527215 2880 return unsigned_type_for (type);
d482f417
ZD
2881
2882 if (TYPE_UNSIGNED (type))
2883 return type;
2884
2885 return unsigned_type_for (type);
2886}
2887
1c52c69f
BC
2888/* Private data for walk_tree. */
2889
2890struct walk_tree_data
2891{
2892 bitmap *inv_vars;
2893 struct ivopts_data *idata;
2894};
2895
2896/* Callback function for walk_tree; it records invariants and symbol
2897 references in *EXPR_P. DATA is the structure storing result info. */
9be872b7 2898
9be872b7 2899static tree
1c52c69f 2900find_inv_vars_cb (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
9be872b7 2901{
e97cac02 2902 tree op = *expr_p;
9be872b7 2903 struct version_info *info;
e97cac02 2904 struct walk_tree_data *wdata = (struct walk_tree_data*) data;
9be872b7 2905
e97cac02 2906 if (TREE_CODE (op) != SSA_NAME)
9be872b7 2907 return NULL_TREE;
9be872b7 2908
e97cac02
BC
2909 info = name_info (wdata->idata, op);
2910 /* Because we expand simple operations when finding IVs, loop invariant
2911 variable that isn't referred by the original loop could be used now.
2912 Record such invariant variables here. */
2913 if (!info->iv)
2914 {
2915 struct ivopts_data *idata = wdata->idata;
2916 basic_block bb = gimple_bb (SSA_NAME_DEF_STMT (op));
2917
2918 if (!bb || !flow_bb_inside_loop_p (idata->current_loop, bb))
2919 {
2920 set_iv (idata, op, op, build_int_cst (TREE_TYPE (op), 0), true);
2921 record_invariant (idata, op, false);
2922 }
2923 }
9be872b7
ZD
2924 if (!info->inv_id || info->has_nonlin_use)
2925 return NULL_TREE;
2926
1c52c69f
BC
2927 if (!*wdata->inv_vars)
2928 *wdata->inv_vars = BITMAP_ALLOC (NULL);
2929 bitmap_set_bit (*wdata->inv_vars, info->inv_id);
9be872b7
ZD
2930
2931 return NULL_TREE;
2932}
2933
1c52c69f
BC
2934/* Records invariants in *EXPR_P. INV_VARS is the bitmap in which we
2935 should store them. */
2936
2937static inline void
2938find_inv_vars (struct ivopts_data *data, tree *expr_p, bitmap *inv_vars)
2939{
2940 struct walk_tree_data wdata;
2941
2942 if (!inv_vars)
2943 return;
2944
2945 wdata.idata = data;
2946 wdata.inv_vars = inv_vars;
2947 walk_tree (expr_p, find_inv_vars_cb, &wdata, NULL);
2948}
2949
f9f69dd6
BC
2950/* Get the entry from the invariant expr hash table for INV_EXPR. A new
2951 entry will be recorded if it doesn't exist yet. Given the two exprs:
2952 inv_expr + cst1, inv_expr + cst2
2953 it's hard to decide whether the constant part should be stripped
2954 or not. We choose not to strip it, based on the following facts:
2955 1) We need to count the ADD cost for the constant part if it's stripped,
2956 which isn't always trivial where this function is called.
2957 2) Stripping the constant away may conflict with the subsequent loop
2958 invariant hoisting pass.
2959 3) Not stripping the constant away results in more invariant exprs,
2960 which usually leads to decisions preferring lower reg pressure. */
2961
2962static iv_inv_expr_ent *
2963get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
2964{
2965 STRIP_NOPS (inv_expr);
2966
2967 if (TREE_CODE (inv_expr) == INTEGER_CST || TREE_CODE (inv_expr) == SSA_NAME)
2968 return NULL;
2969
2970 /* Don't strip constant part away as we used to. */
2971
2972 /* Stores EXPR in DATA->inv_expr_tab, return pointer to iv_inv_expr_ent. */
2973 struct iv_inv_expr_ent ent;
2974 ent.expr = inv_expr;
2975 ent.hash = iterative_hash_expr (inv_expr, 0);
2976 struct iv_inv_expr_ent **slot = data->inv_expr_tab->find_slot (&ent, INSERT);
2977
2978 if (!*slot)
2979 {
2980 *slot = XNEW (struct iv_inv_expr_ent);
2981 (*slot)->expr = inv_expr;
2982 (*slot)->hash = ent.hash;
2983 (*slot)->id = ++data->max_inv_expr_id;
2984 }
2985
2986 return *slot;
2987}
2988
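/* Added illustration (hypothetical SSA names): if two uses share the
   invariant expression "a_1 + b_2", both calls of
   get_loop_invariant_expr return the same iv_inv_expr_ent, so the
   expression is counted once when estimating register pressure;
   "a_1 + b_2 + 1" and "a_1 + b_2 + 2" deliberately remain two distinct
   entries because the constant part is not stripped.  */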
8b11a64c
ZD
2989/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2990 position to POS. If USE is not NULL, the candidate is set as related to
2991 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2992 replacement of the final value of the iv by a direct computation. */
2993
2994static struct iv_cand *
2995add_candidate_1 (struct ivopts_data *data,
2996 tree base, tree step, bool important, enum iv_position pos,
355fe088 2997 struct iv_use *use, gimple *incremented_at,
e4142529 2998 struct iv *orig_iv = NULL)
8b11a64c
ZD
2999{
3000 unsigned i;
3001 struct iv_cand *cand = NULL;
d482f417 3002 tree type, orig_type;
b8698a0f 3003
309a0cf6
BC
3004 gcc_assert (base && step);
3005
1a218fc9
ILT
3006 /* -fkeep-gc-roots-live means that we have to keep a real pointer
3007 live, but the ivopts code may replace a real pointer with one
3008 pointing before or after the memory block that is then adjusted
3009 into the memory block during the loop. FIXME: It would likely be
3010 better to actually force the pointer live and still use ivopts;
3011 for example, it would be enough to write the pointer into memory
3012 and keep it there until after the loop. */
3013 if (flag_keep_gc_roots_live && POINTER_TYPE_P (TREE_TYPE (base)))
3014 return NULL;
3015
d8af4ba3
ZD
3016 /* For non-original variables, make sure their values are computed in a type
3017 that does not invoke undefined behavior on overflows (since in general,
3018 we cannot prove that these induction variables are non-wrapping). */
3019 if (pos != IP_ORIGINAL)
8b11a64c 3020 {
d482f417
ZD
3021 orig_type = TREE_TYPE (base);
3022 type = generic_type_for (orig_type);
71adbef3 3023 if (type != orig_type)
8b11a64c 3024 {
8b11a64c 3025 base = fold_convert (type, base);
6e42ce54 3026 step = fold_convert (type, step);
8b11a64c
ZD
3027 }
3028 }
3029
309a0cf6 3030 for (i = 0; i < data->vcands.length (); i++)
8b11a64c 3031 {
309a0cf6 3032 cand = data->vcands[i];
8b11a64c
ZD
3033
3034 if (cand->pos != pos)
3035 continue;
3036
2c08497a
BS
3037 if (cand->incremented_at != incremented_at
3038 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3039 && cand->ainc_use != use))
8b11a64c
ZD
3040 continue;
3041
6e42ce54 3042 if (operand_equal_p (base, cand->iv->base, 0)
18081149 3043 && operand_equal_p (step, cand->iv->step, 0)
623b8e0a
ML
3044 && (TYPE_PRECISION (TREE_TYPE (base))
3045 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
6e42ce54 3046 break;
8b11a64c
ZD
3047 }
3048
309a0cf6 3049 if (i == data->vcands.length ())
8b11a64c 3050 {
5ed6ace5 3051 cand = XCNEW (struct iv_cand);
8b11a64c 3052 cand->id = i;
309a0cf6 3053 cand->iv = alloc_iv (data, base, step);
8b11a64c 3054 cand->pos = pos;
309a0cf6 3055 if (pos != IP_ORIGINAL)
8b11a64c
ZD
3056 {
3057 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
3058 cand->var_after = cand->var_before;
3059 }
3060 cand->important = important;
3061 cand->incremented_at = incremented_at;
309a0cf6 3062 data->vcands.safe_push (cand);
8b11a64c 3063
309a0cf6 3064 if (TREE_CODE (step) != INTEGER_CST)
4c11bdff
BC
3065 {
3066 find_inv_vars (data, &step, &cand->inv_vars);
3067
3068 iv_inv_expr_ent *inv_expr = get_loop_invariant_expr (data, step);
3069 /* Share bitmap between inv_vars and inv_exprs for cand. */
3070 if (inv_expr != NULL)
3071 {
3072 cand->inv_exprs = cand->inv_vars;
3073 cand->inv_vars = NULL;
3074 if (cand->inv_exprs)
3075 bitmap_clear (cand->inv_exprs);
3076 else
3077 cand->inv_exprs = BITMAP_ALLOC (NULL);
3078
3079 bitmap_set_bit (cand->inv_exprs, inv_expr->id);
3080 }
3081 }
9be872b7 3082
2c08497a
BS
3083 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
3084 cand->ainc_use = use;
3085 else
3086 cand->ainc_use = NULL;
3087
e4142529 3088 cand->orig_iv = orig_iv;
8b11a64c
ZD
3089 if (dump_file && (dump_flags & TDF_DETAILS))
3090 dump_cand (dump_file, cand);
3091 }
3092
309a0cf6 3093 cand->important |= important;
8b11a64c 3094
309a0cf6 3095 /* Relate candidate to the group for which it is added. */
8b11a64c 3096 if (use)
309a0cf6 3097 bitmap_set_bit (data->vgroups[use->group_id]->related_cands, i);
8b11a64c
ZD
3098
3099 return cand;
3100}
3101
4366cf6d
ZD
3102/* Returns true if incrementing the induction variable at the end of the LOOP
3103 is allowed.
3104
3105 The purpose is to avoid splitting the latch edge with a biv increment, thus
3106 creating a jump, possibly confusing other optimization passes and leaving
ac5344e0
BC
3107 less freedom to the scheduler. So we allow IP_END only if IP_NORMAL is not
3108 available (so we do not have a better alternative), or if the latch edge
3109 is already nonempty. */
4366cf6d
ZD
3110
3111static bool
3112allow_ip_end_pos_p (struct loop *loop)
3113{
3114 if (!ip_normal_pos (loop))
3115 return true;
3116
3117 if (!empty_block_p (ip_end_pos (loop)))
3118 return true;
3119
3120 return false;
3121}
3122
2c08497a
BS
3123/* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
3124 Important field is set to IMPORTANT. */
3125
3126static void
3127add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
3128 bool important, struct iv_use *use)
3129{
3130 basic_block use_bb = gimple_bb (use->stmt);
ef4bddc2 3131 machine_mode mem_mode;
2c08497a
BS
3132 unsigned HOST_WIDE_INT cstepi;
3133
3134 /* If we insert the increment in any position other than the standard
3135 ones, we must ensure that it is incremented once per iteration.
3136 It must not be in an inner nested loop, or one side of an if
3137 statement. */
3138 if (use_bb->loop_father != data->current_loop
3139 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
8119ebd5 3140 || stmt_can_throw_internal (use->stmt)
2c08497a
BS
3141 || !cst_and_fits_in_hwi (step))
3142 return;
3143
3144 cstepi = int_cst_value (step);
3145
3146 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
8875e939
RR
3147 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
3148 || USE_STORE_PRE_INCREMENT (mem_mode))
3149 && GET_MODE_SIZE (mem_mode) == cstepi)
3150 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
3151 || USE_STORE_PRE_DECREMENT (mem_mode))
3152 && GET_MODE_SIZE (mem_mode) == -cstepi))
2c08497a
BS
3153 {
3154 enum tree_code code = MINUS_EXPR;
3155 tree new_base;
3156 tree new_step = step;
3157
3158 if (POINTER_TYPE_P (TREE_TYPE (base)))
3159 {
3160 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
3161 code = POINTER_PLUS_EXPR;
3162 }
3163 else
3164 new_step = fold_convert (TREE_TYPE (base), new_step);
3165 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
3166 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
3167 use->stmt);
3168 }
8875e939
RR
3169 if (((USE_LOAD_POST_INCREMENT (mem_mode)
3170 || USE_STORE_POST_INCREMENT (mem_mode))
3171 && GET_MODE_SIZE (mem_mode) == cstepi)
3172 || ((USE_LOAD_POST_DECREMENT (mem_mode)
3173 || USE_STORE_POST_DECREMENT (mem_mode))
3174 && GET_MODE_SIZE (mem_mode) == -cstepi))
2c08497a
BS
3175 {
3176 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
3177 use->stmt);
3178 }
3179}
3180
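/* Added example (hypothetical target with post-increment addressing):
   for a 4-byte load from an iv with base B and step 4, the code above
   adds an IP_AFTER_USE candidate with base B, which can later let the
   access be emitted as a single post-increment access, roughly
   "load r, [p]; p += 4" folded into one instruction.  */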
8b11a64c
ZD
3181/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
3182 position to POS. If USE is not NULL, the candidate is set as related to
4c3b378b
BC
3183 it. The candidate computation is scheduled before the exit condition
3184 and at the end of the loop. */
8b11a64c
ZD
3185
3186static void
b8698a0f 3187add_candidate (struct ivopts_data *data,
e4142529
BC
3188 tree base, tree step, bool important, struct iv_use *use,
3189 struct iv *orig_iv = NULL)
8b11a64c
ZD
3190{
3191 if (ip_normal_pos (data->current_loop))
e4142529
BC
3192 add_candidate_1 (data, base, step, important,
3193 IP_NORMAL, use, NULL, orig_iv);
4366cf6d
ZD
3194 if (ip_end_pos (data->current_loop)
3195 && allow_ip_end_pos_p (data->current_loop))
e4142529 3196 add_candidate_1 (data, base, step, important, IP_END, use, NULL, orig_iv);
8b11a64c
ZD
3197}
3198
3199/* Adds standard iv candidates. */
3200
3201static void
3202add_standard_iv_candidates (struct ivopts_data *data)
3203{
0f250839
RG
3204 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
3205
3206 /* The same for a double-integer type if it is still fast enough. */
3207 if (TYPE_PRECISION
623b8e0a 3208 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
0f250839
RG
3209 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
3210 add_candidate (data, build_int_cst (long_integer_type_node, 0),
3211 build_int_cst (long_integer_type_node, 1), true, NULL);
8b11a64c 3212
39b4020c 3213 /* The same for a double-integer type if it is still fast enough. */
0f250839 3214 if (TYPE_PRECISION
623b8e0a 3215 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
0f250839
RG
3216 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
3217 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
3218 build_int_cst (long_long_integer_type_node, 1), true, NULL);
8b11a64c
ZD
3219}
3220
3221
3222/* Adds candidates based on the old induction variable IV. */
3223
3224static void
4c3b378b 3225add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
8b11a64c 3226{
355fe088 3227 gimple *phi;
726a989a 3228 tree def;
8b11a64c
ZD
3229 struct iv_cand *cand;
3230
e4142529
BC
3231 /* Check if this biv is used in address type use. */
3232 if (iv->no_overflow && iv->have_address_use
3233 && INTEGRAL_TYPE_P (TREE_TYPE (iv->base))
3234 && TYPE_PRECISION (TREE_TYPE (iv->base)) < TYPE_PRECISION (sizetype))
3235 {
3236 tree base = fold_convert (sizetype, iv->base);
3237 tree step = fold_convert (sizetype, iv->step);
3238
3239 /* Add iv cand of same precision as index part in TARGET_MEM_REF. */
3240 add_candidate (data, base, step, true, NULL, iv);
3241 /* Add iv cand of the original type only if it has nonlinear use. */
309a0cf6 3242 if (iv->nonlin_use)
e4142529
BC
3243 add_candidate (data, iv->base, iv->step, true, NULL);
3244 }
3245 else
3246 add_candidate (data, iv->base, iv->step, true, NULL);
8b11a64c
ZD
3247
3248 /* The same, but with initial value zero. */
1a00e5f7
JJ
3249 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
3250 add_candidate (data, size_int (0), iv->step, true, NULL);
3251 else
3252 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
3253 iv->step, true, NULL);
8b11a64c
ZD
3254
3255 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
726a989a 3256 if (gimple_code (phi) == GIMPLE_PHI)
8b11a64c
ZD
3257 {
3258 /* Additionally record the possibility of leaving the original iv
3259 untouched. */
3260 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
b83b5507
BC
3261 /* Don't add candidate if it's from another PHI node because
3262 it's an affine iv appearing in the form of PEELED_CHREC. */
3263 phi = SSA_NAME_DEF_STMT (def);
3264 if (gimple_code (phi) != GIMPLE_PHI)
3265 {
3266 cand = add_candidate_1 (data,
3267 iv->base, iv->step, true, IP_ORIGINAL, NULL,
3268 SSA_NAME_DEF_STMT (def));
1a218fc9
ILT
3269 if (cand)
3270 {
3271 cand->var_before = iv->ssa_name;
3272 cand->var_after = def;
3273 }
b83b5507
BC
3274 }
3275 else
3276 gcc_assert (gimple_bb (phi) == data->current_loop->header);
8b11a64c
ZD
3277 }
3278}
3279
3280/* Adds candidates based on the old induction variables. */
3281
3282static void
4c3b378b 3283add_iv_candidate_for_bivs (struct ivopts_data *data)
8b11a64c
ZD
3284{
3285 unsigned i;
3286 struct iv *iv;
87c476a2 3287 bitmap_iterator bi;
8b11a64c 3288
87c476a2 3289 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
3290 {
3291 iv = ver_info (data, i)->iv;
6e42ce54 3292 if (iv && iv->biv_p && !integer_zerop (iv->step))
4c3b378b 3293 add_iv_candidate_for_biv (data, iv);
87c476a2 3294 }
8b11a64c
ZD
3295}
3296
cf5b92ef
BC
3297/* Record common candidate {BASE, STEP} derived from USE in hashtable. */
3298
3299static void
3300record_common_cand (struct ivopts_data *data, tree base,
3301 tree step, struct iv_use *use)
3302{
3303 struct iv_common_cand ent;
3304 struct iv_common_cand **slot;
3305
cf5b92ef
BC
3306 ent.base = base;
3307 ent.step = step;
3308 ent.hash = iterative_hash_expr (base, 0);
3309 ent.hash = iterative_hash_expr (step, ent.hash);
3310
3311 slot = data->iv_common_cand_tab->find_slot (&ent, INSERT);
3312 if (*slot == NULL)
3313 {
74fbae92 3314 *slot = new iv_common_cand ();
cf5b92ef
BC
3315 (*slot)->base = base;
3316 (*slot)->step = step;
3317 (*slot)->uses.create (8);
3318 (*slot)->hash = ent.hash;
3319 data->iv_common_cands.safe_push ((*slot));
3320 }
309a0cf6
BC
3321
3322 gcc_assert (use != NULL);
cf5b92ef
BC
3323 (*slot)->uses.safe_push (use);
3324 return;
3325}
3326
3327/* Comparison function used to sort common candidates. */
3328
3329static int
3330common_cand_cmp (const void *p1, const void *p2)
3331{
3332 unsigned n1, n2;
3333 const struct iv_common_cand *const *const ccand1
3334 = (const struct iv_common_cand *const *)p1;
3335 const struct iv_common_cand *const *const ccand2
3336 = (const struct iv_common_cand *const *)p2;
3337
3338 n1 = (*ccand1)->uses.length ();
3339 n2 = (*ccand2)->uses.length ();
3340 return n2 - n1;
3341}
3342
3343/* Adds IV candidates based on common candidates recorded. */
3344
3345static void
3346add_iv_candidate_derived_from_uses (struct ivopts_data *data)
3347{
3348 unsigned i, j;
3349 struct iv_cand *cand_1, *cand_2;
3350
3351 data->iv_common_cands.qsort (common_cand_cmp);
3352 for (i = 0; i < data->iv_common_cands.length (); i++)
3353 {
3354 struct iv_common_cand *ptr = data->iv_common_cands[i];
3355
3356 /* Only add IV candidate if it's derived from multiple uses. */
3357 if (ptr->uses.length () <= 1)
3358 break;
3359
3360 cand_1 = NULL;
3361 cand_2 = NULL;
3362 if (ip_normal_pos (data->current_loop))
3363 cand_1 = add_candidate_1 (data, ptr->base, ptr->step,
3364 false, IP_NORMAL, NULL, NULL);
3365
3366 if (ip_end_pos (data->current_loop)
3367 && allow_ip_end_pos_p (data->current_loop))
3368 cand_2 = add_candidate_1 (data, ptr->base, ptr->step,
3369 false, IP_END, NULL, NULL);
3370
3371 /* Bind deriving uses and the new candidates. */
3372 for (j = 0; j < ptr->uses.length (); j++)
3373 {
309a0cf6 3374 struct iv_group *group = data->vgroups[ptr->uses[j]->group_id];
cf5b92ef 3375 if (cand_1)
309a0cf6 3376 bitmap_set_bit (group->related_cands, cand_1->id);
cf5b92ef 3377 if (cand_2)
309a0cf6 3378 bitmap_set_bit (group->related_cands, cand_2->id);
cf5b92ef
BC
3379 }
3380 }
3381
3382 /* Release data since it is useless from this point. */
3383 data->iv_common_cand_tab->empty ();
3384 data->iv_common_cands.truncate (0);
3385}
3386
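/* Added example: for uses a[i] and b[i] in one loop, both record the
   common candidate {0, step of i} via record_common_cand above; since
   that candidate is then derived from two uses,
   add_iv_candidate_derived_from_uses promotes it to a real candidate
   and marks both use groups as related to it.  */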
4c3b378b 3387/* Adds candidates based on the value of USE's iv. */
8b11a64c
ZD
3388
3389static void
4c3b378b 3390add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
8b11a64c 3391{
f5e2738c 3392 unsigned HOST_WIDE_INT offset;
9be872b7 3393 tree base;
1ffe34d9 3394 tree basetype;
4c3b378b 3395 struct iv *iv = use->iv;
8b11a64c 3396
9be872b7 3397 add_candidate (data, iv->base, iv->step, false, use);
be35cf60 3398
cf5b92ef
BC
3399 /* Record common candidate for use in case it can be shared by others. */
3400 record_common_cand (data, iv->base, iv->step, use);
3401
3402 /* Record common candidate with initial value zero. */
1ffe34d9
AP
3403 basetype = TREE_TYPE (iv->base);
3404 if (POINTER_TYPE_P (basetype))
3405 basetype = sizetype;
cf5b92ef
BC
3406 record_common_cand (data, build_int_cst (basetype, 0), iv->step, use);
3407
cfe8aebe
BC
3408 /* Record common candidate with constant offset stripped in base.
3409 Like the use itself, we also add candidate directly for it. */
3410 base = strip_offset (iv->base, &offset);
3411 if (offset || base != iv->base)
cf5b92ef 3412 {
cfe8aebe
BC
3413 record_common_cand (data, base, iv->step, use);
3414 add_candidate (data, base, iv->step, false, use);
cf5b92ef
BC
3415 }
3416
3417 /* Record common candidate with base_object removed in base. */
46bf3f84
BC
3418 base = iv->base;
3419 STRIP_NOPS (base);
3420 if (iv->base_object != NULL && TREE_CODE (base) == POINTER_PLUS_EXPR)
cf5b92ef 3421 {
46bf3f84 3422 tree step = iv->step;
8b11a64c 3423
cf5b92ef 3424 STRIP_NOPS (step);
46bf3f84
BC
3425 base = TREE_OPERAND (base, 1);
3426 step = fold_convert (sizetype, step);
3427 record_common_cand (data, base, step, use);
3428 /* Also record common candidate with offset stripped. */
3429 base = strip_offset (base, &offset);
3430 if (offset)
3431 record_common_cand (data, base, step, use);
cf5b92ef 3432 }
4c3b378b
BC
3433
3434 /* Finally, add auto-increment candidates. Make such variables
3435 important since other iv uses with the same base object may be based
3436 on it. */
3437 if (use != NULL && use->type == USE_ADDRESS)
3438 add_autoinc_candidates (data, iv->base, iv->step, true, use);
8b11a64c
ZD
3439}
3440
8b11a64c
ZD
3441/* Adds candidates based on the uses. */
3442
3443static void
309a0cf6 3444add_iv_candidate_for_groups (struct ivopts_data *data)
8b11a64c
ZD
3445{
3446 unsigned i;
3447
309a0cf6
BC
3448 /* Only add candidate for the first use in group. */
3449 for (i = 0; i < data->vgroups.length (); i++)
8b11a64c 3450 {
309a0cf6 3451 struct iv_group *group = data->vgroups[i];
8b11a64c 3452
309a0cf6
BC
3453 gcc_assert (group->vuses[0] != NULL);
3454 add_iv_candidate_for_use (data, group->vuses[0]);
8b11a64c 3455 }
cf5b92ef 3456 add_iv_candidate_derived_from_uses (data);
8b11a64c
ZD
3457}
3458
cf5b92ef 3459/* Record important candidates and add them to related_cands bitmaps. */
b1b02be2
ZD
3460
3461static void
3462record_important_candidates (struct ivopts_data *data)
3463{
3464 unsigned i;
309a0cf6 3465 struct iv_group *group;
b1b02be2 3466
309a0cf6 3467 for (i = 0; i < data->vcands.length (); i++)
b1b02be2 3468 {
309a0cf6 3469 struct iv_cand *cand = data->vcands[i];
b1b02be2
ZD
3470
3471 if (cand->important)
3472 bitmap_set_bit (data->important_candidates, i);
3473 }
3474
309a0cf6 3475 data->consider_all_candidates = (data->vcands.length ()
b1b02be2
ZD
3476 <= CONSIDER_ALL_CANDIDATES_BOUND);
3477
309a0cf6
BC
3478 /* Add important candidates to groups' related_cands bitmaps. */
3479 for (i = 0; i < data->vgroups.length (); i++)
b1b02be2 3480 {
309a0cf6
BC
3481 group = data->vgroups[i];
3482 bitmap_ior_into (group->related_cands, data->important_candidates);
b1b02be2
ZD
3483 }
3484}
3485
8b11a64c
ZD
3486/* Allocates the data structure mapping the (use, candidate) pairs to costs.
3487 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3488 we allocate a simple list to every use. */
3489
3490static void
3491alloc_use_cost_map (struct ivopts_data *data)
3492{
79836a12 3493 unsigned i, size, s;
8b11a64c 3494
309a0cf6 3495 for (i = 0; i < data->vgroups.length (); i++)
8b11a64c 3496 {
309a0cf6 3497 struct iv_group *group = data->vgroups[i];
8b11a64c
ZD
3498
3499 if (data->consider_all_candidates)
309a0cf6 3500 size = data->vcands.length ();
8b11a64c
ZD
3501 else
3502 {
309a0cf6 3503 s = bitmap_count_bits (group->related_cands);
b1b02be2
ZD
3504
3505 /* Round up to the power of two, so that moduling by it is fast. */
79836a12 3506 size = s ? (1 << ceil_log2 (s)) : 1;
8b11a64c
ZD
3507 }
3508
309a0cf6
BC
3509 group->n_map_members = size;
3510 group->cost_map = XCNEWVEC (struct cost_pair, size);
8b11a64c
ZD
3511 }
3512}
3513
309a0cf6 3514/* Sets cost of (GROUP, CAND) pair to COST and record that it depends
0ca91c77
BC
3515 on invariants INV_VARS and that the value used in expressing it is
3516 VALUE, and in case of iv elimination the comparison operator is COMP. */
8b11a64c
ZD
3517
3518static void
309a0cf6
BC
3519set_group_iv_cost (struct ivopts_data *data,
3520 struct iv_group *group, struct iv_cand *cand,
0ca91c77
BC
3521 comp_cost cost, bitmap inv_vars, tree value,
3522 enum tree_code comp, bitmap inv_exprs)
8b11a64c 3523{
b1b02be2
ZD
3524 unsigned i, s;
3525
8d18b6df 3526 if (cost.infinite_cost_p ())
8b11a64c 3527 {
0ca91c77
BC
3528 BITMAP_FREE (inv_vars);
3529 BITMAP_FREE (inv_exprs);
b1b02be2 3530 return;
8b11a64c
ZD
3531 }
3532
3533 if (data->consider_all_candidates)
3534 {
309a0cf6
BC
3535 group->cost_map[cand->id].cand = cand;
3536 group->cost_map[cand->id].cost = cost;
0ca91c77
BC
3537 group->cost_map[cand->id].inv_vars = inv_vars;
3538 group->cost_map[cand->id].inv_exprs = inv_exprs;
309a0cf6
BC
3539 group->cost_map[cand->id].value = value;
3540 group->cost_map[cand->id].comp = comp;
8b11a64c
ZD
3541 return;
3542 }
3543
b1b02be2 3544 /* n_map_members is a power of two, so this computes modulo. */
309a0cf6
BC
3545 s = cand->id & (group->n_map_members - 1);
3546 for (i = s; i < group->n_map_members; i++)
3547 if (!group->cost_map[i].cand)
b1b02be2
ZD
3548 goto found;
3549 for (i = 0; i < s; i++)
309a0cf6 3550 if (!group->cost_map[i].cand)
b1b02be2
ZD
3551 goto found;
3552
3553 gcc_unreachable ();
8b11a64c 3554
b1b02be2 3555found:
309a0cf6
BC
3556 group->cost_map[i].cand = cand;
3557 group->cost_map[i].cost = cost;
0ca91c77
BC
3558 group->cost_map[i].inv_vars = inv_vars;
3559 group->cost_map[i].inv_exprs = inv_exprs;
309a0cf6
BC
3560 group->cost_map[i].value = value;
3561 group->cost_map[i].comp = comp;
8b11a64c
ZD
3562}
3563
309a0cf6 3564/* Gets cost of (GROUP, CAND) pair. */
8b11a64c 3565
b1b02be2 3566static struct cost_pair *
309a0cf6
BC
3567get_group_iv_cost (struct ivopts_data *data, struct iv_group *group,
3568 struct iv_cand *cand)
8b11a64c 3569{
b1b02be2
ZD
3570 unsigned i, s;
3571 struct cost_pair *ret;
8b11a64c
ZD
3572
3573 if (!cand)
b1b02be2 3574 return NULL;
8b11a64c
ZD
3575
3576 if (data->consider_all_candidates)
8b11a64c 3577 {
309a0cf6 3578 ret = group->cost_map + cand->id;
b1b02be2
ZD
3579 if (!ret->cand)
3580 return NULL;
8b11a64c 3581
b1b02be2 3582 return ret;
8b11a64c 3583 }
b8698a0f 3584
b1b02be2 3585 /* n_map_members is a power of two, so this computes modulo. */
309a0cf6
BC
3586 s = cand->id & (group->n_map_members - 1);
3587 for (i = s; i < group->n_map_members; i++)
3588 if (group->cost_map[i].cand == cand)
3589 return group->cost_map + i;
3590 else if (group->cost_map[i].cand == NULL)
79836a12 3591 return NULL;
b1b02be2 3592 for (i = 0; i < s; i++)
309a0cf6
BC
3593 if (group->cost_map[i].cand == cand)
3594 return group->cost_map + i;
3595 else if (group->cost_map[i].cand == NULL)
79836a12 3596 return NULL;
b1b02be2
ZD
3597
3598 return NULL;
8b11a64c
ZD
3599}
3600
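/* Editor's note -- an illustrative trace, not part of the pass: because
   alloc_use_cost_map rounds n_map_members up to a power of two, the
   "cand->id mod n_map_members" computation above reduces to a single
   bitwise AND.  With n_map_members == 8 and cand->id == 13, the start
   slot is

     s = 13 & (8 - 1);   => s == 5

   and both set_group_iv_cost and get_group_iv_cost probe slots 5, 6, 7
   and then wrap around to 0 .. 4, i.e. plain open addressing with
   linear probing.  */
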
/* Produce DECL_RTL for object OBJ so it looks like it is stored in memory.  */
static rtx
produce_memory_decl_rtl (tree obj, int *regno)
{
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
  machine_mode address_mode = targetm.addr_space.address_mode (as);
  rtx x;

  gcc_assert (obj);
  if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
    {
      const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
      x = gen_rtx_SYMBOL_REF (address_mode, name);
      SET_SYMBOL_REF_DECL (x, obj);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
      targetm.encode_section_info (obj, x, true);
    }
  else
    {
      x = gen_raw_REG (address_mode, (*regno)++);
      x = gen_rtx_MEM (DECL_MODE (obj), x);
      set_mem_addr_space (x, as);
    }

  return x;
}

/* Prepares decl_rtl for variables referred to in *EXPR_P.  Callback for
   walk_tree.  DATA contains the actual fake register number.  */

static tree
prepare_decl_rtl (tree *expr_p, int *ws, void *data)
{
  tree obj = NULL_TREE;
  rtx x = NULL_RTX;
  int *regno = (int *) data;

  switch (TREE_CODE (*expr_p))
    {
    case ADDR_EXPR:
      for (expr_p = &TREE_OPERAND (*expr_p, 0);
	   handled_component_p (*expr_p);
	   expr_p = &TREE_OPERAND (*expr_p, 0))
	continue;
      obj = *expr_p;
      if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
	x = produce_memory_decl_rtl (obj, regno);
      break;

    case SSA_NAME:
      *ws = 0;
      obj = SSA_NAME_VAR (*expr_p);
      /* Defer handling of anonymous SSA_NAMEs to the expander.  */
      if (!obj)
	return NULL_TREE;
      if (!DECL_RTL_SET_P (obj))
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
      break;

    case VAR_DECL:
    case PARM_DECL:
    case RESULT_DECL:
      *ws = 0;
      obj = *expr_p;

      if (DECL_RTL_SET_P (obj))
	break;

      if (DECL_MODE (obj) == BLKmode)
	x = produce_memory_decl_rtl (obj, regno);
      else
	x = gen_raw_REG (DECL_MODE (obj), (*regno)++);

      break;

    default:
      break;
    }

  if (x)
    {
      decl_rtl_to_reset.safe_push (obj);
      SET_DECL_RTL (obj, x);
    }

  return NULL_TREE;
}

/* Determines cost of the computation of EXPR.  */

static unsigned
computation_cost (tree expr, bool speed)
{
  rtx_insn *seq;
  rtx rslt;
  tree type = TREE_TYPE (expr);
  unsigned cost;
  /* Avoid using hard regs in ways which may be unsupported.  */
  int regno = LAST_VIRTUAL_REGISTER + 1;
  struct cgraph_node *node = cgraph_node::get (current_function_decl);
  enum node_frequency real_frequency = node->frequency;

  node->frequency = NODE_FREQUENCY_NORMAL;
  crtl->maybe_hot_insn_p = speed;
  walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
  start_sequence ();
  rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
  seq = get_insns ();
  end_sequence ();
  default_rtl_profile ();
  node->frequency = real_frequency;

  cost = seq_cost (seq, speed);
  if (MEM_P (rslt))
    cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
			  TYPE_ADDR_SPACE (type), speed);
  else if (!REG_P (rslt))
    cost += set_src_cost (rslt, TYPE_MODE (type), speed);

  return cost;
}

/* Returns variable containing the value of candidate CAND at statement AT.  */

static tree
var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple *stmt)
{
  if (stmt_after_increment (loop, cand, stmt))
    return cand->var_after;
  else
    return cand->var_before;
}

/* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
   same precision that is at least as wide as the precision of TYPE, stores
   BA to A and BB to B, and returns the type of BA.  Otherwise, returns the
   type of A and B.  */

static tree
determine_common_wider_type (tree *a, tree *b)
{
  tree wider_type = NULL;
  tree suba, subb;
  tree atype = TREE_TYPE (*a);

  if (CONVERT_EXPR_P (*a))
    {
      suba = TREE_OPERAND (*a, 0);
      wider_type = TREE_TYPE (suba);
      if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
	return atype;
    }
  else
    return atype;

  if (CONVERT_EXPR_P (*b))
    {
      subb = TREE_OPERAND (*b, 0);
      if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
	return atype;
    }
  else
    return atype;

  *a = suba;
  *b = subb;
  return wider_type;
}

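/* Editor's note -- an illustrative case with assumed declarations, not
   from the source: given

     unsigned int ai, bi;
     ... A = (unsigned short) ai, B = (unsigned short) bi ...

   determine_common_wider_type strips both conversions, stores ai and bi
   back through the A and B pointers and returns unsigned int, so the
   caller can compute the difference as (unsigned short) (ai - bi).
   This folds better than subtracting two truncated values and is safe
   because the final arithmetic is performed in the narrower unsigned
   type anyway.  */
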
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in two parts in a
   decomposed form: the invariant part is stored in AFF_INV, the variant part
   in AFF_VAR.  Store the ratio of USE.step over CAND.step in PRAT if it is
   non-null.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
		       struct iv_cand *cand, struct aff_tree *aff_inv,
		       struct aff_tree *aff_var, widest_int *prat = NULL)
{
  tree ubase = use->iv->base, ustep = use->iv->step;
  tree cbase = cand->iv->base, cstep = cand->iv->step;
  tree common_type, uutype, var, cstep_common;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  aff_tree aff_cbase;
  widest_int rat;

  /* We must have enough precision to express the values of the use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return false;

  var = var_at_stmt (loop, cand, at);
  uutype = unsigned_type_for (utype);

  /* If the conversion is not noop, perform it.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
	  && (CONVERT_EXPR_P (cstep) || TREE_CODE (cstep) == INTEGER_CST))
	{
	  tree inner_base, inner_step, inner_type;
	  inner_base = TREE_OPERAND (cbase, 0);
	  if (CONVERT_EXPR_P (cstep))
	    inner_step = TREE_OPERAND (cstep, 0);
	  else
	    inner_step = cstep;

	  inner_type = TREE_TYPE (inner_base);
	  /* If candidate is added from a biv whose type is smaller than
	     ctype, we know both candidate and the biv won't overflow.
	     In this case, it's safe to skip the conversion in candidate.
	     As an example, (unsigned short)((unsigned long)A) equals
	     (unsigned short)A, if A has a type no wider than short.  */
	  if (TYPE_PRECISION (inner_type) <= TYPE_PRECISION (uutype))
	    {
	      cbase = inner_base;
	      cstep = inner_step;
	    }
	}
      cbase = fold_convert (uutype, cbase);
      cstep = fold_convert (uutype, cstep);
      var = fold_convert (uutype, var);
    }

  /* Ratio is 1 when computing the value of biv cand by itself.
     We can't rely on constant_multiple_of in this case because the
     use is created after the original biv is selected.  The call
     could fail because of inconsistent fold behavior.  See PR68021
     for more information.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    {
      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (use->iv->ssa_name == cand->var_after);
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
      rat = 1;
    }
  else if (!constant_multiple_of (ustep, cstep, &rat))
    return false;

  if (prat)
    *prat = rat;

  /* In case both UBASE and CBASE are shortened to UUTYPE from some common
     type, we achieve better folding by computing their difference in this
     wider type, and cast the result to UUTYPE.  We do not need to worry about
     overflows, as all the arithmetics will in the end be performed in UUTYPE
     anyway.  */
  common_type = determine_common_wider_type (&ubase, &cbase);

  /* use = ubase - ratio * cbase + ratio * var.  */
  tree_to_aff_combination (ubase, common_type, aff_inv);
  tree_to_aff_combination (cbase, common_type, &aff_cbase);
  tree_to_aff_combination (var, uutype, aff_var);

  /* We need to shift the value if we are after the increment.  */
  if (stmt_after_increment (loop, cand, at))
    {
      aff_tree cstep_aff;

      if (common_type != uutype)
	cstep_common = fold_convert (common_type, cstep);
      else
	cstep_common = cstep;

      tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
      aff_combination_add (&aff_cbase, &cstep_aff);
    }

  aff_combination_scale (&aff_cbase, -rat);
  aff_combination_add (aff_inv, &aff_cbase);
  if (common_type != uutype)
    aff_combination_convert (aff_inv, uutype);

  aff_combination_scale (aff_var, rat);
  return true;
}

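/* Editor's note -- a numeric sanity check of the decomposition above,
   with assumed values: let the use be ubase + i * ustep = 4 + 8*i and
   the candidate variable var = cbase + i * cstep = 1 + 2*i.  Then
   rat = ustep / cstep = 4 and

     ubase - rat * cbase + rat * var = 4 - 4*1 + 4 * (1 + 2*i) = 4 + 8*i,

   which reproduces the use: AFF_INV holds the invariant part (here 0)
   and AFF_VAR the scaled variable part 4 * var.  */
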
/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The expression is stored in a decomposed
   form into AFF.  Returns false if USE cannot be expressed using CAND.  */

static bool
get_computation_aff (struct loop *loop, gimple *at, struct iv_use *use,
		     struct iv_cand *cand, struct aff_tree *aff)
{
  aff_tree aff_var;

  if (!get_computation_aff_1 (loop, at, use, cand, aff, &aff_var))
    return false;

  aff_combination_add (aff, &aff_var);
  return true;
}

/* Return the type of USE.  */

static tree
get_use_type (struct iv_use *use)
{
  tree base_type = TREE_TYPE (use->iv->base);
  tree type;

  if (use->type == USE_ADDRESS)
    {
      /* The base_type may be a void pointer.  Create a pointer type based on
	 the mem_ref instead.  */
      type = build_pointer_type (TREE_TYPE (*use->op_p));
      gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
		  == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
    }
  else
    type = base_type;

  return type;
}

/* Determines the expression by which USE is expressed from induction variable
   CAND at statement AT in LOOP.  The computation is unshared.  */

static tree
get_computation_at (struct loop *loop, gimple *at,
		    struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  tree type = get_use_type (use);

  if (!get_computation_aff (loop, at, use, cand, &aff))
    return NULL_TREE;
  unshare_aff_combination (&aff);
  return fold_convert (type, aff_combination_to_tree (&aff));
}

/* Adjust the cost COST for being in loop setup rather than loop body.
   If we're optimizing for space, the loop setup overhead is constant;
   if we're optimizing for speed, amortize it over the per-iteration cost.
   If ROUND_UP_P is true, the result is rounded up rather than truncated
   toward zero when optimizing for speed.  */
static unsigned
adjust_setup_cost (struct ivopts_data *data, unsigned cost,
		   bool round_up_p = false)
{
  if (cost == INFTY)
    return cost;
  else if (optimize_loop_for_speed_p (data->current_loop))
    {
      HOST_WIDE_INT niters = avg_loop_niter (data->current_loop);
      return ((HOST_WIDE_INT) cost + (round_up_p ? niters - 1 : 0)) / niters;
    }
  else
    return cost;
}

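/* Editor's note -- a worked example with assumed numbers: when
   optimizing for speed with avg_loop_niter == 10, a setup cost of 25
   is amortized to 25 / 10 == 2, or to (25 + 9) / 10 == 3 when
   ROUND_UP_P is true, so a small nonzero setup cost is never rounded
   away to zero.  */
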
/* Calculate the speed or size cost of a shiftadd EXPR in MODE.  MULT is
   the EXPR operand holding the shift.  COST0 and COST1 are the costs for
   calculating the operands of EXPR.  Returns true if successful, and
   returns the cost in COST.  */

static bool
get_shiftadd_cost (tree expr, scalar_int_mode mode, comp_cost cost0,
		   comp_cost cost1, tree mult, bool speed, comp_cost *cost)
{
  comp_cost res;
  tree op1 = TREE_OPERAND (expr, 1);
  tree cst = TREE_OPERAND (mult, 1);
  tree multop = TREE_OPERAND (mult, 0);
  int m = exact_log2 (int_cst_value (cst));
  int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
  int as_cost, sa_cost;
  bool mult_in_op1;

  if (!(m >= 0 && m < maxm))
    return false;

  STRIP_NOPS (op1);
  mult_in_op1 = operand_equal_p (op1, mult, 0);

  as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);

  /* If the target has a cheap shift-and-add or shift-and-sub instruction,
     use that in preference to a shift insn followed by an add insn.  */
  sa_cost = (TREE_CODE (expr) != MINUS_EXPR
	     ? shiftadd_cost (speed, mode, m)
	     : (mult_in_op1
		? shiftsub1_cost (speed, mode, m)
		: shiftsub0_cost (speed, mode, m)));

  res = comp_cost (MIN (as_cost, sa_cost), 0);
  res += (mult_in_op1 ? cost0 : cost1);

  STRIP_NOPS (multop);
  if (!is_gimple_val (multop))
    res += force_expr_to_var_cost (multop, speed);

  *cost = res;
  return true;
}

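/* Editor's note -- an illustrative case with assumed operands: for
   expr = a + b * 8, mult is b * 8 and m = exact_log2 (8) == 3.  The
   function then compares as_cost (a shift by 3 followed by an add)
   with sa_cost (a single shift-and-add insn, if the target has one)
   and returns the cheaper alternative plus the cost of computing the
   non-multiplied operand a.  */
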
/* Estimates cost of forcing expression EXPR into a variable.  */

static comp_cost
force_expr_to_var_cost (tree expr, bool speed)
{
  static bool costs_initialized = false;
  static unsigned integer_cost [2];
  static unsigned symbol_cost [2];
  static unsigned address_cost [2];
  tree op0, op1;
  comp_cost cost0, cost1, cost;
  machine_mode mode;
  scalar_int_mode int_mode;

  if (!costs_initialized)
    {
      tree type = build_pointer_type (integer_type_node);
      tree var, addr;
      rtx x;
      int i;

      var = create_tmp_var_raw (integer_type_node, "test_var");
      TREE_STATIC (var) = 1;
      x = produce_memory_decl_rtl (var, NULL);
      SET_DECL_RTL (var, x);

      addr = build1 (ADDR_EXPR, type, var);

      for (i = 0; i < 2; i++)
	{
	  integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
							     2000), i);

	  symbol_cost[i] = computation_cost (addr, i) + 1;

	  address_cost[i]
	    = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
	  if (dump_file && (dump_flags & TDF_DETAILS))
	    {
	      fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
	      fprintf (dump_file, "  integer %d\n", (int) integer_cost[i]);
	      fprintf (dump_file, "  symbol %d\n", (int) symbol_cost[i]);
	      fprintf (dump_file, "  address %d\n", (int) address_cost[i]);
	      fprintf (dump_file, "  other %d\n", (int) target_spill_cost[i]);
	      fprintf (dump_file, "\n");
	    }
	}

      costs_initialized = true;
    }

  STRIP_NOPS (expr);

  if (SSA_VAR_P (expr))
    return no_cost;

  if (is_gimple_min_invariant (expr))
    {
      if (TREE_CODE (expr) == INTEGER_CST)
	return comp_cost (integer_cost [speed], 0);

      if (TREE_CODE (expr) == ADDR_EXPR)
	{
	  tree obj = TREE_OPERAND (expr, 0);

	  if (VAR_P (obj)
	      || TREE_CODE (obj) == PARM_DECL
	      || TREE_CODE (obj) == RESULT_DECL)
	    return comp_cost (symbol_cost [speed], 0);
	}

      return comp_cost (address_cost [speed], 0);
    }

  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case MULT_EXPR:
    case TRUNC_DIV_EXPR:
    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);
      STRIP_NOPS (op0);
      STRIP_NOPS (op1);
      break;

    CASE_CONVERT:
    case NEGATE_EXPR:
    case BIT_NOT_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      STRIP_NOPS (op0);
      op1 = NULL_TREE;
      break;

    default:
      /* Just an arbitrary value, FIXME.  */
      return comp_cost (target_spill_cost[speed], 0);
    }

  if (op0 == NULL_TREE
      || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
    cost0 = no_cost;
  else
    cost0 = force_expr_to_var_cost (op0, speed);

  if (op1 == NULL_TREE
      || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
    cost1 = no_cost;
  else
    cost1 = force_expr_to_var_cost (op1, speed);

  mode = TYPE_MODE (TREE_TYPE (expr));
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
    case NEGATE_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      if (TREE_CODE (expr) != NEGATE_EXPR)
	{
	  tree mult = NULL_TREE;
	  comp_cost sa_cost;
	  if (TREE_CODE (op1) == MULT_EXPR)
	    mult = op1;
	  else if (TREE_CODE (op0) == MULT_EXPR)
	    mult = op0;

	  if (mult != NULL_TREE
	      && is_a <scalar_int_mode> (mode, &int_mode)
	      && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
	      && get_shiftadd_cost (expr, int_mode, cost0, cost1, mult,
				    speed, &sa_cost))
	    return sa_cost;
	}
      break;

    CASE_CONVERT:
      {
	tree inner_mode, outer_mode;
	outer_mode = TREE_TYPE (expr);
	inner_mode = TREE_TYPE (op0);
	cost = comp_cost (convert_cost (TYPE_MODE (outer_mode),
					TYPE_MODE (inner_mode), speed), 0);
      }
      break;

    case MULT_EXPR:
      if (cst_and_fits_in_hwi (op0))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op0),
					      mode, speed), 0);
      else if (cst_and_fits_in_hwi (op1))
	cost = comp_cost (mult_by_coeff_cost (int_cst_value (op1),
					      mode, speed), 0);
      else
	return comp_cost (target_spill_cost [speed], 0);
      break;

    case TRUNC_DIV_EXPR:
      /* Division by power of two is usually cheap, so we allow it.  Forbid
	 anything else.  */
      if (integer_pow2p (TREE_OPERAND (expr, 1)))
	cost = comp_cost (add_cost (speed, mode), 0);
      else
	cost = comp_cost (target_spill_cost[speed], 0);
      break;

    case BIT_AND_EXPR:
    case BIT_IOR_EXPR:
    case BIT_NOT_EXPR:
    case LSHIFT_EXPR:
    case RSHIFT_EXPR:
      cost = comp_cost (add_cost (speed, mode), 0);
      break;

    default:
      gcc_unreachable ();
    }

  cost += cost0;
  cost += cost1;
  return cost;
}

/* Estimates cost of forcing EXPR into a variable.  INV_VARS is a set of the
   invariants the computation depends on.  */

static comp_cost
force_var_cost (struct ivopts_data *data, tree expr, bitmap *inv_vars)
{
  if (!expr)
    return no_cost;

  find_inv_vars (data, &expr, inv_vars);
  return force_expr_to_var_cost (expr, data->speed);
}

/* Returns cost of auto-modifying address expression in the shape
   base + offset.  AINC_STEP is the step size of the address IV.
   AINC_OFFSET is the offset of the address expression.  The address
   expression has ADDR_MODE in address space AS.  The memory access has
   MEM_MODE.  SPEED means we are optimizing for speed or size.  */

enum ainc_type
{
  AINC_PRE_INC,		/* Pre increment.  */
  AINC_PRE_DEC,		/* Pre decrement.  */
  AINC_POST_INC,	/* Post increment.  */
  AINC_POST_DEC,	/* Post decrement.  */
  AINC_NONE		/* Also the number of auto increment types.  */
};

struct ainc_cost_data
{
  unsigned costs[AINC_NONE];
};

static comp_cost
get_address_cost_ainc (HOST_WIDE_INT ainc_step, HOST_WIDE_INT ainc_offset,
		       machine_mode addr_mode, machine_mode mem_mode,
		       addr_space_t as, bool speed)
{
  if (!USE_LOAD_PRE_DECREMENT (mem_mode)
      && !USE_STORE_PRE_DECREMENT (mem_mode)
      && !USE_LOAD_POST_DECREMENT (mem_mode)
      && !USE_STORE_POST_DECREMENT (mem_mode)
      && !USE_LOAD_PRE_INCREMENT (mem_mode)
      && !USE_STORE_PRE_INCREMENT (mem_mode)
      && !USE_LOAD_POST_INCREMENT (mem_mode)
      && !USE_STORE_POST_INCREMENT (mem_mode))
    return infinite_cost;

  static vec<ainc_cost_data *> ainc_cost_data_list;
  unsigned idx = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
  if (idx >= ainc_cost_data_list.length ())
    {
      unsigned nsize = ((unsigned) as + 1) * MAX_MACHINE_MODE;

      gcc_assert (nsize > idx);
      ainc_cost_data_list.safe_grow_cleared (nsize);
    }

  ainc_cost_data *data = ainc_cost_data_list[idx];
  if (data == NULL)
    {
      rtx reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);

      data = (ainc_cost_data *) xcalloc (1, sizeof (*data));
      data->costs[AINC_PRE_DEC] = INFTY;
      data->costs[AINC_POST_DEC] = INFTY;
      data->costs[AINC_PRE_INC] = INFTY;
      data->costs[AINC_POST_INC] = INFTY;
      if (USE_LOAD_PRE_DECREMENT (mem_mode)
	  || USE_STORE_PRE_DECREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_PRE_DEC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_PRE_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_DECREMENT (mem_mode)
	  || USE_STORE_POST_DECREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_POST_DEC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_POST_DEC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_PRE_INCREMENT (mem_mode)
	  || USE_STORE_PRE_INCREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_PRE_INC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_PRE_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      if (USE_LOAD_POST_INCREMENT (mem_mode)
	  || USE_STORE_POST_INCREMENT (mem_mode))
	{
	  rtx addr = gen_rtx_POST_INC (addr_mode, reg);

	  if (memory_address_addr_space_p (mem_mode, addr, as))
	    data->costs[AINC_POST_INC]
	      = address_cost (addr, mem_mode, as, speed);
	}
      ainc_cost_data_list[idx] = data;
    }

  HOST_WIDE_INT msize = GET_MODE_SIZE (mem_mode);
  if (ainc_offset == 0 && msize == ainc_step)
    return comp_cost (data->costs[AINC_POST_INC], 0);
  if (ainc_offset == 0 && msize == -ainc_step)
    return comp_cost (data->costs[AINC_POST_DEC], 0);
  if (ainc_offset == msize && msize == ainc_step)
    return comp_cost (data->costs[AINC_PRE_INC], 0);
  if (ainc_offset == -msize && msize == -ainc_step)
    return comp_cost (data->costs[AINC_PRE_DEC], 0);

  return infinite_cost;
}

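/* Editor's note -- an illustrative match with an assumed 4-byte access
   and cand->iv->step == 4 (so msize == ainc_step == 4): an offset of 0
   selects AINC_POST_INC ("*p++" style addressing) and an offset of 4
   selects AINC_PRE_INC; with step == -4, offsets 0 and -4 map to the
   POST_DEC and PRE_DEC variants symmetrically.  */
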
/* Return cost of computing USE's address expression by using CAND.
   AFF_INV and AFF_VAR represent the invariant and variant parts of the
   address expression, respectively.  If AFF_INV is simple, store the
   loop invariant variables it depends on in INV_VARS; if AFF_INV is
   complicated, handle it as a new invariant expression and record it
   in INV_EXPR.  RATIO is the ratio between the steps of USE and CAND.
   If CAN_AUTOINC is non-NULL, store in it a boolean indicating whether
   this is an auto-increment address.  */

static comp_cost
get_address_cost (struct ivopts_data *data, struct iv_use *use,
		  struct iv_cand *cand, aff_tree *aff_inv,
		  aff_tree *aff_var, HOST_WIDE_INT ratio,
		  bitmap *inv_vars, iv_inv_expr_ent **inv_expr,
		  bool *can_autoinc, bool speed)
{
  rtx addr;
  bool simple_inv = true;
  tree comp_inv = NULL_TREE, type = aff_var->type;
  comp_cost var_cost = no_cost, cost = no_cost;
  struct mem_address parts = {NULL_TREE, integer_one_node,
			      NULL_TREE, NULL_TREE, NULL_TREE};
  machine_mode addr_mode = TYPE_MODE (type);
  machine_mode mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
  addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));

  if (!aff_combination_const_p (aff_inv))
    {
      parts.index = integer_one_node;
      /* Addressing mode "base + index".  */
      if (valid_mem_ref_p (mem_mode, as, &parts))
	{
	  parts.step = wide_int_to_tree (type, ratio);
	  /* Addressing mode "base + index << scale".  */
	  if (ratio != 1 && !valid_mem_ref_p (mem_mode, as, &parts))
	    parts.step = NULL_TREE;

	  if (aff_inv->offset != 0)
	    {
	      parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
	      /* Addressing mode "base + index [<< scale] + offset".  */
	      if (!valid_mem_ref_p (mem_mode, as, &parts))
		parts.offset = NULL_TREE;
	      else
		aff_inv->offset = 0;
	    }

	  move_fixed_address_to_symbol (&parts, aff_inv);
	  /* Base is fixed address and is moved to symbol part.  */
	  if (parts.symbol != NULL_TREE && aff_combination_zero_p (aff_inv))
	    parts.base = NULL_TREE;

	  /* Addressing mode "symbol + base + index [<< scale] [+ offset]".  */
	  if (parts.symbol != NULL_TREE
	      && !valid_mem_ref_p (mem_mode, as, &parts))
	    {
	      aff_combination_add_elt (aff_inv, parts.symbol, 1);
	      parts.symbol = NULL_TREE;
	      /* Reset SIMPLE_INV since symbol address needs to be computed
		 outside of address expression in this case.  */
	      simple_inv = false;
	      /* Symbol part is moved back to base part, it can't be NULL.  */
	      parts.base = integer_one_node;
	    }
	}
      else
	parts.index = NULL_TREE;
    }
  else
    {
      if (can_autoinc && ratio == 1 && cst_and_fits_in_hwi (cand->iv->step))
	{
	  HOST_WIDE_INT ainc_step = int_cst_value (cand->iv->step);
	  HOST_WIDE_INT ainc_offset = (aff_inv->offset).to_shwi ();

	  if (stmt_after_increment (data->current_loop, cand, use->stmt))
	    ainc_offset += ainc_step;
	  cost = get_address_cost_ainc (ainc_step, ainc_offset,
					addr_mode, mem_mode, as, speed);
	  if (!cost.infinite_cost_p ())
	    {
	      *can_autoinc = true;
	      return cost;
	    }
	  cost = no_cost;
	}
      if (!aff_combination_zero_p (aff_inv))
	{
	  parts.offset = wide_int_to_tree (sizetype, aff_inv->offset);
	  /* Addressing mode "base + offset".  */
	  if (!valid_mem_ref_p (mem_mode, as, &parts))
	    parts.offset = NULL_TREE;
	  else
	    aff_inv->offset = 0;
	}
    }

  if (simple_inv)
    simple_inv = (aff_inv == NULL
		  || aff_combination_const_p (aff_inv)
		  || aff_combination_singleton_var_p (aff_inv));
  if (!aff_combination_zero_p (aff_inv))
    comp_inv = aff_combination_to_tree (aff_inv);
  if (comp_inv != NULL_TREE)
    cost = force_var_cost (data, comp_inv, inv_vars);
  if (ratio != 1 && parts.step == NULL_TREE)
    var_cost += mult_by_coeff_cost (ratio, addr_mode, speed);
  if (comp_inv != NULL_TREE && parts.index == NULL_TREE)
    var_cost += add_cost (speed, addr_mode);

  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
	bitmap_clear (*inv_vars);

      /* Cost of small invariant expression adjusted against loop niters
	 is usually zero, which makes it difficult to be differentiated
	 from candidate based on loop invariant variables.  Secondly, the
	 generated invariant expression may not be hoisted out of loop by
	 the following pass.  We penalize the cost by rounding up in order
	 to neutralize such effects.  */
      cost.cost = adjust_setup_cost (data, cost.cost, true);
      cost.scratch = cost.cost;
    }

  cost += var_cost;
  addr = addr_for_mem_ref (&parts, as, false);
  gcc_assert (memory_address_addr_space_p (mem_mode, addr, as));
  cost += address_cost (addr, mem_mode, as, speed);

  if (parts.symbol != NULL_TREE)
    cost.complexity += 1;
  if (parts.step != NULL_TREE && !integer_onep (parts.step))
    cost.complexity += 1;
  if (parts.base != NULL_TREE && parts.index != NULL_TREE)
    cost.complexity += 1;
  if (parts.offset != NULL_TREE && !integer_zerop (parts.offset))
    cost.complexity += 1;

  return cost;
}

/* Scale (multiply) the computed COST (except the scratch part that should
   be hoisted out of the loop) by AT->frequency / header->frequency, which
   makes the expected cost more accurate.  */

static comp_cost
get_scaled_computation_cost_at (ivopts_data *data, gimple *at, comp_cost cost)
{
  int loop_freq = data->current_loop->header->count.to_frequency (cfun);
  int bb_freq = gimple_bb (at)->count.to_frequency (cfun);
  if (loop_freq != 0)
    {
      gcc_assert (cost.scratch <= cost.cost);
      int scaled_cost
	= cost.scratch + (cost.cost - cost.scratch) * bb_freq / loop_freq;

      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Scaling cost based on bb prob "
		 "by %2.2f: %d (scratch: %d) -> %d (%d/%d)\n",
		 1.0f * bb_freq / loop_freq, cost.cost,
		 cost.scratch, scaled_cost, bb_freq, loop_freq);

      cost.cost = scaled_cost;
    }

  return cost;
}

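/* Editor's note -- a worked example with assumed frequencies: for
   cost.cost == 12, cost.scratch == 4, a loop header frequency of 100
   and a use block frequency of 50, the scaled cost is
   4 + (12 - 4) * 50 / 100 == 8.  Only the per-iteration part is
   scaled; the one-off setup (scratch) part is charged in full.  */
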
/* Determines the cost of the computation by which USE is expressed
   from induction variable CAND.  If ADDRESS_P is true, we just need
   to create an address from it, otherwise we want to get it into a
   register.  A set of invariants we depend on is stored in INV_VARS.
   If CAN_AUTOINC is nonnull, use it to record whether autoinc
   addressing is likely.  If INV_EXPR is nonnull, record the invariant
   expr entry in it.  */

static comp_cost
get_computation_cost (struct ivopts_data *data, struct iv_use *use,
		      struct iv_cand *cand, bool address_p, bitmap *inv_vars,
		      bool *can_autoinc, iv_inv_expr_ent **inv_expr)
{
  gimple *at = use->stmt;
  tree ubase = use->iv->base, cbase = cand->iv->base;
  tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
  tree comp_inv = NULL_TREE;
  HOST_WIDE_INT ratio, aratio;
  comp_cost cost;
  widest_int rat;
  aff_tree aff_inv, aff_var;
  bool speed = optimize_bb_for_speed_p (gimple_bb (at));

  if (inv_vars)
    *inv_vars = NULL;
  if (can_autoinc)
    *can_autoinc = false;
  if (inv_expr)
    *inv_expr = NULL;

  /* Check if we have enough precision to express the values of use.  */
  if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
    return infinite_cost;

  if (address_p
      || (use->iv->base_object
	  && cand->iv->base_object
	  && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
	  && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
    {
      /* Do not try to express address of an object with computation based
	 on address of a different object.  This may cause problems in rtl
	 level alias analysis (that does not expect this to be happening,
	 as this is illegal in C), and would be unlikely to be useful
	 anyway.  */
      if (use->iv->base_object
	  && cand->iv->base_object
	  && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
	return infinite_cost;
    }

  if (!get_computation_aff_1 (data->current_loop, at, use,
			      cand, &aff_inv, &aff_var, &rat)
      || !wi::fits_shwi_p (rat))
    return infinite_cost;

  ratio = rat.to_shwi ();
  if (address_p)
    {
      cost = get_address_cost (data, use, cand, &aff_inv, &aff_var, ratio,
			       inv_vars, inv_expr, can_autoinc, speed);
      return get_scaled_computation_cost_at (data, at, cost);
    }

  bool simple_inv = (aff_combination_const_p (&aff_inv)
		     || aff_combination_singleton_var_p (&aff_inv));
  tree signed_type = signed_type_for (aff_combination_type (&aff_inv));
  aff_combination_convert (&aff_inv, signed_type);
  if (!aff_combination_zero_p (&aff_inv))
    comp_inv = aff_combination_to_tree (&aff_inv);

  cost = force_var_cost (data, comp_inv, inv_vars);
  if (comp_inv && inv_expr && !simple_inv)
    {
      *inv_expr = get_loop_invariant_expr (data, comp_inv);
      /* Clear depends on.  */
      if (*inv_expr != NULL && inv_vars && *inv_vars)
	bitmap_clear (*inv_vars);

      cost.cost = adjust_setup_cost (data, cost.cost);
      /* Record setup cost in scratch field.  */
      cost.scratch = cost.cost;
    }
  /* Cost of constant integer can be covered when adding invariant part to
     variant part.  */
  else if (comp_inv && CONSTANT_CLASS_P (comp_inv))
    cost = no_cost;

  /* Need type narrowing to represent use with cand.  */
  if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
    {
      machine_mode outer_mode = TYPE_MODE (utype);
      machine_mode inner_mode = TYPE_MODE (ctype);
      cost += comp_cost (convert_cost (outer_mode, inner_mode, speed), 0);
    }

  /* Turn a + i * (-c) into a - i * c.  */
  if (ratio < 0 && comp_inv && !integer_zerop (comp_inv))
    aratio = -ratio;
  else
    aratio = ratio;

  if (ratio != 1)
    cost += mult_by_coeff_cost (aratio, TYPE_MODE (utype), speed);

  /* TODO: We may also need to check if we can compute a + i * 4 in one
     instruction.  */
  /* Need to add up the invariant and variant parts.  */
  if (comp_inv && !integer_zerop (comp_inv))
    cost += add_cost (speed, TYPE_MODE (utype));

  return get_scaled_computation_cost_at (data, at, cost);
}

/* Determines cost of computing the use in GROUP with CAND in a generic
   expression.  */

static bool
determine_group_iv_cost_generic (struct ivopts_data *data,
				 struct iv_group *group, struct iv_cand *cand)
{
  comp_cost cost;
  iv_inv_expr_ent *inv_expr = NULL;
  bitmap inv_vars = NULL, inv_exprs = NULL;
  struct iv_use *use = group->vuses[0];

  /* The simple case first -- if we need to express value of the preserved
     original biv, the cost is 0.  This also prevents us from counting the
     cost of increment twice -- once at this use and once in the cost of
     the candidate.  */
  if (cand->pos == IP_ORIGINAL && cand->incremented_at == use->stmt)
    cost = no_cost;
  else
    cost = get_computation_cost (data, use, cand, false,
				 &inv_vars, NULL, &inv_expr);

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  set_group_iv_cost (data, group, cand, cost, inv_vars,
		     NULL_TREE, ERROR_MARK, inv_exprs);
  return !cost.infinite_cost_p ();
}

/* Determines cost of computing uses in GROUP with CAND in addresses.  */

static bool
determine_group_iv_cost_address (struct ivopts_data *data,
				 struct iv_group *group, struct iv_cand *cand)
{
  unsigned i;
  bitmap inv_vars = NULL, inv_exprs = NULL;
  bool can_autoinc;
  iv_inv_expr_ent *inv_expr = NULL;
  struct iv_use *use = group->vuses[0];
  comp_cost sum_cost = no_cost, cost;

  cost = get_computation_cost (data, use, cand, true,
			       &inv_vars, &can_autoinc, &inv_expr);

  if (inv_expr)
    {
      inv_exprs = BITMAP_ALLOC (NULL);
      bitmap_set_bit (inv_exprs, inv_expr->id);
    }
  sum_cost = cost;
  if (!sum_cost.infinite_cost_p () && cand->ainc_use == use)
    {
      if (can_autoinc)
	sum_cost -= cand->cost_step;
      /* If we generated the candidate solely for exploiting autoincrement
	 opportunities, and it turns out it can't be used, set the cost to
	 infinity to make sure we ignore it.  */
      else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
	sum_cost = infinite_cost;
    }

  /* Uses in a group can share setup code, so only add setup cost once.  */
  cost -= cost.scratch;
  /* Compute and add costs for the remaining uses of this group.  */
  for (i = 1; i < group->vuses.length () && !sum_cost.infinite_cost_p (); i++)
    {
      struct iv_use *next = group->vuses[i];

      /* TODO: We could skip computing cost for sub iv_use when it has the
	 same cost as the first iv_use, but the cost really depends on the
	 offset and where the iv_use is.  */
      cost = get_computation_cost (data, next, cand, true,
				   NULL, &can_autoinc, &inv_expr);
      if (inv_expr)
	{
	  if (!inv_exprs)
	    inv_exprs = BITMAP_ALLOC (NULL);

	  bitmap_set_bit (inv_exprs, inv_expr->id);
	}
      sum_cost += cost;
    }
  set_group_iv_cost (data, group, cand, sum_cost, inv_vars,
		     NULL_TREE, ERROR_MARK, inv_exprs);

  return !sum_cost.infinite_cost_p ();
}

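/* Editor's note on the setup sharing above, with assumed numbers: per
   the comment "Uses in a group can share setup code", the intended
   accounting is that if the first use costs 7 of which 3 is one-off
   setup of the shared invariant, the remaining uses of the group are
   charged without that setup, so a group of three similar uses sums
   to roughly 7 + 4 + 4 rather than 3 * 7.  */
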
/* Computes value of candidate CAND at position AT in iteration NITER, and
   stores it to VAL.  */

static void
cand_value_at (struct loop *loop, struct iv_cand *cand, gimple *at, tree niter,
	       aff_tree *val)
{
  aff_tree step, delta, nit;
  struct iv *iv = cand->iv;
  tree type = TREE_TYPE (iv->base);
  tree steptype;
  if (POINTER_TYPE_P (type))
    steptype = sizetype;
  else
    steptype = unsigned_type_for (type);

  tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
  aff_combination_convert (&step, steptype);
  tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
  aff_combination_convert (&nit, steptype);
  aff_combination_mult (&nit, &step, &delta);
  if (stmt_after_increment (loop, cand, at))
    aff_combination_add (&delta, &step);

  tree_to_aff_combination (iv->base, type, val);
  if (!POINTER_TYPE_P (type))
    aff_combination_convert (val, steptype);
  aff_combination_add (val, &delta);
}

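/* Editor's note -- a quick check of cand_value_at with assumed values:
   for an iv with base 10 and step 3, the value after NITER == 5
   iterations is 10 + 3 * 5 == 25; if AT lies after the increment, one
   more step is added, giving 28.  */
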
/* Returns period of induction variable IV.  */

static tree
iv_period (struct iv *iv)
{
  tree step = iv->step, period, type;
  tree pow2div;

  gcc_assert (step && TREE_CODE (step) == INTEGER_CST);

  type = unsigned_type_for (TREE_TYPE (step));
  /* The period of the iv is lcm (step, type_range) / step - 1, i.e.,
     N * type_range / step - 1 with N == step >> num_ending_zeros (step).
     Since the type range is a power of two, this simplifies to

       (type_range >> num_ending_zeros (step)) - 1.  */
  pow2div = num_ending_zeros (step);

  period = build_low_bits_mask (type,
				(TYPE_PRECISION (type)
				 - tree_to_uhwi (pow2div)));

  return period;
}

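/* Editor's note -- a worked instance of the formula above, assuming a
   32-bit unsigned type: for step == 4 there are two trailing zero
   bits, so the period is (2^32 >> 2) - 1 == 0x3fffffff.  An exit test
   based on this iv therefore stays valid for up to 0x3fffffff
   iterations before the variable can wrap back to a previous value.  */
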
/* Returns the comparison operator used when eliminating the iv USE.  */

static enum tree_code
iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
{
  struct loop *loop = data->current_loop;
  basic_block ex_bb;
  edge exit;

  ex_bb = gimple_bb (use->stmt);
  exit = EDGE_SUCC (ex_bb, 0);
  if (flow_bb_inside_loop_p (loop, exit->dest))
    exit = EDGE_SUCC (ex_bb, 1);

  return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
}

/* Returns true if we can prove that BASE - OFFSET does not overflow.  For
   now, we only detect the situation that BASE = SOMETHING + OFFSET, where
   the calculation is performed in a non-wrapping type.

   TODO: More generally, we could test for the situation that
	 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
	 This would require knowing the sign of OFFSET.  */

static bool
difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
{
  enum tree_code code;
  tree e1, e2;
  aff_tree aff_e1, aff_e2, aff_offset;

  if (!nowrap_type_p (TREE_TYPE (base)))
    return false;

  base = expand_simple_operations (base);

  if (TREE_CODE (base) == SSA_NAME)
    {
      gimple *stmt = SSA_NAME_DEF_STMT (base);

      if (gimple_code (stmt) != GIMPLE_ASSIGN)
	return false;

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;

      e1 = gimple_assign_rhs1 (stmt);
      e2 = gimple_assign_rhs2 (stmt);
    }
  else
    {
      code = TREE_CODE (base);
      if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
	return false;
      e1 = TREE_OPERAND (base, 0);
      e2 = TREE_OPERAND (base, 1);
    }

  /* Use affine expansion as deeper inspection to prove the equality.  */
  tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
				  &aff_e2, &data->name_expansion_cache);
  tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
				  &aff_offset, &data->name_expansion_cache);
  aff_combination_scale (&aff_offset, -1);
  switch (code)
    {
    case PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      if (aff_combination_zero_p (&aff_e2))
	return true;

      tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
				      &aff_e1, &data->name_expansion_cache);
      aff_combination_add (&aff_e1, &aff_offset);
      return aff_combination_zero_p (&aff_e1);

    case POINTER_PLUS_EXPR:
      aff_combination_add (&aff_e2, &aff_offset);
      return aff_combination_zero_p (&aff_e2);

    default:
      return false;
    }
}

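/* Editor's note -- the case the function is meant to recognize, with
   assumed names: if BASE was computed as p_0 = base + a in a
   non-wrapping type, then difference_cannot_overflow_p (data, p_0, a)
   returns true, because the affine expansion of the addend a plus
   -OFFSET cancels to zero, proving p_0 - a (== base) cannot
   overflow.  */
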
/* Tries to replace loop exit by one formulated in terms of a LT_EXPR
   comparison with CAND.  NITER describes the number of iterations of
   the loops.  If successful, the comparison in COMP_P is altered accordingly.

   We aim to handle the following situation:

   sometype *base, *p;
   int a, b, i;

   i = a;
   p = p_0 = base + a;

   do
     {
       bla (*p);
       p++;
       i++;
     }
   while (i < b);

   Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
   We aim to optimize this to

   p = p_0 = base + a;
   do
     {
       bla (*p);
       p++;
     }
   while (p < p_0 - a + b);

   This preserves correctness, since the pointer arithmetic does not
   overflow.  More precisely:

   1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there
      is no overflow in computing it or the values of p.
   2) if a + 1 > b, then we need to verify that the expression p_0 - a does
      not overflow.  To prove this, we use the fact that p_0 = base + a.  */

static bool
iv_elimination_compare_lt (struct ivopts_data *data,
			   struct iv_cand *cand, enum tree_code *comp_p,
			   struct tree_niter_desc *niter)
{
  tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
  struct aff_tree nit, tmpa, tmpb;
  enum tree_code comp;
  HOST_WIDE_INT step;

  /* We need to know that the candidate induction variable does not overflow.
     While more complex analysis may be used to prove this, for now just
     check that the variable appears in the original program and that it
     is computed in a type that guarantees no overflows.  */
  cand_type = TREE_TYPE (cand->iv->base);
  if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
    return false;

  /* Make sure that the loop iterates till the loop bound is hit, as otherwise
     the calculation of the BOUND could overflow, making the comparison
     invalid.  */
  if (!data->loop_single_exit_p)
    return false;

  /* We need to be able to decide whether candidate is increasing or decreasing
     in order to choose the right comparison operator.  */
  if (!cst_and_fits_in_hwi (cand->iv->step))
    return false;
  step = int_cst_value (cand->iv->step);

  /* Check that the number of iterations matches the expected pattern:
     a + 1 > b ? 0 : b - a - 1.  */
  mbz = niter->may_be_zero;
  if (TREE_CODE (mbz) == GT_EXPR)
    {
      /* Handle a + 1 > b.  */
      tree op0 = TREE_OPERAND (mbz, 0);
      if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
	{
	  a = TREE_OPERAND (op0, 0);
	  b = TREE_OPERAND (mbz, 1);
	}
      else
	return false;
    }
  else if (TREE_CODE (mbz) == LT_EXPR)
    {
      tree op1 = TREE_OPERAND (mbz, 1);

      /* Handle b < a + 1.  */
      if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
	{
	  a = TREE_OPERAND (op1, 0);
	  b = TREE_OPERAND (mbz, 0);
	}
      else
	return false;
    }
  else
    return false;

  /* Expected number of iterations is B - A - 1.  Check that it matches
     the actual number, i.e., that B - A - NITER = 1.  */
  tree_to_aff_combination (niter->niter, nit_type, &nit);
  tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
  tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
  aff_combination_scale (&nit, -1);
  aff_combination_scale (&tmpa, -1);
  aff_combination_add (&tmpb, &tmpa);
  aff_combination_add (&tmpb, &nit);
  if (tmpb.n != 0 || tmpb.offset != 1)
    return false;

  /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
     overflow.  */
  offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
			cand->iv->step,
			fold_convert (TREE_TYPE (cand->iv->step), a));
  if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
    return false;

  /* Determine the new comparison operator.  */
  comp = step < 0 ? GT_EXPR : LT_EXPR;
  if (*comp_p == NE_EXPR)
    *comp_p = comp;
  else if (*comp_p == EQ_EXPR)
    *comp_p = invert_tree_comparison (comp, false);
  else
    gcc_unreachable ();

  return true;
}

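/* Editor's note -- a numeric trace of the checks above with assumed
   a == 2 and b == 7: may_be_zero has the shape "2 + 1 > 7", the
   expected iteration count is b - a - 1 == 4, and the consistency
   check B - A - NITER == 1 holds since 7 - 2 - 4 == 1.  The exit test
   can then be rewritten with < (or > for a negative step) instead
   of !=.  */
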
8b11a64c 4963/* Check whether it is possible to express the condition in USE by comparison
d8af4ba3
ZD
4964 of candidate CAND. If so, store the value compared with to BOUND, and the
4965 comparison operator to COMP. */
8b11a64c
ZD
4966
4967static bool
ca4c3169 4968may_eliminate_iv (struct ivopts_data *data,
d8af4ba3
ZD
4969 struct iv_use *use, struct iv_cand *cand, tree *bound,
4970 enum tree_code *comp)
8b11a64c 4971{
e6845c23 4972 basic_block ex_bb;
8b11a64c 4973 edge exit;
d8af4ba3 4974 tree period;
ca4c3169 4975 struct loop *loop = data->current_loop;
7e2ac86c 4976 aff_tree bnd;
e2102efc 4977 struct tree_niter_desc *desc = NULL;
cbc012d5 4978
9be872b7
ZD
4979 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4980 return false;
4981
52778e2a
EB
4982 /* For now works only for exits that dominate the loop latch.
4983 TODO: extend to other conditions inside loop body. */
726a989a 4984 ex_bb = gimple_bb (use->stmt);
e6845c23 4985 if (use->stmt != last_stmt (ex_bb)
726a989a
RB
4986 || gimple_code (use->stmt) != GIMPLE_COND
4987 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
8b11a64c
ZD
4988 return false;
4989
e6845c23
ZD
4990 exit = EDGE_SUCC (ex_bb, 0);
4991 if (flow_bb_inside_loop_p (loop, exit->dest))
4992 exit = EDGE_SUCC (ex_bb, 1);
4993 if (flow_bb_inside_loop_p (loop, exit->dest))
4994 return false;
4995
d8af4ba3
ZD
4996 desc = niter_for_exit (data, exit);
4997 if (!desc)
8b11a64c
ZD
4998 return false;
4999
52778e2a
EB
5000 /* Determine whether we can use the variable to test the exit condition.
5001 This is the case iff the period of the induction variable is greater
5002 than the number of iterations for which the exit condition is true. */
ca4c3169 5003 period = iv_period (cand->iv);
8b11a64c 5004
52778e2a 5005 /* If the number of iterations is constant, compare against it directly. */
d8af4ba3 5006 if (TREE_CODE (desc->niter) == INTEGER_CST)
52778e2a 5007 {
e2102efc
XDL
5008 /* See cand_value_at. */
5009 if (stmt_after_increment (loop, cand, use->stmt))
623b8e0a
ML
5010 {
5011 if (!tree_int_cst_lt (desc->niter, period))
5012 return false;
5013 }
e2102efc 5014 else
623b8e0a
ML
5015 {
5016 if (tree_int_cst_lt (period, desc->niter))
5017 return false;
5018 }
52778e2a
EB
5019 }
5020
5021 /* If not, and if this is the only possible exit of the loop, see whether
5022 we can get a conservative estimate on the number of iterations of the
5023 entire loop and compare against that instead. */
e2102efc 5024 else
52778e2a 5025 {
807e902e 5026 widest_int period_value, max_niter;
e2102efc
XDL
5027
5028 max_niter = desc->max;
5029 if (stmt_after_increment (loop, cand, use->stmt))
623b8e0a 5030 max_niter += 1;
807e902e
KZ
5031 period_value = wi::to_widest (period);
5032 if (wi::gtu_p (max_niter, period_value))
623b8e0a
ML
5033 {
5034 /* See if we can take advantage of inferred loop bound
5035 information. */
5036 if (data->loop_single_exit_p)
5037 {
5038 if (!max_loop_iterations (loop, &max_niter))
5039 return false;
5040 /* The loop bound is already adjusted by adding 1. */
5041 if (wi::gtu_p (max_niter, period_value))
5042 return false;
5043 }
5044 else
5045 return false;
5046 }
52778e2a
EB
5047 }
5048
d8af4ba3 5049 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
771f882e 5050
d6adff07
RB
5051 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5052 aff_combination_to_tree (&bnd));
d8af4ba3
ZD
5053 *comp = iv_elimination_compare (data, use);
5054
771f882e
ZD
5055 /* It is unlikely that computing the number of iterations using division
5056 would be more profitable than keeping the original induction variable. */
5057 if (expression_expensive_p (*bound))
5058 return false;
d8af4ba3
ZD
5059
5060 /* Sometimes, the situation that the number of iterations may be zero
5764ee3c 5061 unless additional assumptions hold can be handled by using <
5062 instead of != in the exit condition.
5063
5064 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5065 base the exit condition on it. However, that is often too
5066 expensive. */
5067 if (!integer_zerop (desc->may_be_zero))
5068 return iv_elimination_compare_lt (data, cand, comp, desc);
5069
8b11a64c
ZD
5070 return true;
5071}
5072
bb8d292d 5073/* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
026c3cfd 5074 be copied if it is used in the loop body and DATA->body_includes_call. */
bb8d292d
TV
5075
5076static int
5077parm_decl_cost (struct ivopts_data *data, tree bound)
5078{
5079 tree sbound = bound;
5080 STRIP_NOPS (sbound);
5081
5082 if (TREE_CODE (sbound) == SSA_NAME
67386041 5083 && SSA_NAME_IS_DEFAULT_DEF (sbound)
bb8d292d 5084 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
bb8d292d
TV
5085 && data->body_includes_call)
5086 return COSTS_N_INSNS (1);
5087
5088 return 0;
5089}
e2102efc 5090
309a0cf6 5091/* Determines cost of computing the use in GROUP with CAND in a condition. */
8b11a64c 5092
b1b02be2 5093static bool
309a0cf6
BC
5094determine_group_iv_cost_cond (struct ivopts_data *data,
5095 struct iv_group *group, struct iv_cand *cand)
8b11a64c 5096{
b697aed4
ZD
5097 tree bound = NULL_TREE;
5098 struct iv *cmp_iv;
0ca91c77
BC
5099 bitmap inv_exprs = NULL;
5100 bitmap inv_vars_elim = NULL, inv_vars_express = NULL, inv_vars;
b6a2258f
BC
5101 comp_cost elim_cost = infinite_cost, express_cost, cost, bound_cost;
5102 enum comp_iv_rewrite rewrite_type;
0ca91c77 5103 iv_inv_expr_ent *inv_expr_elim = NULL, *inv_expr_express = NULL, *inv_expr;
04eadb13 5104 tree *control_var, *bound_cst;
76725a03 5105 enum tree_code comp = ERROR_MARK;
309a0cf6 5106 struct iv_use *use = group->vuses[0];
8b11a64c 5107
b6a2258f
BC
5108 /* Extract condition operands. */
5109 rewrite_type = extract_cond_operands (data, use->stmt, &control_var,
5110 &bound_cst, NULL, &cmp_iv);
5111 gcc_assert (rewrite_type != COMP_IV_NA);
5112
b697aed4 5113 /* Try iv elimination. */
b6a2258f
BC
5114 if (rewrite_type == COMP_IV_ELIM
5115 && may_eliminate_iv (data, use, cand, &bound, &comp))
cbc012d5 5116 {
0ca91c77 5117 elim_cost = force_var_cost (data, bound, &inv_vars_elim);
bb8d292d 5118 if (elim_cost.cost == 0)
623b8e0a 5119 elim_cost.cost = parm_decl_cost (data, bound);
bb8d292d 5120 else if (TREE_CODE (bound) == INTEGER_CST)
623b8e0a 5121 elim_cost.cost = 0;
bb8d292d 5122 /* If we replace a loop condition 'i < n' with 'p < base + n',
0ca91c77
BC
5123 inv_vars_elim will have 'base' and 'n' set, which implies that both
5124 'base' and 'n' will be live during the loop. More likely,
bb8d292d 5125 'base + n' will be loop invariant, resulting in only one live value
0ca91c77
BC
5126 during the loop. So in that case we clear inv_vars_elim and set
5127 inv_expr_elim instead. */
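      /* For instance (an illustrative sketch): rewriting the exit test of
	     for (i = 0; i < n; i++)
	       *p++ = 0;
	 to compare the pointer IV against "base + n" records the sum as
	 one invariant expression, so a single register stays live across
	 the loop instead of separate ones for 'base' and 'n'.  */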
5128 if (inv_vars_elim && bitmap_count_bits (inv_vars_elim) > 1)
bb8d292d 5129 {
f9f69dd6 5130 inv_expr_elim = get_loop_invariant_expr (data, bound);
0ca91c77 5131 bitmap_clear (inv_vars_elim);
bb8d292d 5132 }
cbc012d5
ZD
5133 /* The bound is a loop invariant, so it will be only computed
5134 once. */
6521ac85 5135 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
cbc012d5 5136 }
8b11a64c 5137
04eadb13
SP
5138 /* When the condition is a comparison of the candidate IV against
5139 zero, prefer this IV.
5140
073a8998 5141 TODO: The constant that we're subtracting from the cost should
04eadb13
SP
5142 be target-dependent. This information should be added to the
5143 target costs for each backend. */
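  /* E.g. (illustrative): a down-counting loop such as
	 for (i = n; i != 0; i--)
     exits on a comparison against zero, which many targets obtain for free
     from the condition flags of the decrement; the small bonus applied
     below nudges such a candidate ahead of otherwise equal-cost rivals.  */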
8d18b6df 5144 if (!elim_cost.infinite_cost_p () /* Do not try to decrease infinite! */
572ae476 5145 && integer_zerop (*bound_cst)
04eadb13
SP
5146 && (operand_equal_p (*control_var, cand->var_after, 0)
5147 || operand_equal_p (*control_var, cand->var_before, 0)))
8d18b6df 5148 elim_cost -= 1;
04eadb13 5149
b697aed4 5150 express_cost = get_computation_cost (data, use, cand, false,
0ca91c77
BC
5151 &inv_vars_express, NULL,
5152 &inv_expr_express);
1c52c69f
BC
5153 if (cmp_iv != NULL)
5154 find_inv_vars (data, &cmp_iv->base, &inv_vars_express);
f5f12961 5155
bb8d292d
TV
5156 /* Count the cost of the original bound as well. */
5157 bound_cost = force_var_cost (data, *bound_cst, NULL);
5158 if (bound_cost.cost == 0)
5159 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5160 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5161 bound_cost.cost = 0;
8d18b6df 5162 express_cost += bound_cost;
bb8d292d 5163
b9ff6079 5164 /* Choose the better approach, preferring the eliminated IV. */
8d18b6df 5165 if (elim_cost <= express_cost)
8b11a64c 5166 {
b697aed4 5167 cost = elim_cost;
0ca91c77
BC
5168 inv_vars = inv_vars_elim;
5169 inv_vars_elim = NULL;
5170 inv_expr = inv_expr_elim;
b697aed4
ZD
5171 }
5172 else
5173 {
5174 cost = express_cost;
0ca91c77
BC
5175 inv_vars = inv_vars_express;
5176 inv_vars_express = NULL;
b697aed4 5177 bound = NULL_TREE;
d8af4ba3 5178 comp = ERROR_MARK;
0ca91c77 5179 inv_expr = inv_expr_express;
8b11a64c
ZD
5180 }
5181
0ca91c77
BC
5182 if (inv_expr)
5183 {
5184 inv_exprs = BITMAP_ALLOC (NULL);
5185 bitmap_set_bit (inv_exprs, inv_expr->id);
5186 }
309a0cf6 5187 set_group_iv_cost (data, group, cand, cost,
0ca91c77 5188 inv_vars, bound, comp, inv_exprs);
b697aed4 5189
0ca91c77
BC
5190 if (inv_vars_elim)
5191 BITMAP_FREE (inv_vars_elim);
5192 if (inv_vars_express)
5193 BITMAP_FREE (inv_vars_express);
b697aed4 5194
8d18b6df 5195 return !cost.infinite_cost_p ();
8b11a64c
ZD
5196}
5197
309a0cf6
BC
5198/* Determines cost of computing uses in GROUP with CAND. Returns false
5199 if GROUP cannot be represented with CAND. */
8b11a64c 5200
b1b02be2 5201static bool
309a0cf6
BC
5202determine_group_iv_cost (struct ivopts_data *data,
5203 struct iv_group *group, struct iv_cand *cand)
8b11a64c 5204{
309a0cf6 5205 switch (group->type)
8b11a64c
ZD
5206 {
5207 case USE_NONLINEAR_EXPR:
309a0cf6 5208 return determine_group_iv_cost_generic (data, group, cand);
8b11a64c 5209
8b11a64c 5210 case USE_ADDRESS:
309a0cf6 5211 return determine_group_iv_cost_address (data, group, cand);
8b11a64c
ZD
5212
5213 case USE_COMPARE:
309a0cf6 5214 return determine_group_iv_cost_cond (data, group, cand);
8b11a64c
ZD
5215
5216 default:
1e128c5f 5217 gcc_unreachable ();
8b11a64c
ZD
5218 }
5219}
5220
2c08497a
BS
5221/* Return true if get_computation_cost indicates that autoincrement is
5222 a possibility for the pair of USE and CAND, false otherwise. */
5223
5224static bool
5225autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5226 struct iv_cand *cand)
5227{
2c08497a
BS
5228 if (use->type != USE_ADDRESS)
5229 return false;
5230
37708714
BC
5231 bool can_autoinc = false;
5232 get_computation_cost (data, use, cand, true, NULL, &can_autoinc, NULL);
5233 return can_autoinc;
2c08497a
BS
5234}
5235
5236/* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5237 use that allows autoincrement, and set their AINC_USE if possible. */
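/* E.g. (illustrative): on a target with post-increment addressing, a use
   "*p = x;" immediately followed by the increment "p = p + 1;" can be
   emitted as a single auto-incrementing store, making the step of such an
   IP_ORIGINAL candidate effectively free.  */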
5238
5239static void
5240set_autoinc_for_original_candidates (struct ivopts_data *data)
5241{
5242 unsigned i, j;
5243
309a0cf6 5244 for (i = 0; i < data->vcands.length (); i++)
2c08497a 5245 {
309a0cf6 5246 struct iv_cand *cand = data->vcands[i];
85ff4ec6
BC
5247 struct iv_use *closest_before = NULL;
5248 struct iv_use *closest_after = NULL;
2c08497a
BS
5249 if (cand->pos != IP_ORIGINAL)
5250 continue;
85ff4ec6 5251
309a0cf6 5252 for (j = 0; j < data->vgroups.length (); j++)
2c08497a 5253 {
309a0cf6
BC
5254 struct iv_group *group = data->vgroups[j];
5255 struct iv_use *use = group->vuses[0];
2c08497a 5256 unsigned uid = gimple_uid (use->stmt);
85ff4ec6
BC
5257
5258 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
2c08497a 5259 continue;
85ff4ec6
BC
5260
5261 if (uid < gimple_uid (cand->incremented_at)
5262 && (closest_before == NULL
5263 || uid > gimple_uid (closest_before->stmt)))
5264 closest_before = use;
5265
5266 if (uid > gimple_uid (cand->incremented_at)
5267 && (closest_after == NULL
5268 || uid < gimple_uid (closest_after->stmt)))
5269 closest_after = use;
2c08497a 5270 }
85ff4ec6
BC
5271
5272 if (closest_before != NULL
5273 && autoinc_possible_for_pair (data, closest_before, cand))
5274 cand->ainc_use = closest_before;
5275 else if (closest_after != NULL
5276 && autoinc_possible_for_pair (data, closest_after, cand))
5277 cand->ainc_use = closest_after;
2c08497a
BS
5278 }
5279}
5280
06fd3160
BC
5281/* Relate compare use with all candidates. */
5282
5283static void
5284relate_compare_use_with_all_cands (struct ivopts_data *data)
5285{
7d27b70b 5286 unsigned i, count = data->vcands.length ();
06fd3160
BC
5287 for (i = 0; i < data->vgroups.length (); i++)
5288 {
5289 struct iv_group *group = data->vgroups[i];
5290
5291 if (group->type == USE_COMPARE)
7d27b70b 5292 bitmap_set_range (group->related_cands, 0, count);
06fd3160
BC
5293 }
5294}
5295
2c08497a
BS
5296/* Finds the candidates for the induction variables. */
5297
5298static void
5299find_iv_candidates (struct ivopts_data *data)
5300{
5301 /* Add commonly used ivs. */
5302 add_standard_iv_candidates (data);
5303
5304 /* Add old induction variables. */
4c3b378b 5305 add_iv_candidate_for_bivs (data);
2c08497a
BS
5306
5307 /* Add induction variables derived from uses. */
309a0cf6 5308 add_iv_candidate_for_groups (data);
2c08497a
BS
5309
5310 set_autoinc_for_original_candidates (data);
5311
5312 /* Record the important candidates. */
5313 record_important_candidates (data);
309a0cf6 5314
06fd3160
BC
5315 /* Relate compare iv_use with all candidates. */
5316 if (!data->consider_all_candidates)
5317 relate_compare_use_with_all_cands (data);
5318
309a0cf6
BC
5319 if (dump_file && (dump_flags & TDF_DETAILS))
5320 {
5321 unsigned i;
5322
5323 fprintf (dump_file, "\n<Important Candidates>:\t");
5324 for (i = 0; i < data->vcands.length (); i++)
5325 if (data->vcands[i]->important)
5326 fprintf (dump_file, " %d,", data->vcands[i]->id);
5327 fprintf (dump_file, "\n");
5328
5329 fprintf (dump_file, "\n<Group, Cand> Related:\n");
5330 for (i = 0; i < data->vgroups.length (); i++)
5331 {
5332 struct iv_group *group = data->vgroups[i];
5333
5334 if (group->related_cands)
5335 {
5336 fprintf (dump_file, " Group %d:\t", group->id);
5337 dump_bitmap (dump_file, group->related_cands);
5338 }
5339 }
5340 fprintf (dump_file, "\n");
5341 }
2c08497a
BS
5342}
5343
309a0cf6 5344/* Determines the costs of computing each group of uses with each iv candidate. */
8b11a64c
ZD
5345
5346static void
309a0cf6 5347determine_group_iv_costs (struct ivopts_data *data)
8b11a64c
ZD
5348{
5349 unsigned i, j;
8b11a64c 5350 struct iv_cand *cand;
309a0cf6 5351 struct iv_group *group;
8bdbfff5 5352 bitmap to_clear = BITMAP_ALLOC (NULL);
8b11a64c
ZD
5353
5354 alloc_use_cost_map (data);
5355
309a0cf6 5356 for (i = 0; i < data->vgroups.length (); i++)
8b11a64c 5357 {
309a0cf6 5358 group = data->vgroups[i];
8b11a64c
ZD
5359
5360 if (data->consider_all_candidates)
5361 {
309a0cf6 5362 for (j = 0; j < data->vcands.length (); j++)
8b11a64c 5363 {
309a0cf6
BC
5364 cand = data->vcands[j];
5365 determine_group_iv_cost (data, group, cand);
8b11a64c
ZD
5366 }
5367 }
5368 else
5369 {
87c476a2
ZD
5370 bitmap_iterator bi;
5371
309a0cf6 5372 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, j, bi)
8b11a64c 5373 {
309a0cf6
BC
5374 cand = data->vcands[j];
5375 if (!determine_group_iv_cost (data, group, cand))
b1b02be2 5376 bitmap_set_bit (to_clear, j);
87c476a2 5377 }
b1b02be2
ZD
5378
5379 /* Remove the candidates for which the cost is infinite from
5380 the list of related candidates. */
309a0cf6 5381 bitmap_and_compl_into (group->related_cands, to_clear);
b1b02be2 5382 bitmap_clear (to_clear);
8b11a64c
ZD
5383 }
5384 }
5385
8bdbfff5 5386 BITMAP_FREE (to_clear);
b1b02be2 5387
8b11a64c
ZD
5388 if (dump_file && (dump_flags & TDF_DETAILS))
5389 {
e97cac02
BC
5390 bitmap_iterator bi;
5391
5392 /* Dump invariant variables. */
5393 fprintf (dump_file, "\n<Invariant Vars>:\n");
5394 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5395 {
5396 struct version_info *info = ver_info (data, i);
5397 if (info->inv_id)
5398 {
5399 fprintf (dump_file, "Inv %d:\t", info->inv_id);
5400 print_generic_expr (dump_file, info->name, TDF_SLIM);
5401 fprintf (dump_file, "%s\n",
5402 info->has_nonlin_use ? "" : "\t(eliminable)");
5403 }
5404 }
5405
5406 /* Dump invariant expressions. */
623b8e0a
ML
5407 fprintf (dump_file, "\n<Invariant Expressions>:\n");
5408 auto_vec <iv_inv_expr_ent *> list (data->inv_expr_tab->elements ());
5409
5410 for (hash_table<iv_inv_expr_hasher>::iterator it
5411 = data->inv_expr_tab->begin (); it != data->inv_expr_tab->end ();
5412 ++it)
5413 list.safe_push (*it);
5414
5415 list.qsort (sort_iv_inv_expr_ent);
5416
5417 for (i = 0; i < list.length (); ++i)
5418 {
0ca91c77 5419 fprintf (dump_file, "inv_expr %d: \t", list[i]->id);
623b8e0a
ML
5420 print_generic_expr (dump_file, list[i]->expr, TDF_SLIM);
5421 fprintf (dump_file, "\n");
5422 }
5423
5424 fprintf (dump_file, "\n<Group-candidate Costs>:\n");
8b11a64c 5425
309a0cf6 5426 for (i = 0; i < data->vgroups.length (); i++)
8b11a64c 5427 {
309a0cf6 5428 group = data->vgroups[i];
8b11a64c 5429
309a0cf6 5430 fprintf (dump_file, "Group %d:\n", i);
0ca91c77 5431 fprintf (dump_file, " cand\tcost\tcompl.\tinv.expr.\tinv.vars\n");
309a0cf6 5432 for (j = 0; j < group->n_map_members; j++)
8b11a64c 5433 {
309a0cf6 5434 if (!group->cost_map[j].cand
8d18b6df 5435 || group->cost_map[j].cost.infinite_cost_p ())
8b11a64c
ZD
5436 continue;
5437
6e8c65f6 5438 fprintf (dump_file, " %d\t%d\t%d\t",
309a0cf6
BC
5439 group->cost_map[j].cand->id,
5440 group->cost_map[j].cost.cost,
5441 group->cost_map[j].cost.complexity);
0ca91c77
BC
5442 if (!group->cost_map[j].inv_exprs
5443 || bitmap_empty_p (group->cost_map[j].inv_exprs))
5444 fprintf (dump_file, "NIL;\t");
623b8e0a 5445 else
8b11a64c 5446 bitmap_print (dump_file,
0ca91c77
BC
5447 group->cost_map[j].inv_exprs, "", ";\t");
5448 if (!group->cost_map[j].inv_vars
5449 || bitmap_empty_p (group->cost_map[j].inv_vars))
5450 fprintf (dump_file, "NIL;\n");
5451 else
5452 bitmap_print (dump_file,
5453 group->cost_map[j].inv_vars, "", "\n");
8b11a64c
ZD
5454 }
5455
5456 fprintf (dump_file, "\n");
5457 }
5458 fprintf (dump_file, "\n");
5459 }
5460}
5461
5462/* Determines cost of the candidate CAND. */
5463
5464static void
5465determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5466{
6e8c65f6
ZD
5467 comp_cost cost_base;
5468 unsigned cost, cost_step;
4366cf6d 5469 tree base;
8b11a64c 5470
c1662028 5471 gcc_assert (cand->iv != NULL);
8b11a64c
ZD
5472
5473 /* There are two costs associated with the candidate -- its increment
5474 and its initialization. The second is almost negligible for any loop
5475 that rolls enough, so we take it just very little into account. */
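  /* E.g. (illustrative): an initialization costing one add contributes far
     less than COSTS_N_INSNS (1) here, because adjust_setup_cost amortizes
     the one-off setup over the expected number of iterations.  */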
5476
5477 base = cand->iv->base;
5478 cost_base = force_var_cost (data, base, NULL);
a53c5024
TV
5479 /* It will be exceptional that the iv register happens to be initialized with
5480 the proper value at no cost. In general, there will at least be a regcopy
5481 or a const set. */
5482 if (cost_base.cost == 0)
5483 cost_base.cost = COSTS_N_INSNS (1);
5322d07e 5484 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
8b11a64c 5485
6521ac85 5486 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
8b11a64c 5487
6e8c65f6 5488 /* Prefer the original ivs unless we may gain something by replacing it.
fa10beec 5489 The reason is to make debugging simpler; so this is not relevant for
6e8c65f6
ZD
5490 artificial ivs created by other optimization passes. */
5491 if (cand->pos != IP_ORIGINAL
70b5e7dc 5492 || !SSA_NAME_VAR (cand->var_before)
6e8c65f6
ZD
5493 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5494 cost++;
b8698a0f 5495
8b11a64c
ZD
5496 /* Prefer not to insert statements into latch unless there are some
5497 already (so that we do not create unnecessary jumps). */
4366cf6d
ZD
5498 if (cand->pos == IP_END
5499 && empty_block_p (ip_end_pos (data->current_loop)))
6e8c65f6
ZD
5500 cost++;
5501
5502 cand->cost = cost;
2c08497a 5503 cand->cost_step = cost_step;
8b11a64c
ZD
5504}
5505
5506/* Determines costs of computation of the candidates. */
5507
5508static void
5509determine_iv_costs (struct ivopts_data *data)
5510{
5511 unsigned i;
5512
5513 if (dump_file && (dump_flags & TDF_DETAILS))
5514 {
309a0cf6 5515 fprintf (dump_file, "<Candidate Costs>:\n");
8b11a64c
ZD
5516 fprintf (dump_file, " cand\tcost\n");
5517 }
5518
309a0cf6 5519 for (i = 0; i < data->vcands.length (); i++)
8b11a64c 5520 {
309a0cf6 5521 struct iv_cand *cand = data->vcands[i];
8b11a64c
ZD
5522
5523 determine_iv_cost (data, cand);
5524
5525 if (dump_file && (dump_flags & TDF_DETAILS))
5526 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5527 }
2c08497a 5528
0f14009a
BS
5529 if (dump_file && (dump_flags & TDF_DETAILS))
5530 fprintf (dump_file, "\n");
8b11a64c
ZD
5531}
5532
c18101f5
BC
5533/* Estimate register pressure for loop having N_INVS invariants and N_CANDS
5534 induction variables. Note N_INVS includes both invariant variables and
5535 invariant expressions. */
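/* Worked example (all numbers illustrative): with available_regs == 16,
   target_res_regs == 3 and regs_used == 4, a set of 5 candidates and 2
   invariants gives regs_needed == 4 + 7 == 11; since 11 + 3 < 16, the cost
   is simply n_new == 7 and the function returns 7 + 5 == 12.  Growing the
   invariants to 9 makes regs_needed == 18 > 16, so the spill penalties in
   the later branches apply instead.  */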
8b11a64c
ZD
5536
5537static unsigned
c18101f5
BC
5538ivopts_estimate_reg_pressure (struct ivopts_data *data, unsigned n_invs,
5539 unsigned n_cands)
8b11a64c 5540{
c18101f5
BC
5541 unsigned cost;
5542 unsigned n_old = data->regs_used, n_new = n_invs + n_cands;
5543 unsigned regs_needed = n_new + n_old, available_regs = target_avail_regs;
5544 bool speed = data->speed;
5545
5546 /* If there is a call in the loop body, the call-clobbered registers
5547 are not available for loop invariants. */
5548 if (data->body_includes_call)
5549 available_regs = available_regs - target_clobbered_regs;
5550
5551 /* If we have enough registers. */
5552 if (regs_needed + target_res_regs < available_regs)
5553 cost = n_new;
5554 /* If close to running out of registers, try to preserve them. */
5555 else if (regs_needed <= available_regs)
5556 cost = target_reg_cost [speed] * regs_needed;
5557 /* If we run out of available registers but the number of candidates
5558 still fits in them, we penalize the extra registers using target_spill_cost. */
5559 else if (n_cands <= available_regs)
5560 cost = target_reg_cost [speed] * available_regs
5561 + target_spill_cost [speed] * (regs_needed - available_regs);
5562 /* If even the number of candidates exceeds the available registers, we
5563 penalize the extra candidate registers using target_spill_cost * 2, because
5564 it is more expensive to spill an induction variable than an invariant. */
5565 else
5566 cost = target_reg_cost [speed] * available_regs
5567 + target_spill_cost [speed] * (n_cands - available_regs) * 2
5568 + target_spill_cost [speed] * (regs_needed - n_cands);
5569
5570 /* Finally, add the number of candidates, so that we prefer eliminating
5571 induction variables if possible. */
5572 return cost + n_cands;
8b11a64c
ZD
5573}
5574
5575/* For each size of the induction variable set determine the penalty. */
5576
5577static void
5578determine_set_costs (struct ivopts_data *data)
5579{
5580 unsigned j, n;
538dd0b7
DM
5581 gphi *phi;
5582 gphi_iterator psi;
726a989a 5583 tree op;
8b11a64c 5584 struct loop *loop = data->current_loop;
87c476a2 5585 bitmap_iterator bi;
8b11a64c 5586
8b11a64c
ZD
5587 if (dump_file && (dump_flags & TDF_DETAILS))
5588 {
309a0cf6 5589 fprintf (dump_file, "<Global Costs>:\n");
8b11a64c 5590 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
bec922f0 5591 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
f40751dd
JH
5592 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5593 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
8b11a64c
ZD
5594 }
5595
5596 n = 0;
726a989a 5597 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
8b11a64c 5598 {
538dd0b7 5599 phi = psi.phi ();
8b11a64c
ZD
5600 op = PHI_RESULT (phi);
5601
ea057359 5602 if (virtual_operand_p (op))
8b11a64c
ZD
5603 continue;
5604
5605 if (get_iv (data, op))
5606 continue;
5607
ab9e91f9
BC
5608 if (!POINTER_TYPE_P (TREE_TYPE (op))
5609 && !INTEGRAL_TYPE_P (TREE_TYPE (op)))
5610 continue;
5611
8b11a64c
ZD
5612 n++;
5613 }
5614
87c476a2 5615 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
5616 {
5617 struct version_info *info = ver_info (data, j);
5618
5619 if (info->inv_id && info->has_nonlin_use)
5620 n++;
87c476a2 5621 }
8b11a64c 5622
9a2ef6b8 5623 data->regs_used = n;
8b11a64c
ZD
5624 if (dump_file && (dump_flags & TDF_DETAILS))
5625 fprintf (dump_file, " regs_used %d\n", n);
5626
5627 if (dump_file && (dump_flags & TDF_DETAILS))
5628 {
5629 fprintf (dump_file, " cost for size:\n");
5630 fprintf (dump_file, " ivs\tcost\n");
5631 for (j = 0; j <= 2 * target_avail_regs; j++)
5632 fprintf (dump_file, " %d\t%d\n", j,
c18101f5 5633 ivopts_estimate_reg_pressure (data, 0, j));
8b11a64c
ZD
5634 fprintf (dump_file, "\n");
5635 }
5636}
5637
b1b02be2 5638/* Returns true if A is a cheaper cost pair than B. */
8b11a64c 5639
b1b02be2
ZD
5640static bool
5641cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
8b11a64c 5642{
b1b02be2
ZD
5643 if (!a)
5644 return false;
8b11a64c 5645
b1b02be2
ZD
5646 if (!b)
5647 return true;
5648
8d18b6df 5649 if (a->cost < b->cost)
b1b02be2
ZD
5650 return true;
5651
8d18b6df 5652 if (b->cost < a->cost)
b1b02be2
ZD
5653 return false;
5654
5655 /* In case the costs are the same, prefer the cheaper candidate. */
5656 if (a->cand->cost < b->cand->cost)
5657 return true;
5658
5659 return false;
5660}
5661
e292d606
BC
5662/* Compare if A is a more expensive cost pair than B. Return 1, 0 and -1
5663 for more expensive, equal and cheaper respectively. */
5664
5665static int
5666compare_cost_pair (struct cost_pair *a, struct cost_pair *b)
5667{
5668 if (cheaper_cost_pair (a, b))
5669 return -1;
5670 if (cheaper_cost_pair (b, a))
5671 return 1;
5672
5673 return 0;
5674}
18081149
XDL
5675
5676/* Returns the cost pair by which GROUP is expressed in IVS. */
5677
5678static struct cost_pair *
309a0cf6 5679iv_ca_cand_for_group (struct iv_ca *ivs, struct iv_group *group)
18081149 5680{
309a0cf6 5681 return ivs->cand_for_group[group->id];
18081149
XDL
5682}
5683
b1b02be2
ZD
5684/* Computes the cost field of IVS structure. */
5685
5686static void
5687iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5688{
6e8c65f6 5689 comp_cost cost = ivs->cand_use_cost;
18081149 5690
8d18b6df 5691 cost += ivs->cand_cost;
c18101f5 5692 cost += ivopts_estimate_reg_pressure (data, ivs->n_invs, ivs->n_cands);
b1b02be2
ZD
5693 ivs->cost = cost;
5694}
5695
0ca91c77
BC
5696/* Remove a use of the invariants in set INVS by decreasing the counters in
5697 N_INV_USES and updating IVS. */
9be872b7
ZD
5698
5699static void
0ca91c77 5700iv_ca_set_remove_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
9be872b7
ZD
5701{
5702 bitmap_iterator bi;
5703 unsigned iid;
5704
5705 if (!invs)
5706 return;
5707
0ca91c77 5708 gcc_assert (n_inv_uses != NULL);
9be872b7
ZD
5709 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5710 {
0ca91c77
BC
5711 n_inv_uses[iid]--;
5712 if (n_inv_uses[iid] == 0)
1136cae4 5713 ivs->n_invs--;
9be872b7
ZD
5714 }
5715}
5716
b1b02be2
ZD
5717/* Set GROUP not to be expressed by any candidate in IVS. */
5718
5719static void
5720iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
309a0cf6 5721 struct iv_group *group)
b1b02be2 5722{
309a0cf6 5723 unsigned gid = group->id, cid;
b1b02be2 5724 struct cost_pair *cp;
b1b02be2 5725
309a0cf6 5726 cp = ivs->cand_for_group[gid];
b1b02be2
ZD
5727 if (!cp)
5728 return;
5729 cid = cp->cand->id;
5730
309a0cf6
BC
5731 ivs->bad_groups++;
5732 ivs->cand_for_group[gid] = NULL;
b1b02be2
ZD
5733 ivs->n_cand_uses[cid]--;
5734
5735 if (ivs->n_cand_uses[cid] == 0)
8b11a64c 5736 {
b1b02be2 5737 bitmap_clear_bit (ivs->cands, cid);
36f5ada1 5738 ivs->n_cands--;
b1b02be2 5739 ivs->cand_cost -= cp->cand->cost;
0ca91c77 5740 iv_ca_set_remove_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
4c11bdff 5741 iv_ca_set_remove_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
b1b02be2
ZD
5742 }
5743
8d18b6df 5744 ivs->cand_use_cost -= cp->cost;
0ca91c77
BC
5745 iv_ca_set_remove_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5746 iv_ca_set_remove_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
9be872b7
ZD
5747 iv_ca_recount_cost (data, ivs);
5748}
5749
0ca91c77
BC
5750/* Add a use of the invariants in set INVS by increasing the counters in
5751 N_INV_USES and updating IVS. */
80cad5fa 5752
9be872b7 5753static void
0ca91c77 5754iv_ca_set_add_invs (struct iv_ca *ivs, bitmap invs, unsigned *n_inv_uses)
9be872b7
ZD
5755{
5756 bitmap_iterator bi;
5757 unsigned iid;
5758
5759 if (!invs)
5760 return;
5761
0ca91c77 5762 gcc_assert (n_inv_uses != NULL);
9be872b7 5763 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
b1b02be2 5764 {
0ca91c77
BC
5765 n_inv_uses[iid]++;
5766 if (n_inv_uses[iid] == 1)
1136cae4 5767 ivs->n_invs++;
8b11a64c 5768 }
b1b02be2
ZD
5769}
5770
309a0cf6 5771/* Set cost pair for GROUP in set IVS to CP. */
b1b02be2
ZD
5772
5773static void
5774iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
309a0cf6 5775 struct iv_group *group, struct cost_pair *cp)
b1b02be2 5776{
309a0cf6 5777 unsigned gid = group->id, cid;
b1b02be2 5778
309a0cf6 5779 if (ivs->cand_for_group[gid] == cp)
b1b02be2
ZD
5780 return;
5781
309a0cf6
BC
5782 if (ivs->cand_for_group[gid])
5783 iv_ca_set_no_cp (data, ivs, group);
b1b02be2
ZD
5784
5785 if (cp)
8b11a64c 5786 {
b1b02be2 5787 cid = cp->cand->id;
8b11a64c 5788
309a0cf6
BC
5789 ivs->bad_groups--;
5790 ivs->cand_for_group[gid] = cp;
b1b02be2
ZD
5791 ivs->n_cand_uses[cid]++;
5792 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 5793 {
b1b02be2 5794 bitmap_set_bit (ivs->cands, cid);
36f5ada1 5795 ivs->n_cands++;
b1b02be2 5796 ivs->cand_cost += cp->cand->cost;
0ca91c77 5797 iv_ca_set_add_invs (ivs, cp->cand->inv_vars, ivs->n_inv_var_uses);
4c11bdff 5798 iv_ca_set_add_invs (ivs, cp->cand->inv_exprs, ivs->n_inv_expr_uses);
8b11a64c
ZD
5799 }
5800
8d18b6df 5801 ivs->cand_use_cost += cp->cost;
0ca91c77
BC
5802 iv_ca_set_add_invs (ivs, cp->inv_vars, ivs->n_inv_var_uses);
5803 iv_ca_set_add_invs (ivs, cp->inv_exprs, ivs->n_inv_expr_uses);
b1b02be2 5804 iv_ca_recount_cost (data, ivs);
87c476a2 5805 }
b1b02be2
ZD
5806}
5807
5808/* Extend set IVS by expressing GROUP by some of the candidates in it
5809 if possible. Consider all important candidates if the candidates in
5810 set IVS don't give any result. */
b1b02be2
ZD
5811
5812static void
309a0cf6
BC
5813iv_ca_add_group (struct ivopts_data *data, struct iv_ca *ivs,
5814 struct iv_group *group)
b1b02be2
ZD
5815{
5816 struct cost_pair *best_cp = NULL, *cp;
5817 bitmap_iterator bi;
5818 unsigned i;
f22ae1ec 5819 struct iv_cand *cand;
8b11a64c 5820
309a0cf6 5821 gcc_assert (ivs->upto >= group->id);
f22ae1ec 5822 ivs->upto++;
309a0cf6 5823 ivs->bad_groups++;
b1b02be2 5824
f22ae1ec 5825 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
b1b02be2 5826 {
309a0cf6
BC
5827 cand = data->vcands[i];
5828 cp = get_group_iv_cost (data, group, cand);
b1b02be2
ZD
5829 if (cheaper_cost_pair (cp, best_cp))
5830 best_cp = cp;
5831 }
309a0cf6 5832
f22ae1ec
BC
5833 if (best_cp == NULL)
5834 {
5835 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5836 {
309a0cf6
BC
5837 cand = data->vcands[i];
5838 cp = get_group_iv_cost (data, group, cand);
f22ae1ec
BC
5839 if (cheaper_cost_pair (cp, best_cp))
5840 best_cp = cp;
5841 }
5842 }
8b11a64c 5843
309a0cf6 5844 iv_ca_set_cp (data, ivs, group, best_cp);
8b11a64c
ZD
5845}
5846
b1b02be2 5847/* Get cost for assignment IVS. */
8b11a64c 5848
6e8c65f6 5849static comp_cost
b1b02be2
ZD
5850iv_ca_cost (struct iv_ca *ivs)
5851{
c4e93e28
AH
5852 /* This was a conditional expression but it triggered a bug in
5853 Sun C 5.5. */
309a0cf6 5854 if (ivs->bad_groups)
cb4ad180
AH
5855 return infinite_cost;
5856 else
5857 return ivs->cost;
b1b02be2
ZD
5858}
5859
e292d606
BC
5860/* Compare if applying NEW_CP to GROUP for IVS introduces more invariants
5861 than OLD_CP. Return 1, 0 and -1 for more, equal and fewer invariants
5862 respectively. */
b1b02be2 5863
e292d606
BC
5864static int
5865iv_ca_compare_deps (struct ivopts_data *data, struct iv_ca *ivs,
5866 struct iv_group *group, struct cost_pair *old_cp,
5867 struct cost_pair *new_cp)
8b11a64c 5868{
f9f69dd6
BC
5869 gcc_assert (old_cp && new_cp && old_cp != new_cp);
5870 unsigned old_n_invs = ivs->n_invs;
5871 iv_ca_set_cp (data, ivs, group, new_cp);
5872 unsigned new_n_invs = ivs->n_invs;
5873 iv_ca_set_cp (data, ivs, group, old_cp);
8b11a64c 5874
e292d606 5875 return new_n_invs > old_n_invs ? 1 : (new_n_invs < old_n_invs ? -1 : 0);
b1b02be2
ZD
5876}
5877
309a0cf6
BC
5878/* Creates change of expressing GROUP by NEW_CP instead of OLD_CP and chains
5879 it before NEXT. */
b1b02be2
ZD
5880
5881static struct iv_ca_delta *
309a0cf6
BC
5882iv_ca_delta_add (struct iv_group *group, struct cost_pair *old_cp,
5883 struct cost_pair *new_cp, struct iv_ca_delta *next)
b1b02be2 5884{
5ed6ace5 5885 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
b1b02be2 5886
309a0cf6 5887 change->group = group;
b1b02be2
ZD
5888 change->old_cp = old_cp;
5889 change->new_cp = new_cp;
309a0cf6 5890 change->next = next;
b1b02be2
ZD
5891
5892 return change;
5893}
5894
36f5ada1 5895/* Joins two lists of changes L1 and L2. Destructive -- old lists
6c6cfbfd 5896 are rewritten. */
36f5ada1
ZD
5897
5898static struct iv_ca_delta *
5899iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5900{
5901 struct iv_ca_delta *last;
5902
5903 if (!l2)
5904 return l1;
5905
5906 if (!l1)
5907 return l2;
5908
309a0cf6 5909 for (last = l1; last->next; last = last->next)
36f5ada1 5910 continue;
309a0cf6 5911 last->next = l2;
36f5ada1
ZD
5912
5913 return l1;
5914}
5915
36f5ada1
ZD
5916/* Reverse the list of changes DELTA, forming the inverse to it. */
5917
5918static struct iv_ca_delta *
5919iv_ca_delta_reverse (struct iv_ca_delta *delta)
5920{
5921 struct iv_ca_delta *act, *next, *prev = NULL;
36f5ada1
ZD
5922
5923 for (act = delta; act; act = next)
5924 {
309a0cf6
BC
5925 next = act->next;
5926 act->next = prev;
36f5ada1
ZD
5927 prev = act;
5928
fab27f52 5929 std::swap (act->old_cp, act->new_cp);
36f5ada1
ZD
5930 }
5931
5932 return prev;
5933}
5934
b1b02be2
ZD
5935/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5936 reverted instead. */
5937
5938static void
5939iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5940 struct iv_ca_delta *delta, bool forward)
5941{
5942 struct cost_pair *from, *to;
36f5ada1 5943 struct iv_ca_delta *act;
b1b02be2 5944
36f5ada1
ZD
5945 if (!forward)
5946 delta = iv_ca_delta_reverse (delta);
b1b02be2 5947
309a0cf6 5948 for (act = delta; act; act = act->next)
36f5ada1
ZD
5949 {
5950 from = act->old_cp;
5951 to = act->new_cp;
309a0cf6
BC
5952 gcc_assert (iv_ca_cand_for_group (ivs, act->group) == from);
5953 iv_ca_set_cp (data, ivs, act->group, to);
8b11a64c 5954 }
36f5ada1
ZD
5955
5956 if (!forward)
5957 iv_ca_delta_reverse (delta);
b1b02be2 5958}
8b11a64c 5959
b1b02be2 5960/* Returns true if CAND is used in IVS. */
8b11a64c 5961
b1b02be2
ZD
5962static bool
5963iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5964{
5965 return ivs->n_cand_uses[cand->id] > 0;
5966}
8b11a64c 5967
36f5ada1
ZD
5968/* Returns number of induction variable candidates in the set IVS. */
5969
5970static unsigned
5971iv_ca_n_cands (struct iv_ca *ivs)
5972{
5973 return ivs->n_cands;
5974}
5975
b1b02be2
ZD
5976/* Free the list of changes DELTA. */
5977
5978static void
5979iv_ca_delta_free (struct iv_ca_delta **delta)
5980{
5981 struct iv_ca_delta *act, *next;
5982
5983 for (act = *delta; act; act = next)
87c476a2 5984 {
309a0cf6 5985 next = act->next;
b1b02be2 5986 free (act);
87c476a2 5987 }
8b11a64c 5988
b1b02be2
ZD
5989 *delta = NULL;
5990}
5991
5992/* Allocates new iv candidates assignment. */
5993
5994static struct iv_ca *
5995iv_ca_new (struct ivopts_data *data)
5996{
5ed6ace5 5997 struct iv_ca *nw = XNEW (struct iv_ca);
8b11a64c 5998
b1b02be2 5999 nw->upto = 0;
309a0cf6
BC
6000 nw->bad_groups = 0;
6001 nw->cand_for_group = XCNEWVEC (struct cost_pair *,
6002 data->vgroups.length ());
6003 nw->n_cand_uses = XCNEWVEC (unsigned, data->vcands.length ());
8bdbfff5 6004 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 6005 nw->n_cands = 0;
1136cae4 6006 nw->n_invs = 0;
7735d6c7 6007 nw->cand_use_cost = no_cost;
b1b02be2 6008 nw->cand_cost = 0;
0ca91c77
BC
6009 nw->n_inv_var_uses = XCNEWVEC (unsigned, data->max_inv_var_id + 1);
6010 nw->n_inv_expr_uses = XCNEWVEC (unsigned, data->max_inv_expr_id + 1);
7735d6c7 6011 nw->cost = no_cost;
b1b02be2
ZD
6012
6013 return nw;
6014}
6015
6016/* Free memory occupied by the set IVS. */
6017
6018static void
6019iv_ca_free (struct iv_ca **ivs)
6020{
309a0cf6 6021 free ((*ivs)->cand_for_group);
b1b02be2 6022 free ((*ivs)->n_cand_uses);
8bdbfff5 6023 BITMAP_FREE ((*ivs)->cands);
0ca91c77
BC
6024 free ((*ivs)->n_inv_var_uses);
6025 free ((*ivs)->n_inv_expr_uses);
b1b02be2
ZD
6026 free (*ivs);
6027 *ivs = NULL;
6028}
6029
6030/* Dumps IVS to FILE. */
6031
6032static void
6033iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
6034{
b1b02be2 6035 unsigned i;
6e8c65f6 6036 comp_cost cost = iv_ca_cost (ivs);
b1b02be2 6037
8d18b6df
ML
6038 fprintf (file, " cost: %d (complexity %d)\n", cost.cost,
6039 cost.complexity);
309a0cf6 6040 fprintf (file, " cand_cost: %d\n cand_group_cost: %d (complexity %d)\n",
623b8e0a
ML
6041 ivs->cand_cost, ivs->cand_use_cost.cost,
6042 ivs->cand_use_cost.complexity);
18081149
XDL
6043 bitmap_print (file, ivs->cands, " candidates: ","\n");
6044
53f2382d 6045 for (i = 0; i < ivs->upto; i++)
18081149 6046 {
309a0cf6
BC
6047 struct iv_group *group = data->vgroups[i];
6048 struct cost_pair *cp = iv_ca_cand_for_group (ivs, group);
18081149 6049 if (cp)
8d18b6df
ML
6050 fprintf (file, " group:%d --> iv_cand:%d, cost=(%d,%d)\n",
6051 group->id, cp->cand->id, cp->cost.cost,
6052 cp->cost.complexity);
18081149 6053 else
53f2382d 6054 fprintf (file, " group:%d --> ??\n", group->id);
18081149 6055 }
b1b02be2 6056
623b8e0a
ML
6057 const char *pref = "";
6058 fprintf (file, " invariant variables: ");
0ca91c77
BC
6059 for (i = 1; i <= data->max_inv_var_id; i++)
6060 if (ivs->n_inv_var_uses[i])
b1b02be2
ZD
6061 {
6062 fprintf (file, "%s%d", pref, i);
6063 pref = ", ";
6064 }
623b8e0a
ML
6065
6066 pref = "";
6067 fprintf (file, "\n invariant expressions: ");
0ca91c77
BC
6068 for (i = 1; i <= data->max_inv_expr_id; i++)
6069 if (ivs->n_inv_expr_uses[i])
6070 {
6071 fprintf (file, "%s%d", pref, i);
623b8e0a 6072 pref = ", ";
0ca91c77 6073 }
623b8e0a 6074
18081149 6075 fprintf (file, "\n\n");
b1b02be2
ZD
6076}
6077
6078/* Try changing candidate in IVS to CAND for each use. Return cost of the
36f5ada1 6079 new set, and store differences in DELTA. Number of induction variables
18081149
XDL
6080 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
6081 the function will try to find a solution with minimal iv candidates. */
b1b02be2 6082
6e8c65f6 6083static comp_cost
b1b02be2 6084iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
36f5ada1 6085 struct iv_cand *cand, struct iv_ca_delta **delta,
18081149 6086 unsigned *n_ivs, bool min_ncand)
b1b02be2 6087{
6e8c65f6
ZD
6088 unsigned i;
6089 comp_cost cost;
309a0cf6 6090 struct iv_group *group;
b1b02be2
ZD
6091 struct cost_pair *old_cp, *new_cp;
6092
6093 *delta = NULL;
6094 for (i = 0; i < ivs->upto; i++)
6095 {
309a0cf6
BC
6096 group = data->vgroups[i];
6097 old_cp = iv_ca_cand_for_group (ivs, group);
b1b02be2
ZD
6098
6099 if (old_cp
6100 && old_cp->cand == cand)
6101 continue;
6102
309a0cf6 6103 new_cp = get_group_iv_cost (data, group, cand);
b1b02be2
ZD
6104 if (!new_cp)
6105 continue;
6106
e292d606
BC
6107 if (!min_ncand)
6108 {
6109 int cmp_invs = iv_ca_compare_deps (data, ivs, group, old_cp, new_cp);
6110 /* Skip if new_cp depends on more invariants. */
6111 if (cmp_invs > 0)
6112 continue;
b8698a0f 6113
e292d606
BC
6114 int cmp_cost = compare_cost_pair (new_cp, old_cp);
6115 /* Skip if new_cp is not cheaper. */
6116 if (cmp_cost > 0 || (cmp_cost == 0 && cmp_invs == 0))
6117 continue;
6118 }
b1b02be2 6119
309a0cf6 6120 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
b1b02be2
ZD
6121 }
6122
6123 iv_ca_delta_commit (data, ivs, *delta, true);
6124 cost = iv_ca_cost (ivs);
36f5ada1
ZD
6125 if (n_ivs)
6126 *n_ivs = iv_ca_n_cands (ivs);
b1b02be2 6127 iv_ca_delta_commit (data, ivs, *delta, false);
8b11a64c
ZD
6128
6129 return cost;
6130}
6131
a0eca485 6132/* Try narrowing set IVS by removing CAND. Return the cost of
2c407426
BC
6133 the new set and store the differences in DELTA. START is
6134 the candidate with which we start narrowing. */
8b11a64c 6135
6e8c65f6 6136static comp_cost
b1b02be2 6137iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
2c407426
BC
6138 struct iv_cand *cand, struct iv_cand *start,
6139 struct iv_ca_delta **delta)
8b11a64c 6140{
b1b02be2 6141 unsigned i, ci;
309a0cf6 6142 struct iv_group *group;
b1b02be2
ZD
6143 struct cost_pair *old_cp, *new_cp, *cp;
6144 bitmap_iterator bi;
6145 struct iv_cand *cnd;
2c407426 6146 comp_cost cost, best_cost, acost;
b1b02be2
ZD
6147
6148 *delta = NULL;
309a0cf6 6149 for (i = 0; i < data->vgroups.length (); i++)
b1b02be2 6150 {
309a0cf6 6151 group = data->vgroups[i];
b1b02be2 6152
309a0cf6 6153 old_cp = iv_ca_cand_for_group (ivs, group);
b1b02be2
ZD
6154 if (old_cp->cand != cand)
6155 continue;
6156
2c407426
BC
6157 best_cost = iv_ca_cost (ivs);
6158 /* Start narrowing with START. */
309a0cf6 6159 new_cp = get_group_iv_cost (data, group, start);
b1b02be2
ZD
6160
6161 if (data->consider_all_candidates)
6162 {
6163 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6164 {
2c407426 6165 if (ci == cand->id || (start && ci == start->id))
b1b02be2
ZD
6166 continue;
6167
309a0cf6 6168 cnd = data->vcands[ci];
b1b02be2 6169
309a0cf6 6170 cp = get_group_iv_cost (data, group, cnd);
b1b02be2
ZD
6171 if (!cp)
6172 continue;
18081149 6173
309a0cf6 6174 iv_ca_set_cp (data, ivs, group, cp);
2c407426 6175 acost = iv_ca_cost (ivs);
b1b02be2 6176
8d18b6df 6177 if (acost < best_cost)
2c407426
BC
6178 {
6179 best_cost = acost;
6180 new_cp = cp;
6181 }
b1b02be2
ZD
6182 }
6183 }
6184 else
6185 {
309a0cf6 6186 EXECUTE_IF_AND_IN_BITMAP (group->related_cands, ivs->cands, 0, ci, bi)
b1b02be2 6187 {
2c407426 6188 if (ci == cand->id || (start && ci == start->id))
b1b02be2
ZD
6189 continue;
6190
309a0cf6 6191 cnd = data->vcands[ci];
b1b02be2 6192
309a0cf6 6193 cp = get_group_iv_cost (data, group, cnd);
b1b02be2
ZD
6194 if (!cp)
6195 continue;
b8698a0f 6196
309a0cf6 6197 iv_ca_set_cp (data, ivs, group, cp);
2c407426 6198 acost = iv_ca_cost (ivs);
b1b02be2 6199
8d18b6df 6200 if (acost < best_cost)
2c407426
BC
6201 {
6202 best_cost = acost;
6203 new_cp = cp;
6204 }
b1b02be2
ZD
6205 }
6206 }
2c407426 6207 /* Restore to old cp for use. */
309a0cf6 6208 iv_ca_set_cp (data, ivs, group, old_cp);
b1b02be2
ZD
6209
6210 if (!new_cp)
6211 {
6212 iv_ca_delta_free (delta);
6e8c65f6 6213 return infinite_cost;
b1b02be2
ZD
6214 }
6215
309a0cf6 6216 *delta = iv_ca_delta_add (group, old_cp, new_cp, *delta);
b1b02be2
ZD
6217 }
6218
6219 iv_ca_delta_commit (data, ivs, *delta, true);
6220 cost = iv_ca_cost (ivs);
6221 iv_ca_delta_commit (data, ivs, *delta, false);
6222
6223 return cost;
8b11a64c
ZD
6224}
6225
36f5ada1
ZD
6226/* Try optimizing the set of candidates IVS by removing candidates other
6227 than EXCEPT_CAND from it. Return cost of the new set, and store
6228 differences in DELTA. */
6229
6e8c65f6 6230static comp_cost
36f5ada1
ZD
6231iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6232 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6233{
6234 bitmap_iterator bi;
6235 struct iv_ca_delta *act_delta, *best_delta;
6e8c65f6
ZD
6236 unsigned i;
6237 comp_cost best_cost, acost;
36f5ada1
ZD
6238 struct iv_cand *cand;
6239
6240 best_delta = NULL;
6241 best_cost = iv_ca_cost (ivs);
6242
6243 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6244 {
309a0cf6 6245 cand = data->vcands[i];
36f5ada1
ZD
6246
6247 if (cand == except_cand)
6248 continue;
6249
2c407426 6250 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
36f5ada1 6251
8d18b6df 6252 if (acost < best_cost)
36f5ada1
ZD
6253 {
6254 best_cost = acost;
6255 iv_ca_delta_free (&best_delta);
6256 best_delta = act_delta;
6257 }
6258 else
6259 iv_ca_delta_free (&act_delta);
6260 }
6261
6262 if (!best_delta)
6263 {
6264 *delta = NULL;
6265 return best_cost;
6266 }
6267
6268 /* Recurse to possibly remove other unnecessary ivs. */
6269 iv_ca_delta_commit (data, ivs, best_delta, true);
6270 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6271 iv_ca_delta_commit (data, ivs, best_delta, false);
6272 *delta = iv_ca_delta_join (best_delta, *delta);
6273 return best_cost;
6274}
6275
6326a5f5 6276/* Check if CAND_IDX is a candidate other than OLD_CAND and has
309a0cf6 6277 cheaper local cost for GROUP than BEST_CP. Return pointer to
6326a5f5
BC
6278 the corresponding cost_pair, otherwise just return BEST_CP. */
6279
6280static struct cost_pair*
309a0cf6 6281cheaper_cost_with_cand (struct ivopts_data *data, struct iv_group *group,
6326a5f5
BC
6282 unsigned int cand_idx, struct iv_cand *old_cand,
6283 struct cost_pair *best_cp)
6284{
6285 struct iv_cand *cand;
6286 struct cost_pair *cp;
6287
6288 gcc_assert (old_cand != NULL && best_cp != NULL);
6289 if (cand_idx == old_cand->id)
6290 return best_cp;
6291
309a0cf6
BC
6292 cand = data->vcands[cand_idx];
6293 cp = get_group_iv_cost (data, group, cand);
6326a5f5
BC
6294 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6295 return cp;
6296
6297 return best_cp;
6298}
6299
6300/* Try breaking local optimal fixed-point for IVS by replacing candidates
6301 that are used by more than one iv use. For each of those candidates,
6302 this function tries to represent iv uses under that candidate using
6303 other ones with lower local cost, then tries to prune the new set.
6304 If the new set has lower cost, it returns the new cost after recording
6305 the candidate replacement in list DELTA. */
6306
6307static comp_cost
6308iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6309 struct iv_ca_delta **delta)
6310{
6311 bitmap_iterator bi, bj;
6312 unsigned int i, j, k;
6326a5f5
BC
6313 struct iv_cand *cand;
6314 comp_cost orig_cost, acost;
6315 struct iv_ca_delta *act_delta, *tmp_delta;
6316 struct cost_pair *old_cp, *best_cp = NULL;
6317
6318 *delta = NULL;
6319 orig_cost = iv_ca_cost (ivs);
6320
6321 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6322 {
6323 if (ivs->n_cand_uses[i] == 1
6324 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6325 continue;
6326
309a0cf6
BC
6327 cand = data->vcands[i];
6328
6326a5f5
BC
6329 act_delta = NULL;
6330 /* Represent uses under current candidate using other ones with
6331 lower local cost. */
6332 for (j = 0; j < ivs->upto; j++)
6333 {
309a0cf6
BC
6334 struct iv_group *group = data->vgroups[j];
6335 old_cp = iv_ca_cand_for_group (ivs, group);
6326a5f5
BC
6336
6337 if (old_cp->cand != cand)
6338 continue;
6339
6340 best_cp = old_cp;
6341 if (data->consider_all_candidates)
309a0cf6
BC
6342 for (k = 0; k < data->vcands.length (); k++)
6343 best_cp = cheaper_cost_with_cand (data, group, k,
6326a5f5
BC
6344 old_cp->cand, best_cp);
6345 else
309a0cf6
BC
6346 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, k, bj)
6347 best_cp = cheaper_cost_with_cand (data, group, k,
6326a5f5
BC
6348 old_cp->cand, best_cp);
6349
6350 if (best_cp == old_cp)
6351 continue;
6352
309a0cf6 6353 act_delta = iv_ca_delta_add (group, old_cp, best_cp, act_delta);
6326a5f5
BC
6354 }
6355 /* No need for further prune. */
6356 if (!act_delta)
6357 continue;
6358
6359 /* Prune the new candidate set. */
6360 iv_ca_delta_commit (data, ivs, act_delta, true);
6361 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6362 iv_ca_delta_commit (data, ivs, act_delta, false);
6363 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6364
8d18b6df 6365 if (acost < orig_cost)
6326a5f5
BC
6366 {
6367 *delta = act_delta;
6368 return acost;
6369 }
6370 else
6371 iv_ca_delta_free (&act_delta);
6372 }
6373
6374 return orig_cost;
6375}
6376
309a0cf6
BC
6377/* Tries to extend the set IVS in the best possible way in order to
6378 express GROUP. If ORIGINALP is true, prefer candidates from
16ad8025
SL
6379 the original set of IVs, otherwise favor important candidates not
6380 based on any memory object. */
8b11a64c
ZD
6381
6382static bool
b1b02be2 6383try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
309a0cf6 6384 struct iv_group *group, bool originalp)
8b11a64c 6385{
6e8c65f6 6386 comp_cost best_cost, act_cost;
8b11a64c 6387 unsigned i;
38b0dcb8
ZD
6388 bitmap_iterator bi;
6389 struct iv_cand *cand;
b1b02be2
ZD
6390 struct iv_ca_delta *best_delta = NULL, *act_delta;
6391 struct cost_pair *cp;
6392
309a0cf6 6393 iv_ca_add_group (data, ivs, group);
b1b02be2 6394 best_cost = iv_ca_cost (ivs);
309a0cf6 6395 cp = iv_ca_cand_for_group (ivs, group);
b1b02be2
ZD
6396 if (cp)
6397 {
309a0cf6
BC
6398 best_delta = iv_ca_delta_add (group, NULL, cp, NULL);
6399 iv_ca_set_no_cp (data, ivs, group);
b1b02be2 6400 }
8b11a64c 6401
16ad8025
SL
6402 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6403 first try important candidates not based on any memory object. Only if
6e8c65f6
ZD
6404 this fails, try the specific ones. Rationale -- in loops with many
6405 variables the best choice often is to use just one generic biv. If we
6406 added here many ivs specific to the uses, the optimization algorithm later
6407 would be likely to get stuck in a local minimum, thus causing us to create
6408 too many ivs. The approach from few ivs to more seems more likely to be
6409 successful -- starting from few ivs, replacing an expensive use by a
6410 specific iv should always be a win. */
309a0cf6 6411 EXECUTE_IF_SET_IN_BITMAP (group->related_cands, 0, i, bi)
8b11a64c 6412 {
309a0cf6 6413 cand = data->vcands[i];
38b0dcb8 6414
16ad8025
SL
6415 if (originalp && cand->pos != IP_ORIGINAL)
6416 continue;
6417
6418 if (!originalp && cand->iv->base_object != NULL_TREE)
6e8c65f6
ZD
6419 continue;
6420
b1b02be2 6421 if (iv_ca_cand_used_p (ivs, cand))
623b8e0a 6422 continue;
8b11a64c 6423
309a0cf6 6424 cp = get_group_iv_cost (data, group, cand);
b1b02be2
ZD
6425 if (!cp)
6426 continue;
6427
309a0cf6 6428 iv_ca_set_cp (data, ivs, group, cp);
18081149 6429 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
623b8e0a 6430 true);
309a0cf6
BC
6431 iv_ca_set_no_cp (data, ivs, group);
6432 act_delta = iv_ca_delta_add (group, NULL, cp, act_delta);
8b11a64c 6433
8d18b6df 6434 if (act_cost < best_cost)
8b11a64c
ZD
6435 {
6436 best_cost = act_cost;
b1b02be2
ZD
6437
6438 iv_ca_delta_free (&best_delta);
6439 best_delta = act_delta;
8b11a64c 6440 }
b1b02be2
ZD
6441 else
6442 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
6443 }
6444
8d18b6df 6445 if (best_cost.infinite_cost_p ())
38b0dcb8 6446 {
309a0cf6 6447 for (i = 0; i < group->n_map_members; i++)
38b0dcb8 6448 {
309a0cf6 6449 cp = group->cost_map + i;
b1b02be2
ZD
6450 cand = cp->cand;
6451 if (!cand)
38b0dcb8
ZD
6452 continue;
6453
6454 /* Already tried this. */
16ad8025
SL
6455 if (cand->important)
6456 {
6457 if (originalp && cand->pos == IP_ORIGINAL)
6458 continue;
6459 if (!originalp && cand->iv->base_object == NULL_TREE)
6460 continue;
6461 }
b8698a0f 6462
b1b02be2 6463 if (iv_ca_cand_used_p (ivs, cand))
38b0dcb8
ZD
6464 continue;
6465
b1b02be2 6466 act_delta = NULL;
309a0cf6 6467 iv_ca_set_cp (data, ivs, group, cp);
18081149 6468 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
309a0cf6
BC
6469 iv_ca_set_no_cp (data, ivs, group);
6470 act_delta = iv_ca_delta_add (group,
6471 iv_ca_cand_for_group (ivs, group),
b1b02be2 6472 cp, act_delta);
38b0dcb8 6473
8d18b6df 6474 if (act_cost < best_cost)
38b0dcb8
ZD
6475 {
6476 best_cost = act_cost;
b1b02be2
ZD
6477
6478 if (best_delta)
6479 iv_ca_delta_free (&best_delta);
6480 best_delta = act_delta;
38b0dcb8 6481 }
b1b02be2
ZD
6482 else
6483 iv_ca_delta_free (&act_delta);
38b0dcb8
ZD
6484 }
6485 }
6486
b1b02be2
ZD
6487 iv_ca_delta_commit (data, ivs, best_delta, true);
6488 iv_ca_delta_free (&best_delta);
8b11a64c 6489
8d18b6df 6490 return !best_cost.infinite_cost_p ();
8b11a64c
ZD
6491}
6492
b1b02be2 6493/* Finds an initial assignment of candidates to uses. */
8b11a64c 6494
b1b02be2 6495static struct iv_ca *
16ad8025 6496get_initial_solution (struct ivopts_data *data, bool originalp)
8b11a64c
ZD
6497{
6498 unsigned i;
309a0cf6 6499 struct iv_ca *ivs = iv_ca_new (data);
8b11a64c 6500
309a0cf6
BC
6501 for (i = 0; i < data->vgroups.length (); i++)
6502 if (!try_add_cand_for (data, ivs, data->vgroups[i], originalp))
b1b02be2
ZD
6503 {
6504 iv_ca_free (&ivs);
6505 return NULL;
6506 }
8b11a64c 6507
b1b02be2 6508 return ivs;
8b11a64c
ZD
6509}
6510
6326a5f5
BC
6511/* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6512 points to a bool variable; if it is true, this function tries to break
6513 the local optimal fixed-point by replacing candidates in IVS. */
8b11a64c
ZD
6514
6515static bool
6326a5f5
BC
6516try_improve_iv_set (struct ivopts_data *data,
6517 struct iv_ca *ivs, bool *try_replace_p)
8b11a64c 6518{
6e8c65f6
ZD
6519 unsigned i, n_ivs;
6520 comp_cost acost, best_cost = iv_ca_cost (ivs);
36f5ada1 6521 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
b1b02be2 6522 struct iv_cand *cand;
8b11a64c 6523
36f5ada1 6524 /* Try extending the set of induction variables by one. */
309a0cf6 6525 for (i = 0; i < data->vcands.length (); i++)
8b11a64c 6526 {
309a0cf6 6527 cand = data->vcands[i];
b8698a0f 6528
b1b02be2 6529 if (iv_ca_cand_used_p (ivs, cand))
36f5ada1
ZD
6530 continue;
6531
18081149 6532 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
36f5ada1
ZD
6533 if (!act_delta)
6534 continue;
6535
6536 /* If we successfully added the candidate and the set is small enough,
6537 try optimizing it by removing other candidates. */
6538 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6539 {
6540 iv_ca_delta_commit (data, ivs, act_delta, true);
6541 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6542 iv_ca_delta_commit (data, ivs, act_delta, false);
6543 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6544 }
8b11a64c 6545
8d18b6df 6546 if (acost < best_cost)
8b11a64c 6547 {
b1b02be2 6548 best_cost = acost;
36f5ada1 6549 iv_ca_delta_free (&best_delta);
b1b02be2 6550 best_delta = act_delta;
8b11a64c 6551 }
8b11a64c 6552 else
b1b02be2 6553 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
6554 }
6555
b1b02be2 6556 if (!best_delta)
36f5ada1
ZD
6557 {
6558 /* Try removing the candidates from the set instead. */
6559 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6560
6326a5f5
BC
6561 if (!best_delta && *try_replace_p)
6562 {
6563 *try_replace_p = false;
6564 /* So far the candidate selection algorithm tends to choose fewer
6565 IVs, so that it can handle cases in which loops have many variables
6566 but the best choice is often to use only one general biv. One
6567 weakness is that it can't handle the opposite cases, in which
6568 different candidates should be chosen with respect to each use. To
6569 solve the problem, we replace candidates in the manner described in
6570 the comments of iv_ca_replace, thus giving the general algorithm a
6571 chance to break the local optimal fixed-point in these cases. */
6572 best_cost = iv_ca_replace (data, ivs, &best_delta);
6573 }
6574
36f5ada1
ZD
6575 if (!best_delta)
6576 return false;
6577 }
8b11a64c 6578
b1b02be2 6579 iv_ca_delta_commit (data, ivs, best_delta, true);
8d18b6df 6580 gcc_assert (best_cost == iv_ca_cost (ivs));
b1b02be2 6581 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
6582 return true;
6583}
6584
6585/* Attempts to find the optimal set of induction variables. We use a simple
6586 greedy heuristic -- we try to replace at most one candidate in the selected
6587 solution and remove the unused ivs while this improves the cost. */
6588
b1b02be2 6589static struct iv_ca *
16ad8025 6590find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
8b11a64c 6591{
b1b02be2 6592 struct iv_ca *set;
6326a5f5 6593 bool try_replace_p = true;
8b11a64c 6594
b1b02be2 6595 /* Get the initial solution. */
16ad8025 6596 set = get_initial_solution (data, originalp);
b1b02be2 6597 if (!set)
8b11a64c
ZD
6598 {
6599 if (dump_file && (dump_flags & TDF_DETAILS))
6600 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
8b11a64c
ZD
6601 return NULL;
6602 }
6603
6604 if (dump_file && (dump_flags & TDF_DETAILS))
6605 {
b1b02be2
ZD
6606 fprintf (dump_file, "Initial set of candidates:\n");
6607 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
6608 }
6609
6326a5f5 6610 while (try_improve_iv_set (data, set, &try_replace_p))
8b11a64c
ZD
6611 {
6612 if (dump_file && (dump_flags & TDF_DETAILS))
6613 {
b1b02be2
ZD
6614 fprintf (dump_file, "Improved to:\n");
6615 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
6616 }
6617 }
6618
16ad8025
SL
6619 return set;
6620}
6621
6622static struct iv_ca *
6623find_optimal_iv_set (struct ivopts_data *data)
6624{
6625 unsigned i;
16ad8025 6626 comp_cost cost, origcost;
309a0cf6 6627 struct iv_ca *set, *origset;
16ad8025
SL
6628
6629 /* Determine the cost based on a strategy that starts with original IVs,
6630 and try again using a strategy that prefers candidates not based
6631 on any IVs. */
6632 origset = find_optimal_iv_set_1 (data, true);
6633 set = find_optimal_iv_set_1 (data, false);
6634
6635 if (!origset && !set)
6636 return NULL;
6637
6638 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6639 cost = set ? iv_ca_cost (set) : infinite_cost;
6640
8b11a64c 6641 if (dump_file && (dump_flags & TDF_DETAILS))
6e8c65f6 6642 {
16ad8025
SL
6643 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6644 origcost.cost, origcost.complexity);
6645 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6646 cost.cost, cost.complexity);
6647 }
6648
6649 /* Choose the one with the best cost. */
8d18b6df 6650 if (origcost <= cost)
16ad8025
SL
6651 {
6652 if (set)
6653 iv_ca_free (&set);
6654 set = origset;
6e8c65f6 6655 }
16ad8025
SL
6656 else if (origset)
6657 iv_ca_free (&origset);
8b11a64c 6658
309a0cf6 6659 for (i = 0; i < data->vgroups.length (); i++)
8b11a64c 6660 {
309a0cf6
BC
6661 struct iv_group *group = data->vgroups[i];
6662 group->selected = iv_ca_cand_for_group (set, group)->cand;
8b11a64c
ZD
6663 }
6664
8b11a64c
ZD
6665 return set;
6666}
6667
6668/* Creates a new induction variable corresponding to CAND. */
6669
6670static void
6671create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6672{
726a989a 6673 gimple_stmt_iterator incr_pos;
8b11a64c 6674 tree base;
309a0cf6
BC
6675 struct iv_use *use;
6676 struct iv_group *group;
8b11a64c
ZD
6677 bool after = false;
6678
c1662028 6679 gcc_assert (cand->iv != NULL);
8b11a64c
ZD
6680
6681 switch (cand->pos)
6682 {
6683 case IP_NORMAL:
726a989a 6684 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
8b11a64c
ZD
6685 break;
6686
6687 case IP_END:
726a989a 6688 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
8b11a64c
ZD
6689 after = true;
6690 break;
6691
2c08497a
BS
6692 case IP_AFTER_USE:
6693 after = true;
6694 /* fall through */
6695 case IP_BEFORE_USE:
6696 incr_pos = gsi_for_stmt (cand->incremented_at);
6697 break;
6698
8b11a64c
ZD
6699 case IP_ORIGINAL:
6700 /* Mark that the iv is preserved. */
6701 name_info (data, cand->var_before)->preserve_biv = true;
6702 name_info (data, cand->var_after)->preserve_biv = true;
6703
6704 /* Rewrite the increment so that it uses var_before directly. */
309a0cf6
BC
6705 use = find_interesting_uses_op (data, cand->var_after);
6706 group = data->vgroups[use->group_id];
6707 group->selected = cand;
8b11a64c
ZD
6708 return;
6709 }
b8698a0f 6710
8b11a64c 6711 gimple_add_tmp_var (cand->var_before);
8b11a64c
ZD
6712
6713 base = unshare_expr (cand->iv->base);
6714
9be872b7
ZD
6715 create_iv (base, unshare_expr (cand->iv->step),
6716 cand->var_before, data->current_loop,
8b11a64c
ZD
6717 &incr_pos, after, &cand->var_before, &cand->var_after);
6718}

/* Creates new induction variables described in SET.  */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = data->vcands[i];
      create_new_iv (data, cand);
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Selected IV set for loop %d",
               data->current_loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, ", " HOST_WIDE_INT_PRINT_DEC " avg niters",
               avg_loop_niter (data->current_loop));
      fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
      EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
        {
          cand = data->vcands[i];
          dump_cand (dump_file, cand);
        }
      fprintf (dump_file, "\n");
    }
}
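
/* For illustration, the dump block in create_new_ivs prints a header shaped
   like

     Selected IV set for loop 1 at foo.c:12, 100 avg niters, 2 IVs:

   followed by one dump_cand entry per candidate in SET (file name, counts
   and candidates of course vary per loop).  */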

/* Rewrites USE (definition of iv used in a nonlinear expression)
   using candidate CAND.  */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
                            struct iv_use *use, struct iv_cand *cand)
{
  gassign *ass;
  gimple_stmt_iterator bsi;
  tree comp, type = get_use_type (use), tgt;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back).  */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree op = NULL_TREE;
      enum tree_code stmt_code;

      gcc_assert (is_gimple_assign (use->stmt));
      gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);

      /* Check whether we may leave the computation unchanged.
         This is the case only if it does not rely on other
         computations in the loop -- otherwise, the computation
         we rely upon may be removed in remove_unused_ivs,
         thus leading to an ICE.  */
      stmt_code = gimple_assign_rhs_code (use->stmt);
      if (stmt_code == PLUS_EXPR
          || stmt_code == MINUS_EXPR
          || stmt_code == POINTER_PLUS_EXPR)
        {
          if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs2 (use->stmt);
          else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
            op = gimple_assign_rhs1 (use->stmt);
        }

      if (op != NULL_TREE)
        {
          if (expr_invariant_in_loop_p (data->current_loop, op))
            return;
          if (TREE_CODE (op) == SSA_NAME)
            {
              struct iv *iv = get_iv (data, op);
              if (iv != NULL && integer_zerop (iv->step))
                return;
            }
        }
    }
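  /* E.g. when the use is the candidate's own increment  i_2 = i_1 + n_3
     and n_3 is loop-invariant (or an SSA name whose step is zero), the
     early returns above leave the statement as-is.  */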

  switch (gimple_code (use->stmt))
    {
    case GIMPLE_PHI:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it.  */
      if (name_info (data, tgt)->preserve_biv)
        return;

      bsi = gsi_after_labels (gimple_bb (use->stmt));
      break;

    case GIMPLE_ASSIGN:
      tgt = gimple_assign_lhs (use->stmt);
      bsi = gsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  aff_tree aff_inv, aff_var;
  if (!get_computation_aff_1 (data->current_loop, use->stmt,
                              use, cand, &aff_inv, &aff_var))
    gcc_unreachable ();

  unshare_aff_combination (&aff_inv);
  unshare_aff_combination (&aff_var);
  /* Prefer a CSE opportunity over a loop-invariant computation by adding
     the constant offset last, so that iv_uses that differ only in their
     offsets can be CSEed.  */
  widest_int offset = aff_inv.offset;
  aff_inv.offset = 0;

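  /* E.g. two uses  *(base_1 + i_2 * 4 + 8)  and  *(base_1 + i_2 * 4 + 16)
     both materialize  base_1 + i_2 * 4  first and only the trailing
     + 8 / + 16 differ, so the common part is exposed to CSE.  */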
  gimple_seq stmt_list = NULL, seq = NULL;
  tree comp_op1 = aff_combination_to_tree (&aff_inv);
  tree comp_op2 = aff_combination_to_tree (&aff_var);
  gcc_assert (comp_op1 && comp_op2);

  comp_op1 = force_gimple_operand (comp_op1, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);
  comp_op2 = force_gimple_operand (comp_op2, &seq, true, NULL);
  gimple_seq_add_seq (&stmt_list, seq);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op2)))
    std::swap (comp_op1, comp_op2);

  if (POINTER_TYPE_P (TREE_TYPE (comp_op1)))
    {
      comp = fold_build_pointer_plus (comp_op1,
                                      fold_convert (sizetype, comp_op2));
      comp = fold_build_pointer_plus (comp,
                                      wide_int_to_tree (sizetype, offset));
    }
  else
    {
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp_op1,
                          fold_convert (TREE_TYPE (comp_op1), comp_op2));
      comp = fold_build2 (PLUS_EXPR, TREE_TYPE (comp_op1), comp,
                          wide_int_to_tree (TREE_TYPE (comp_op1), offset));
    }

  comp = fold_convert (type, comp);
  if (!valid_gimple_rhs_p (comp)
      || (gimple_code (use->stmt) != GIMPLE_PHI
          /* We can't allow re-allocating the stmt as other code might
             still point to it.  */
          && (get_gimple_rhs_num_ops (TREE_CODE (comp))
              >= gimple_num_ops (gsi_stmt (bsi)))))
    {
      comp = force_gimple_operand (comp, &seq, true, NULL);
      gimple_seq_add_seq (&stmt_list, seq);
      if (POINTER_TYPE_P (TREE_TYPE (tgt)))
        {
          duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
          /* As this isn't a plain copy we have to reset alignment
             information.  */
          if (SSA_NAME_PTR_INFO (comp))
            mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
        }
    }

  gsi_insert_seq_before (&bsi, stmt_list, GSI_SAME_STMT);
  if (gimple_code (use->stmt) == GIMPLE_PHI)
    {
      ass = gimple_build_assign (tgt, comp);
      gsi_insert_before (&bsi, ass, GSI_SAME_STMT);

      bsi = gsi_for_stmt (use->stmt);
      remove_phi_node (&bsi, false);
    }
  else
    {
      gimple_assign_set_rhs_from_tree (&bsi, comp);
      use->stmt = gsi_stmt (bsi);
    }
}

/* Performs a peephole optimization to reorder the iv update statement with
   a mem ref to enable instruction combining in later phases.  The mem ref
   uses the iv value before the update, so the reordering transformation
   requires adjustment of the offset.  CAND is the selected IV_CAND.

   Example:

     t = MEM_REF (base, iv1, 8, 16);  // base, index, stride, offset
     iv2 = iv1 + 1;

     if (t < val)      (1)
       goto L;
     goto Head;


   Directly propagating t over to (1) would introduce an overlapping live
   range and thus increase register pressure.  This peephole transforms it
   into:


     iv2 = iv1 + 1;
     t = MEM_REF (base, iv2, 8, 8);
     if (t < val)
       goto L;
     goto Head;
*/
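
/* Note how the offset drops from 16 to 8: the ref now indexes with
   iv2 = iv1 + 1, so with stride 8 the constant offset must shrink by one
   stride for the address to remain unchanged.  */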

static void
adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
{
  tree var_after;
  gimple *iv_update, *stmt;
  basic_block bb;
  gimple_stmt_iterator gsi, gsi_iv;

  if (cand->pos != IP_NORMAL)
    return;

  var_after = cand->var_after;
  iv_update = SSA_NAME_DEF_STMT (var_after);

  bb = gimple_bb (iv_update);
  gsi = gsi_last_nondebug_bb (bb);
  stmt = gsi_stmt (gsi);

  /* Only handle conditional statements for now.  */
  if (gimple_code (stmt) != GIMPLE_COND)
    return;

  gsi_prev_nondebug (&gsi);
  stmt = gsi_stmt (gsi);
  if (stmt != iv_update)
    return;

  gsi_prev_nondebug (&gsi);
  if (gsi_end_p (gsi))
    return;

  stmt = gsi_stmt (gsi);
  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return;

  if (stmt != use->stmt)
    return;

  if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
    return;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Reordering \n");
      print_gimple_stmt (dump_file, iv_update, 0);
      print_gimple_stmt (dump_file, use->stmt, 0);
      fprintf (dump_file, "\n");
    }

  gsi = gsi_for_stmt (use->stmt);
  gsi_iv = gsi_for_stmt (iv_update);
  gsi_move_before (&gsi_iv, &gsi);

  cand->pos = IP_BEFORE_USE;
  cand->incremented_at = use->stmt;
}

/* Rewrites USE (address that is an iv) using candidate CAND.  */

static void
rewrite_use_address (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  bool ok;

  adjust_iv_update_pos (cand, use);
  ok = get_computation_aff (data->current_loop, use->stmt, use, cand, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  /* To avoid undefined overflow problems, all IV candidates use unsigned
     integer types.  The drawback is that this makes it impossible for
     create_mem_ref to distinguish an IV that is based on a memory object
     from one that represents simply an offset.

     To work around this problem, we pass a hint to create_mem_ref that
     indicates which variable (if any) in aff is an IV based on a memory
     object.  Note that we only consider the candidate.  If this is not
     based on an object, the base of the reference is in some subexpression
     of the use -- but these will use pointer types, so they are recognized
     by the create_mem_ref heuristics anyway.  */
  tree iv = var_at_stmt (data->current_loop, cand, use->stmt);
  tree base_hint = (cand->iv->base_object) ? iv : NULL_TREE;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  tree type = TREE_TYPE (*use->op_p);
  unsigned int align = get_object_alignment (*use->op_p);
  if (align != TYPE_ALIGN (type))
    type = build_aligned_type (type, align);

  tree ref = create_mem_ref (&bsi, type, &aff,
                             reference_alias_ptr_type (*use->op_p),
                             iv, base_hint, data->speed);

  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}
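
/* E.g. an address use  *(p_1 + ivtmp.5_8 + 8)  with a candidate based on
   the object p_1 may end up as something like
   MEM[base: p_1, index: ivtmp.5_8, offset: 8B]; the BASE_HINT tells
   create_mem_ref which unsigned IV actually carries the object base.  */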

/* Rewrites USE (the condition such that one of the arguments is an iv) using
   candidate CAND.  */

static void
rewrite_use_compare (struct ivopts_data *data,
                     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, op, bound;
  gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct iv_group *group = data->vgroups[use->group_id];
  struct cost_pair *cp = get_group_iv_cost (data, group, cand);

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);
      gimple_seq stmts;

      if (dump_file && (dump_flags & TDF_DETAILS))
        {
          fprintf (dump_file, "Replacing exit test: ");
          print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
        }
      compare = cp->comp;
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
      if (stmts)
        gsi_insert_seq_on_edge_immediate (
                loop_preheader_edge (data->current_loop),
                stmts);

      gcond *cond_stmt = as_a <gcond *> (use->stmt);
      gimple_cond_set_lhs (cond_stmt, var);
      gimple_cond_set_code (cond_stmt, compare);
      gimple_cond_set_rhs (cond_stmt, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv.  */
  comp = get_computation_at (data->current_loop, use->stmt, use, cand);
  gcc_assert (comp != NULL_TREE);
  gcc_assert (use->op_p != NULL);
  *use->op_p = force_gimple_operand_gsi (&bsi, comp, true,
                                         SSA_NAME_VAR (*use->op_p),
                                         true, GSI_SAME_STMT);
}
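
/* E.g. when a BOUND is available, an exit test  if (i_1 < n_2)  may be
   replaced by  if (ivtmp.5_8 != BOUND)  with the BOUND computation hoisted
   to the loop preheader; otherwise only the compared operand is
   re-expressed in terms of CAND.  */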

/* Rewrite the groups using the selected induction variables.  */

static void
rewrite_groups (struct ivopts_data *data)
{
  unsigned i, j;

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];
      struct iv_cand *cand = group->selected;

      gcc_assert (cand);

      if (group->type == USE_NONLINEAR_EXPR)
        {
          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_nonlinear_expr (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
      else if (group->type == USE_ADDRESS)
        {
          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_address (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
      else
        {
          gcc_assert (group->type == USE_COMPARE);

          for (j = 0; j < group->vuses.length (); j++)
            {
              rewrite_use_compare (data, group->vuses[j], cand);
              update_stmt (group->vuses[j]->stmt);
            }
        }
    }
}

/* Removes the ivs that are not used after rewriting.  */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;
  bitmap toremove = BITMAP_ALLOC (NULL);

  /* Figure out an order in which to release SSA DEFs so that we don't
     release something that we'd have to propagate into a debug stmt
     afterwards.  */
  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
          && !integer_zerop (info->iv->step)
          && !info->inv_id
          && !info->iv->nonlin_use
          && !info->preserve_biv)
        {
          bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));

          tree def = info->iv->ssa_name;

          if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
            {
              imm_use_iterator imm_iter;
              use_operand_p use_p;
              gimple *stmt;
              int count = 0;

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  /* We just want to determine whether to do nothing
                     (count == 0), to substitute the computed
                     expression into a single use of the SSA DEF by
                     itself (count == 1), or to use a debug temp
                     because the SSA DEF is used multiple times or as
                     part of a larger expression (count > 1).  */
                  count++;
                  if (gimple_debug_bind_get_value (stmt) != def)
                    count++;

                  if (count > 1)
                    BREAK_FROM_IMM_USE_STMT (imm_iter);
                }

              if (!count)
                continue;

              struct iv_use dummy_use;
              struct iv_cand *best_cand = NULL, *cand;
              unsigned i, best_pref = 0, cand_pref;

              memset (&dummy_use, 0, sizeof (dummy_use));
              dummy_use.iv = info->iv;
              for (i = 0; i < data->vgroups.length () && i < 64; i++)
                {
                  cand = data->vgroups[i]->selected;
                  if (cand == best_cand)
                    continue;
                  cand_pref = operand_equal_p (cand->iv->step,
                                               info->iv->step, 0)
                              ? 4 : 0;
                  cand_pref
                    += TYPE_MODE (TREE_TYPE (cand->iv->base))
                       == TYPE_MODE (TREE_TYPE (info->iv->base))
                       ? 2 : 0;
                  cand_pref
                    += TREE_CODE (cand->iv->base) == INTEGER_CST
                       ? 1 : 0;
                  if (best_cand == NULL || best_pref < cand_pref)
                    {
                      best_cand = cand;
                      best_pref = cand_pref;
                    }
                }

              if (!best_cand)
                continue;

              tree comp = get_computation_at (data->current_loop,
                                              SSA_NAME_DEF_STMT (def),
                                              &dummy_use, best_cand);
              if (!comp)
                continue;

              if (count > 1)
                {
                  tree vexpr = make_node (DEBUG_EXPR_DECL);
                  DECL_ARTIFICIAL (vexpr) = 1;
                  TREE_TYPE (vexpr) = TREE_TYPE (comp);
                  if (SSA_NAME_VAR (def))
                    SET_DECL_MODE (vexpr, DECL_MODE (SSA_NAME_VAR (def)));
                  else
                    SET_DECL_MODE (vexpr, TYPE_MODE (TREE_TYPE (vexpr)));
                  gdebug *def_temp
                    = gimple_build_debug_bind (vexpr, comp, NULL);
                  gimple_stmt_iterator gsi;

                  if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
                    gsi = gsi_after_labels (gimple_bb
                                            (SSA_NAME_DEF_STMT (def)));
                  else
                    gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));

                  gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
                  comp = vexpr;
                }

              FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
                {
                  if (!gimple_debug_bind_p (stmt))
                    continue;

                  FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
                    SET_USE (use_p, comp);

                  update_stmt (stmt);
                }
            }
        }
    }

  release_defs_bitset (toremove);

  BITMAP_FREE (toremove);
}
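
/* E.g. when a dead iv i_1 is released, a debug bind  # DEBUG i => i_1  is
   retargeted to the best surviving candidate.  With several binds, or with
   the def used inside a larger bound expression, the computed value is
   first attached to a debug temp, roughly

     # DEBUG D#1 => ivtmp.5_8 * 4 + base_2

   and the binds then refer to D#1, so the debug info survives the
   removal.  */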

/* Frees memory occupied by struct tree_niter_desc in *VALUE.  Callback
   for hash_map::traverse.  */

bool
free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
{
  free (value);
  return true;
}

/* Frees data allocated by the optimization of a single loop.  */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      data->niters->traverse<void *, free_tree_niter_desc> (NULL);
      delete data->niters;
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < data->vgroups.length (); i++)
    {
      struct iv_group *group = data->vgroups[i];

      for (j = 0; j < group->vuses.length (); j++)
        free (group->vuses[j]);
      group->vuses.release ();

      BITMAP_FREE (group->related_cands);
      for (j = 0; j < group->n_map_members; j++)
        {
          if (group->cost_map[j].inv_vars)
            BITMAP_FREE (group->cost_map[j].inv_vars);
          if (group->cost_map[j].inv_exprs)
            BITMAP_FREE (group->cost_map[j].inv_exprs);
        }

      free (group->cost_map);
      free (group);
    }
  data->vgroups.truncate (0);

  for (i = 0; i < data->vcands.length (); i++)
    {
      struct iv_cand *cand = data->vcands[i];

      if (cand->inv_vars)
        BITMAP_FREE (cand->inv_vars);
      if (cand->inv_exprs)
        BITMAP_FREE (cand->inv_exprs);
      free (cand);
    }
  data->vcands.truncate (0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
                                     data->version_info_size);
    }

  data->max_inv_var_id = 0;
  data->max_inv_expr_id = 0;

  FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
    SET_DECL_RTL (obj, NULL_RTX);

  decl_rtl_to_reset.truncate (0);

  data->inv_expr_tab->empty ();

  data->iv_common_cand_tab->empty ();
  data->iv_common_cands.truncate (0);
}

/* Finalizes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  decl_rtl_to_reset.release ();
  data->vgroups.release ();
  data->vcands.release ();
  delete data->inv_expr_tab;
  data->inv_expr_tab = NULL;
  free_affine_expand_cache (&data->name_expansion_cache);
  delete data->iv_common_cand_tab;
  data->iv_common_cand_tab = NULL;
  data->iv_common_cands.release ();
  obstack_free (&data->iv_obstack, NULL);
}

/* Returns true if the loop body BODY (with NUM_NODES blocks) includes any
   calls that are neither internal functions nor inexpensive builtins.  */

static bool
loop_body_includes_call (basic_block *body, unsigned num_nodes)
{
  gimple_stmt_iterator gsi;
  unsigned i;

  for (i = 0; i < num_nodes; i++)
    for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
      {
        gimple *stmt = gsi_stmt (gsi);
        if (is_gimple_call (stmt)
            && !gimple_call_internal_p (stmt)
            && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
          return true;
      }
  return false;
}
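
/* The result is cached in DATA->body_includes_call; a loop that makes real
   calls effectively has fewer registers available, which makes large IV
   sets look costlier in the register-pressure part of the cost model.  */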

/* Optimizes the LOOP.  Returns true if anything changed.  */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit = single_dom_exit (loop);
  basic_block *body;

  gcc_assert (!data->niters);
  data->current_loop = loop;
  data->loop_loc = find_loop_location (loop);
  data->speed = optimize_loop_for_speed_p (loop);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d", loop->num);
      if (data->loop_loc != UNKNOWN_LOCATION)
        fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
                 LOCATION_LINE (data->loop_loc));
      fprintf (dump_file, "\n");

      if (exit)
        {
          fprintf (dump_file, "  single exit %d -> %d, exit condition ",
                   exit->src->index, exit->dest->index);
          print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
          fprintf (dump_file, "\n");
        }

      fprintf (dump_file, "\n");
    }
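
  /* For illustration, with -fdump-tree-ivopts-details the block above
     prints a header roughly like

       Processing loop 1 at foo.c:8
         single exit 3 -> 5, exit condition if (i_1 < n_2)

     (indices, location and condition depend on the loop).  */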

  body = get_loop_body (loop);
  data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
  renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
  free (body);

  data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop.  */
  if (!find_induction_variables (data))
    goto finish;

  /* Finds interesting uses (item 1).  */
  find_interesting_uses (data);
  if (data->vgroups.length () > MAX_CONSIDERED_GROUPS)
    goto finish;

  /* Finds candidates for the induction variables (item 2).  */
  find_iv_candidates (data);

  /* Calculates the costs (item 3, part 1).  */
  determine_iv_costs (data);
  determine_group_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2).  */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1).  */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2).  */
  rewrite_groups (data);

  /* Remove the ivs that are unused after rewriting.  */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated.  */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point.  Optimizes induction variables in loops.  */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones.  */
  FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
        flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}

#include "gt-tree-ssa-loop-ivopts.h"