]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
re PR testsuite/27476 (ACATS: Ada testsuite Bourne shell compatibility problem on...
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
8b11a64c 1/* Induction variable optimizations.
ad616de1 2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
8b11a64c
ZD
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 2, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING. If not, write to the Free
366ccddb
KC
18Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
1902110-1301, USA. */
8b11a64c
ZD
20
21/* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
70#include "rtl.h"
71#include "tm_p.h"
72#include "hard-reg-set.h"
73#include "basic-block.h"
74#include "output.h"
75#include "diagnostic.h"
76#include "tree-flow.h"
77#include "tree-dump.h"
78#include "timevar.h"
79#include "cfgloop.h"
80#include "varray.h"
81#include "expr.h"
82#include "tree-pass.h"
83#include "ggc.h"
84#include "insn-config.h"
85#include "recog.h"
86#include "hashtab.h"
87#include "tree-chrec.h"
88#include "tree-scalar-evolution.h"
89#include "cfgloop.h"
90#include "params.h"
39b4020c 91#include "langhooks.h"
8b11a64c
ZD
92
93/* The infinite cost. */
94#define INFTY 10000000
95
96/* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98#define AVG_LOOP_NITER(LOOP) 5
99
8b11a64c
ZD
100
101/* Representation of the induction variable. */
102struct iv
103{
104 tree base; /* Initial value of the iv. */
e6845c23 105 tree base_object; /* A memory object to that the induction variable points. */
8b11a64c
ZD
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
111};
112
113/* Per-ssa version information (induction variable descriptions, etc.). */
114struct version_info
115{
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
122};
123
8b11a64c
ZD
124/* Types of uses. */
125enum use_type
126{
127 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
8b11a64c
ZD
128 USE_ADDRESS, /* Use in an address. */
129 USE_COMPARE /* Use is a compare. */
130};
131
132/* The candidate - cost pair. */
133struct cost_pair
134{
135 struct iv_cand *cand; /* The candidate. */
136 unsigned cost; /* The cost. */
137 bitmap depends_on; /* The list of invariants that have to be
138 preserved. */
f5f12961
ZD
139 tree value; /* For final value elimination, the expression for
140 the final value of the iv. For iv elimination,
141 the new bound to compare with. */
8b11a64c
ZD
142};
143
144/* Use. */
145struct iv_use
146{
147 unsigned id; /* The id of the use. */
148 enum use_type type; /* Type of the use. */
149 struct iv *iv; /* The induction variable it is based on. */
150 tree stmt; /* Statement in that it occurs. */
151 tree *op_p; /* The place where it occurs. */
b1b02be2
ZD
152 bitmap related_cands; /* The set of "related" iv candidates, plus the common
153 important ones. */
8b11a64c
ZD
154
155 unsigned n_map_members; /* Number of candidates in the cost_map list. */
156 struct cost_pair *cost_map;
157 /* The costs wrto the iv candidates. */
158
159 struct iv_cand *selected;
160 /* The selected candidate. */
161};
162
163/* The position where the iv is computed. */
164enum iv_position
165{
166 IP_NORMAL, /* At the end, just before the exit condition. */
167 IP_END, /* At the end of the latch block. */
168 IP_ORIGINAL /* The original biv. */
169};
170
171/* The induction variable candidate. */
172struct iv_cand
173{
174 unsigned id; /* The number of the candidate. */
175 bool important; /* Whether this is an "important" candidate, i.e. such
176 that it should be considered by all uses. */
177 enum iv_position pos; /* Where it is computed. */
178 tree incremented_at; /* For original biv, the statement where it is
179 incremented. */
180 tree var_before; /* The variable used for it before increment. */
181 tree var_after; /* The variable used for it after increment. */
182 struct iv *iv; /* The value of the candidate. NULL for
183 "pseudocandidate" used to indicate the possibility
184 to replace the final value of an iv by direct
185 computation of the value. */
186 unsigned cost; /* Cost of the candidate. */
9be872b7
ZD
187 bitmap depends_on; /* The list of invariants that are used in step of the
188 biv. */
8b11a64c
ZD
189};
190
191/* The data used by the induction variable optimizations. */
192
69ebd99d
KH
193typedef struct iv_use *iv_use_p;
194DEF_VEC_P(iv_use_p);
195DEF_VEC_ALLOC_P(iv_use_p,heap);
196
197typedef struct iv_cand *iv_cand_p;
198DEF_VEC_P(iv_cand_p);
199DEF_VEC_ALLOC_P(iv_cand_p,heap);
200
8b11a64c
ZD
201struct ivopts_data
202{
203 /* The currently optimized loop. */
204 struct loop *current_loop;
205
9a2ef6b8
ZD
206 /* Number of registers used in it. */
207 unsigned regs_used;
208
ca4c3169
ZD
209 /* Numbers of iterations for all exits of the current loop. */
210 htab_t niters;
211
8b11a64c
ZD
212 /* The size of version_info array allocated. */
213 unsigned version_info_size;
214
215 /* The array of information for the ssa names. */
216 struct version_info *version_info;
217
218 /* The bitmap of indices in version_info whose value was changed. */
219 bitmap relevant;
220
221 /* The maximum invariant id. */
222 unsigned max_inv_id;
223
224 /* The uses of induction variables. */
69ebd99d 225 VEC(iv_use_p,heap) *iv_uses;
8b11a64c
ZD
226
227 /* The candidates. */
69ebd99d 228 VEC(iv_cand_p,heap) *iv_candidates;
8b11a64c 229
80cad5fa
ZD
230 /* A bitmap of important candidates. */
231 bitmap important_candidates;
232
8b11a64c
ZD
233 /* Whether to consider just related and important candidates when replacing a
234 use. */
235 bool consider_all_candidates;
236};
237
b1b02be2
ZD
238/* An assignment of iv candidates to uses. */
239
240struct iv_ca
241{
242 /* The number of uses covered by the assignment. */
243 unsigned upto;
244
245 /* Number of uses that cannot be expressed by the candidates in the set. */
246 unsigned bad_uses;
247
248 /* Candidate assigned to a use, together with the related costs. */
249 struct cost_pair **cand_for_use;
250
251 /* Number of times each candidate is used. */
252 unsigned *n_cand_uses;
253
254 /* The candidates used. */
255 bitmap cands;
256
36f5ada1
ZD
257 /* The number of candidates in the set. */
258 unsigned n_cands;
259
b1b02be2
ZD
260 /* Total number of registers needed. */
261 unsigned n_regs;
262
263 /* Total cost of expressing uses. */
264 unsigned cand_use_cost;
265
266 /* Total cost of candidates. */
267 unsigned cand_cost;
268
269 /* Number of times each invariant is used. */
270 unsigned *n_invariant_uses;
271
272 /* Total cost of the assignment. */
273 unsigned cost;
274};
275
276/* Difference of two iv candidate assignments. */
277
278struct iv_ca_delta
279{
280 /* Changed use. */
281 struct iv_use *use;
282
283 /* An old assignment (for rollback purposes). */
284 struct cost_pair *old_cp;
285
286 /* A new assignment. */
287 struct cost_pair *new_cp;
288
289 /* Next change in the list. */
290 struct iv_ca_delta *next_change;
291};
292
8b11a64c
ZD
293/* Bound on number of candidates below that all candidates are considered. */
294
295#define CONSIDER_ALL_CANDIDATES_BOUND \
296 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
297
2a7e31df 298/* If there are more iv occurrences, we just give up (it is quite unlikely that
8b11a64c
ZD
299 optimizing such a loop would help, and it would take ages). */
300
301#define MAX_CONSIDERED_USES \
302 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
303
36f5ada1
ZD
304/* If there are at most this number of ivs in the set, try removing unnecessary
305 ivs from the set always. */
306
307#define ALWAYS_PRUNE_CAND_SET_BOUND \
308 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
309
8b11a64c
ZD
310/* The list of trees for that the decl_rtl field must be reset is stored
311 here. */
312
69ebd99d 313static VEC(tree,heap) *decl_rtl_to_reset;
8b11a64c
ZD
314
315/* Number of uses recorded in DATA. */
316
317static inline unsigned
318n_iv_uses (struct ivopts_data *data)
319{
69ebd99d 320 return VEC_length (iv_use_p, data->iv_uses);
8b11a64c
ZD
321}
322
323/* Ith use recorded in DATA. */
324
325static inline struct iv_use *
326iv_use (struct ivopts_data *data, unsigned i)
327{
69ebd99d 328 return VEC_index (iv_use_p, data->iv_uses, i);
8b11a64c
ZD
329}
330
331/* Number of candidates recorded in DATA. */
332
333static inline unsigned
334n_iv_cands (struct ivopts_data *data)
335{
69ebd99d 336 return VEC_length (iv_cand_p, data->iv_candidates);
8b11a64c
ZD
337}
338
339/* Ith candidate recorded in DATA. */
340
341static inline struct iv_cand *
342iv_cand (struct ivopts_data *data, unsigned i)
343{
69ebd99d 344 return VEC_index (iv_cand_p, data->iv_candidates, i);
8b11a64c
ZD
345}
346
8b11a64c
ZD
347/* The single loop exit if it dominates the latch, NULL otherwise. */
348
b7eae7b8 349edge
8b11a64c
ZD
350single_dom_exit (struct loop *loop)
351{
352 edge exit = loop->single_exit;
353
354 if (!exit)
355 return NULL;
356
357 if (!just_once_each_iteration_p (loop, exit->src))
358 return NULL;
359
360 return exit;
361}
362
363/* Dumps information about the induction variable IV to FILE. */
364
365extern void dump_iv (FILE *, struct iv *);
366void
367dump_iv (FILE *file, struct iv *iv)
368{
e6845c23
ZD
369 if (iv->ssa_name)
370 {
371 fprintf (file, "ssa name ");
372 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
373 fprintf (file, "\n");
374 }
8b11a64c 375
2f4675b4
ZD
376 fprintf (file, " type ");
377 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
378 fprintf (file, "\n");
379
8b11a64c
ZD
380 if (iv->step)
381 {
382 fprintf (file, " base ");
383 print_generic_expr (file, iv->base, TDF_SLIM);
384 fprintf (file, "\n");
385
386 fprintf (file, " step ");
387 print_generic_expr (file, iv->step, TDF_SLIM);
388 fprintf (file, "\n");
389 }
390 else
391 {
392 fprintf (file, " invariant ");
393 print_generic_expr (file, iv->base, TDF_SLIM);
394 fprintf (file, "\n");
395 }
396
e6845c23
ZD
397 if (iv->base_object)
398 {
399 fprintf (file, " base object ");
400 print_generic_expr (file, iv->base_object, TDF_SLIM);
401 fprintf (file, "\n");
402 }
403
8b11a64c
ZD
404 if (iv->biv_p)
405 fprintf (file, " is a biv\n");
406}
407
408/* Dumps information about the USE to FILE. */
409
410extern void dump_use (FILE *, struct iv_use *);
411void
412dump_use (FILE *file, struct iv_use *use)
413{
8b11a64c
ZD
414 fprintf (file, "use %d\n", use->id);
415
416 switch (use->type)
417 {
418 case USE_NONLINEAR_EXPR:
419 fprintf (file, " generic\n");
420 break;
421
8b11a64c
ZD
422 case USE_ADDRESS:
423 fprintf (file, " address\n");
424 break;
425
426 case USE_COMPARE:
427 fprintf (file, " compare\n");
428 break;
429
430 default:
1e128c5f 431 gcc_unreachable ();
8b11a64c
ZD
432 }
433
2f4675b4
ZD
434 fprintf (file, " in statement ");
435 print_generic_expr (file, use->stmt, TDF_SLIM);
436 fprintf (file, "\n");
437
438 fprintf (file, " at position ");
439 if (use->op_p)
440 print_generic_expr (file, *use->op_p, TDF_SLIM);
441 fprintf (file, "\n");
442
e6845c23 443 dump_iv (file, use->iv);
2f4675b4 444
eec5fec9
ZD
445 if (use->related_cands)
446 {
447 fprintf (file, " related candidates ");
448 dump_bitmap (file, use->related_cands);
449 }
8b11a64c
ZD
450}
451
452/* Dumps information about the uses to FILE. */
453
454extern void dump_uses (FILE *, struct ivopts_data *);
455void
456dump_uses (FILE *file, struct ivopts_data *data)
457{
458 unsigned i;
459 struct iv_use *use;
460
461 for (i = 0; i < n_iv_uses (data); i++)
462 {
463 use = iv_use (data, i);
464
465 dump_use (file, use);
466 fprintf (file, "\n");
467 }
468}
469
470/* Dumps information about induction variable candidate CAND to FILE. */
471
472extern void dump_cand (FILE *, struct iv_cand *);
473void
474dump_cand (FILE *file, struct iv_cand *cand)
475{
476 struct iv *iv = cand->iv;
477
478 fprintf (file, "candidate %d%s\n",
479 cand->id, cand->important ? " (important)" : "");
480
9be872b7
ZD
481 if (cand->depends_on)
482 {
483 fprintf (file, " depends on ");
484 dump_bitmap (file, cand->depends_on);
485 }
486
8b11a64c
ZD
487 if (!iv)
488 {
489 fprintf (file, " final value replacement\n");
490 return;
491 }
492
493 switch (cand->pos)
494 {
495 case IP_NORMAL:
496 fprintf (file, " incremented before exit test\n");
497 break;
498
499 case IP_END:
500 fprintf (file, " incremented at end\n");
501 break;
502
503 case IP_ORIGINAL:
504 fprintf (file, " original biv\n");
505 break;
506 }
507
e6845c23 508 dump_iv (file, iv);
8b11a64c
ZD
509}
510
511/* Returns the info for ssa version VER. */
512
513static inline struct version_info *
514ver_info (struct ivopts_data *data, unsigned ver)
515{
516 return data->version_info + ver;
517}
518
519/* Returns the info for ssa name NAME. */
520
521static inline struct version_info *
522name_info (struct ivopts_data *data, tree name)
523{
524 return ver_info (data, SSA_NAME_VERSION (name));
525}
526
527/* Checks whether there exists number X such that X * B = A, counting modulo
528 2^BITS. */
529
530static bool
531divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
532 HOST_WIDE_INT *x)
533{
534 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
535 unsigned HOST_WIDE_INT inv, ex, val;
536 unsigned i;
537
538 a &= mask;
539 b &= mask;
540
541 /* First divide the whole equation by 2 as long as possible. */
542 while (!(a & 1) && !(b & 1))
543 {
544 a >>= 1;
545 b >>= 1;
546 bits--;
547 mask >>= 1;
548 }
549
550 if (!(b & 1))
551 {
552 /* If b is still even, a is odd and there is no such x. */
553 return false;
554 }
555
556 /* Find the inverse of b. We compute it as
557 b^(2^(bits - 1) - 1) (mod 2^bits). */
558 inv = 1;
559 ex = b;
560 for (i = 0; i < bits - 1; i++)
561 {
562 inv = (inv * ex) & mask;
563 ex = (ex * ex) & mask;
564 }
565
566 val = (a * inv) & mask;
567
1e128c5f 568 gcc_assert (((val * b) & mask) == a);
8b11a64c
ZD
569
570 if ((val >> (bits - 1)) & 1)
571 val |= ~mask;
572
573 *x = val;
574
575 return true;
576}
577
578/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
579 emitted in LOOP. */
580
581static bool
582stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
583{
584 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
585
1e128c5f 586 gcc_assert (bb);
8b11a64c
ZD
587
588 if (sbb == loop->latch)
589 return true;
590
591 if (sbb != bb)
592 return false;
593
594 return stmt == last_stmt (bb);
595}
596
597/* Returns true if STMT if after the place where the original induction
598 variable CAND is incremented. */
599
600static bool
601stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
602{
603 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
604 basic_block stmt_bb = bb_for_stmt (stmt);
605 block_stmt_iterator bsi;
606
607 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
608 return false;
609
610 if (stmt_bb != cand_bb)
611 return true;
612
613 /* Scan the block from the end, since the original ivs are usually
614 incremented at the end of the loop body. */
615 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
616 {
617 if (bsi_stmt (bsi) == cand->incremented_at)
618 return false;
619 if (bsi_stmt (bsi) == stmt)
620 return true;
621 }
622}
623
624/* Returns true if STMT if after the place where the induction variable
625 CAND is incremented in LOOP. */
626
627static bool
628stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
629{
630 switch (cand->pos)
631 {
632 case IP_END:
633 return false;
634
635 case IP_NORMAL:
636 return stmt_after_ip_normal_pos (loop, stmt);
637
638 case IP_ORIGINAL:
639 return stmt_after_ip_original_pos (cand, stmt);
640
641 default:
1e128c5f 642 gcc_unreachable ();
8b11a64c
ZD
643 }
644}
645
dcccd88d
ZD
646/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
647
648static bool
649abnormal_ssa_name_p (tree exp)
650{
651 if (!exp)
652 return false;
653
654 if (TREE_CODE (exp) != SSA_NAME)
655 return false;
656
657 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
658}
659
660/* Returns false if BASE or INDEX contains a ssa name that occurs in an
661 abnormal phi node. Callback for for_each_index. */
662
663static bool
664idx_contains_abnormal_ssa_name_p (tree base, tree *index,
665 void *data ATTRIBUTE_UNUSED)
666{
667 if (TREE_CODE (base) == ARRAY_REF)
668 {
669 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
670 return false;
671 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
672 return false;
673 }
674
675 return !abnormal_ssa_name_p (*index);
676}
677
678/* Returns true if EXPR contains a ssa name that occurs in an
679 abnormal phi node. */
680
681static bool
682contains_abnormal_ssa_name_p (tree expr)
683{
684 enum tree_code code;
685 enum tree_code_class class;
686
687 if (!expr)
688 return false;
689
690 code = TREE_CODE (expr);
691 class = TREE_CODE_CLASS (code);
692
693 if (code == SSA_NAME)
694 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
695
696 if (code == INTEGER_CST
697 || is_gimple_min_invariant (expr))
698 return false;
699
700 if (code == ADDR_EXPR)
701 return !for_each_index (&TREE_OPERAND (expr, 0),
702 idx_contains_abnormal_ssa_name_p,
703 NULL);
704
705 switch (class)
706 {
707 case tcc_binary:
708 case tcc_comparison:
709 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
710 return true;
711
712 /* Fallthru. */
713 case tcc_unary:
714 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
715 return true;
716
717 break;
718
719 default:
720 gcc_unreachable ();
721 }
722
723 return false;
724}
725
ca4c3169
ZD
726/* Element of the table in that we cache the numbers of iterations obtained
727 from exits of the loop. */
728
729struct nfe_cache_elt
730{
731 /* The edge for that the number of iterations is cached. */
732 edge exit;
733
dcccd88d
ZD
734 /* Number of iterations corresponding to this exit, or NULL if it cannot be
735 determined. */
736 tree niter;
ca4c3169
ZD
737};
738
739/* Hash function for nfe_cache_elt E. */
740
741static hashval_t
742nfe_hash (const void *e)
743{
744 const struct nfe_cache_elt *elt = e;
745
746 return htab_hash_pointer (elt->exit);
747}
748
749/* Equality function for nfe_cache_elt E1 and edge E2. */
750
751static int
752nfe_eq (const void *e1, const void *e2)
753{
754 const struct nfe_cache_elt *elt1 = e1;
755
756 return elt1->exit == e2;
757}
758
dcccd88d 759/* Returns tree describing number of iterations determined from
ca4c3169
ZD
760 EXIT of DATA->current_loop, or NULL if something goes wrong. */
761
dcccd88d 762static tree
ca4c3169
ZD
763niter_for_exit (struct ivopts_data *data, edge exit)
764{
765 struct nfe_cache_elt *nfe_desc;
dcccd88d 766 struct tree_niter_desc desc;
ca4c3169
ZD
767 PTR *slot;
768
769 slot = htab_find_slot_with_hash (data->niters, exit,
770 htab_hash_pointer (exit),
771 INSERT);
772
773 if (!*slot)
774 {
775 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
776 nfe_desc->exit = exit;
dcccd88d
ZD
777
778 /* Try to determine number of iterations. We must know it
779 unconditionally (i.e., without possibility of # of iterations
780 being zero). Also, we cannot safely work with ssa names that
781 appear in phi nodes on abnormal edges, so that we do not create
782 overlapping life ranges for them (PR 27283). */
783 if (number_of_iterations_exit (data->current_loop,
784 exit, &desc, true)
785 && zero_p (desc.may_be_zero)
786 && !contains_abnormal_ssa_name_p (desc.niter))
787 nfe_desc->niter = desc.niter;
788 else
789 nfe_desc->niter = NULL_TREE;
ca4c3169
ZD
790 }
791 else
792 nfe_desc = *slot;
793
dcccd88d 794 return nfe_desc->niter;
ca4c3169
ZD
795}
796
dcccd88d 797/* Returns tree describing number of iterations determined from
ca4c3169
ZD
798 single dominating exit of DATA->current_loop, or NULL if something
799 goes wrong. */
800
dcccd88d 801static tree
ca4c3169
ZD
802niter_for_single_dom_exit (struct ivopts_data *data)
803{
804 edge exit = single_dom_exit (data->current_loop);
805
806 if (!exit)
807 return NULL;
808
809 return niter_for_exit (data, exit);
810}
811
8b11a64c 812/* Initializes data structures used by the iv optimization pass, stored
9a2ef6b8 813 in DATA. */
8b11a64c
ZD
814
815static void
9a2ef6b8 816tree_ssa_iv_optimize_init (struct ivopts_data *data)
8b11a64c 817{
8b11a64c 818 data->version_info_size = 2 * num_ssa_names;
5ed6ace5 819 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8bdbfff5
NS
820 data->relevant = BITMAP_ALLOC (NULL);
821 data->important_candidates = BITMAP_ALLOC (NULL);
8b11a64c 822 data->max_inv_id = 0;
ca4c3169 823 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
69ebd99d
KH
824 data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
825 data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
826 decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
8b11a64c
ZD
827}
828
e6845c23
ZD
829/* Returns a memory object to that EXPR points. In case we are able to
830 determine that it does not point to any such object, NULL is returned. */
831
832static tree
833determine_base_object (tree expr)
834{
835 enum tree_code code = TREE_CODE (expr);
836 tree base, obj, op0, op1;
837
838 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
839 return NULL_TREE;
840
841 switch (code)
842 {
843 case INTEGER_CST:
844 return NULL_TREE;
845
846 case ADDR_EXPR:
847 obj = TREE_OPERAND (expr, 0);
848 base = get_base_address (obj);
849
850 if (!base)
f5e2738c 851 return expr;
e6845c23 852
7299dbfb 853 if (TREE_CODE (base) == INDIRECT_REF)
f5e2738c 854 return determine_base_object (TREE_OPERAND (base, 0));
7299dbfb 855
62b37d91
RG
856 return fold_convert (ptr_type_node,
857 build_fold_addr_expr (base));
e6845c23
ZD
858
859 case PLUS_EXPR:
860 case MINUS_EXPR:
861 op0 = determine_base_object (TREE_OPERAND (expr, 0));
862 op1 = determine_base_object (TREE_OPERAND (expr, 1));
863
864 if (!op1)
865 return op0;
866
867 if (!op0)
868 return (code == PLUS_EXPR
869 ? op1
62b37d91 870 : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));
e6845c23 871
62b37d91 872 return fold_build2 (code, ptr_type_node, op0, op1);
e6845c23 873
f5e2738c
ZD
874 case NOP_EXPR:
875 case CONVERT_EXPR:
876 return determine_base_object (TREE_OPERAND (expr, 0));
877
e6845c23
ZD
878 default:
879 return fold_convert (ptr_type_node, expr);
880 }
881}
882
8b11a64c
ZD
883/* Allocates an induction variable with given initial value BASE and step STEP
884 for loop LOOP. */
885
886static struct iv *
887alloc_iv (tree base, tree step)
888{
5ed6ace5 889 struct iv *iv = XCNEW (struct iv);
8b11a64c
ZD
890
891 if (step && integer_zerop (step))
892 step = NULL_TREE;
893
894 iv->base = base;
e6845c23 895 iv->base_object = determine_base_object (base);
8b11a64c
ZD
896 iv->step = step;
897 iv->biv_p = false;
898 iv->have_use_for = false;
899 iv->use_id = 0;
900 iv->ssa_name = NULL_TREE;
901
902 return iv;
903}
904
905/* Sets STEP and BASE for induction variable IV. */
906
907static void
908set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
909{
910 struct version_info *info = name_info (data, iv);
911
1e128c5f 912 gcc_assert (!info->iv);
8b11a64c
ZD
913
914 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
915 info->iv = alloc_iv (base, step);
916 info->iv->ssa_name = iv;
917}
918
919/* Finds induction variable declaration for VAR. */
920
921static struct iv *
922get_iv (struct ivopts_data *data, tree var)
923{
924 basic_block bb;
925
926 if (!name_info (data, var)->iv)
927 {
928 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
929
930 if (!bb
931 || !flow_bb_inside_loop_p (data->current_loop, bb))
932 set_iv (data, var, var, NULL_TREE);
933 }
934
935 return name_info (data, var)->iv;
936}
937
9be872b7
ZD
938/* Determines the step of a biv defined in PHI. Returns NULL if PHI does
939 not define a simple affine biv with nonzero step. */
8b11a64c
ZD
940
941static tree
942determine_biv_step (tree phi)
943{
944 struct loop *loop = bb_for_stmt (phi)->loop_father;
a6f778b2
ZD
945 tree name = PHI_RESULT (phi);
946 affine_iv iv;
8b11a64c
ZD
947
948 if (!is_gimple_reg (name))
949 return NULL_TREE;
950
a6f778b2 951 if (!simple_iv (loop, phi, name, &iv, true))
8b11a64c
ZD
952 return NULL_TREE;
953
a6f778b2 954 return (zero_p (iv.step) ? NULL_TREE : iv.step);
8b11a64c
ZD
955}
956
8b11a64c
ZD
957/* Finds basic ivs. */
958
959static bool
960find_bivs (struct ivopts_data *data)
961{
962 tree phi, step, type, base;
963 bool found = false;
964 struct loop *loop = data->current_loop;
965
bb29d951 966 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
967 {
968 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
969 continue;
970
971 step = determine_biv_step (phi);
8b11a64c
ZD
972 if (!step)
973 continue;
8b11a64c
ZD
974
975 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
d7bf3bcf 976 base = expand_simple_operations (base);
9be872b7
ZD
977 if (contains_abnormal_ssa_name_p (base)
978 || contains_abnormal_ssa_name_p (step))
8b11a64c
ZD
979 continue;
980
981 type = TREE_TYPE (PHI_RESULT (phi));
982 base = fold_convert (type, base);
9be872b7
ZD
983 if (step)
984 step = fold_convert (type, step);
8b11a64c
ZD
985
986 set_iv (data, PHI_RESULT (phi), base, step);
987 found = true;
988 }
989
990 return found;
991}
992
993/* Marks basic ivs. */
994
995static void
996mark_bivs (struct ivopts_data *data)
997{
998 tree phi, var;
999 struct iv *iv, *incr_iv;
1000 struct loop *loop = data->current_loop;
1001 basic_block incr_bb;
1002
bb29d951 1003 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1004 {
1005 iv = get_iv (data, PHI_RESULT (phi));
1006 if (!iv)
1007 continue;
1008
1009 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010 incr_iv = get_iv (data, var);
1011 if (!incr_iv)
1012 continue;
1013
1014 /* If the increment is in the subloop, ignore it. */
1015 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016 if (incr_bb->loop_father != data->current_loop
1017 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018 continue;
1019
1020 iv->biv_p = true;
1021 incr_iv->biv_p = true;
1022 }
1023}
1024
1025/* Checks whether STMT defines a linear induction variable and stores its
a6f778b2 1026 parameters to IV. */
8b11a64c
ZD
1027
1028static bool
a6f778b2 1029find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
8b11a64c
ZD
1030{
1031 tree lhs;
1032 struct loop *loop = data->current_loop;
1033
a6f778b2
ZD
1034 iv->base = NULL_TREE;
1035 iv->step = NULL_TREE;
8b11a64c
ZD
1036
1037 if (TREE_CODE (stmt) != MODIFY_EXPR)
1038 return false;
1039
1040 lhs = TREE_OPERAND (stmt, 0);
1041 if (TREE_CODE (lhs) != SSA_NAME)
1042 return false;
1043
a6f778b2 1044 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), iv, true))
8b11a64c 1045 return false;
a6f778b2 1046 iv->base = expand_simple_operations (iv->base);
8b11a64c 1047
a6f778b2
ZD
1048 if (contains_abnormal_ssa_name_p (iv->base)
1049 || contains_abnormal_ssa_name_p (iv->step))
8b11a64c
ZD
1050 return false;
1051
1052 return true;
1053}
1054
1055/* Finds general ivs in statement STMT. */
1056
1057static void
1058find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1059{
a6f778b2 1060 affine_iv iv;
8b11a64c 1061
a6f778b2 1062 if (!find_givs_in_stmt_scev (data, stmt, &iv))
8b11a64c
ZD
1063 return;
1064
a6f778b2 1065 set_iv (data, TREE_OPERAND (stmt, 0), iv.base, iv.step);
8b11a64c
ZD
1066}
1067
1068/* Finds general ivs in basic block BB. */
1069
1070static void
1071find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1072{
1073 block_stmt_iterator bsi;
1074
1075 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1076 find_givs_in_stmt (data, bsi_stmt (bsi));
1077}
1078
1079/* Finds general ivs. */
1080
1081static void
1082find_givs (struct ivopts_data *data)
1083{
1084 struct loop *loop = data->current_loop;
1085 basic_block *body = get_loop_body_in_dom_order (loop);
1086 unsigned i;
1087
1088 for (i = 0; i < loop->num_nodes; i++)
1089 find_givs_in_bb (data, body[i]);
1090 free (body);
1091}
1092
8b11a64c
ZD
1093/* For each ssa name defined in LOOP determines whether it is an induction
1094 variable and if so, its initial value and step. */
1095
1096static bool
1097find_induction_variables (struct ivopts_data *data)
1098{
1099 unsigned i;
87c476a2 1100 bitmap_iterator bi;
8b11a64c
ZD
1101
1102 if (!find_bivs (data))
1103 return false;
1104
1105 find_givs (data);
1106 mark_bivs (data);
8b11a64c
ZD
1107
1108 if (dump_file && (dump_flags & TDF_DETAILS))
1109 {
dcccd88d 1110 tree niter = niter_for_single_dom_exit (data);
ca4c3169
ZD
1111
1112 if (niter)
8b11a64c
ZD
1113 {
1114 fprintf (dump_file, " number of iterations ");
dcccd88d
ZD
1115 print_generic_expr (dump_file, niter, TDF_SLIM);
1116 fprintf (dump_file, "\n\n");
8b11a64c
ZD
1117 };
1118
1119 fprintf (dump_file, "Induction variables:\n\n");
1120
87c476a2 1121 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1122 {
1123 if (ver_info (data, i)->iv)
1124 dump_iv (dump_file, ver_info (data, i)->iv);
87c476a2 1125 }
8b11a64c
ZD
1126 }
1127
1128 return true;
1129}
1130
1131/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1132
1133static struct iv_use *
1134record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1135 tree stmt, enum use_type use_type)
1136{
5ed6ace5 1137 struct iv_use *use = XCNEW (struct iv_use);
8b11a64c
ZD
1138
1139 use->id = n_iv_uses (data);
1140 use->type = use_type;
1141 use->iv = iv;
1142 use->stmt = stmt;
1143 use->op_p = use_p;
8bdbfff5 1144 use->related_cands = BITMAP_ALLOC (NULL);
8b11a64c 1145
e6845c23
ZD
1146 /* To avoid showing ssa name in the dumps, if it was not reset by the
1147 caller. */
1148 iv->ssa_name = NULL_TREE;
1149
8b11a64c
ZD
1150 if (dump_file && (dump_flags & TDF_DETAILS))
1151 dump_use (dump_file, use);
1152
69ebd99d 1153 VEC_safe_push (iv_use_p, heap, data->iv_uses, use);
8b11a64c
ZD
1154
1155 return use;
1156}
1157
1158/* Checks whether OP is a loop-level invariant and if so, records it.
1159 NONLINEAR_USE is true if the invariant is used in a way we do not
1160 handle specially. */
1161
1162static void
1163record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1164{
1165 basic_block bb;
1166 struct version_info *info;
1167
1168 if (TREE_CODE (op) != SSA_NAME
1169 || !is_gimple_reg (op))
1170 return;
1171
1172 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1173 if (bb
1174 && flow_bb_inside_loop_p (data->current_loop, bb))
1175 return;
1176
1177 info = name_info (data, op);
1178 info->name = op;
1179 info->has_nonlin_use |= nonlinear_use;
1180 if (!info->inv_id)
1181 info->inv_id = ++data->max_inv_id;
1182 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1183}
1184
50cc9802 1185/* Checks whether the use OP is interesting and if so, records it. */
8b11a64c
ZD
1186
1187static struct iv_use *
50cc9802 1188find_interesting_uses_op (struct ivopts_data *data, tree op)
8b11a64c
ZD
1189{
1190 struct iv *iv;
1191 struct iv *civ;
1192 tree stmt;
1193 struct iv_use *use;
1194
1195 if (TREE_CODE (op) != SSA_NAME)
1196 return NULL;
1197
1198 iv = get_iv (data, op);
1199 if (!iv)
1200 return NULL;
1201
1202 if (iv->have_use_for)
1203 {
1204 use = iv_use (data, iv->use_id);
1205
50cc9802 1206 gcc_assert (use->type == USE_NONLINEAR_EXPR);
8b11a64c
ZD
1207 return use;
1208 }
1209
1210 if (zero_p (iv->step))
1211 {
1212 record_invariant (data, op, true);
1213 return NULL;
1214 }
1215 iv->have_use_for = true;
1216
5ed6ace5 1217 civ = XNEW (struct iv);
8b11a64c
ZD
1218 *civ = *iv;
1219
1220 stmt = SSA_NAME_DEF_STMT (op);
1e128c5f
GB
1221 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1222 || TREE_CODE (stmt) == MODIFY_EXPR);
8b11a64c 1223
50cc9802 1224 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
8b11a64c
ZD
1225 iv->use_id = use->id;
1226
1227 return use;
1228}
1229
8b11a64c
ZD
1230/* Checks whether the condition *COND_P in STMT is interesting
1231 and if so, records it. */
1232
1233static void
1234find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1235{
1236 tree *op0_p;
1237 tree *op1_p;
1238 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1239 struct iv const_iv;
1240 tree zero = integer_zero_node;
1241
1242 const_iv.step = NULL_TREE;
1243
f5f12961
ZD
1244 if (TREE_CODE (*cond_p) != SSA_NAME
1245 && !COMPARISON_CLASS_P (*cond_p))
8b11a64c
ZD
1246 return;
1247
1248 if (TREE_CODE (*cond_p) == SSA_NAME)
1249 {
1250 op0_p = cond_p;
1251 op1_p = &zero;
1252 }
1253 else
1254 {
1255 op0_p = &TREE_OPERAND (*cond_p, 0);
1256 op1_p = &TREE_OPERAND (*cond_p, 1);
1257 }
1258
1259 if (TREE_CODE (*op0_p) == SSA_NAME)
1260 iv0 = get_iv (data, *op0_p);
1261 else
1262 iv0 = &const_iv;
1263
1264 if (TREE_CODE (*op1_p) == SSA_NAME)
1265 iv1 = get_iv (data, *op1_p);
1266 else
1267 iv1 = &const_iv;
1268
1269 if (/* When comparing with non-invariant value, we may not do any senseful
1270 induction variable elimination. */
1271 (!iv0 || !iv1)
1272 /* Eliminating condition based on two ivs would be nontrivial.
1273 ??? TODO -- it is not really important to handle this case. */
1274 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1275 {
1276 find_interesting_uses_op (data, *op0_p);
1277 find_interesting_uses_op (data, *op1_p);
1278 return;
1279 }
1280
1281 if (zero_p (iv0->step) && zero_p (iv1->step))
1282 {
1283 /* If both are invariants, this is a work for unswitching. */
1284 return;
1285 }
1286
5ed6ace5 1287 civ = XNEW (struct iv);
8b11a64c
ZD
1288 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1289 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1290}
1291
be35cf60
ZD
1292/* Returns true if expression EXPR is obviously invariant in LOOP,
1293 i.e. if all its operands are defined outside of the LOOP. */
1294
feb075f4 1295bool
be35cf60
ZD
1296expr_invariant_in_loop_p (struct loop *loop, tree expr)
1297{
1298 basic_block def_bb;
1299 unsigned i, len;
1300
1301 if (is_gimple_min_invariant (expr))
1302 return true;
1303
1304 if (TREE_CODE (expr) == SSA_NAME)
1305 {
1306 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1307 if (def_bb
1308 && flow_bb_inside_loop_p (loop, def_bb))
1309 return false;
1310
1311 return true;
1312 }
1313
1314 if (!EXPR_P (expr))
1315 return false;
1316
54e4aedb 1317 len = TREE_CODE_LENGTH (TREE_CODE (expr));
be35cf60
ZD
1318 for (i = 0; i < len; i++)
1319 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1320 return false;
1321
1322 return true;
1323}
1324
8b11a64c
ZD
1325/* Cumulates the steps of indices into DATA and replaces their values with the
1326 initial ones. Returns false when the value of the index cannot be determined.
1327 Callback for for_each_index. */
1328
1329struct ifs_ivopts_data
1330{
1331 struct ivopts_data *ivopts_data;
1332 tree stmt;
1333 tree *step_p;
1334};
1335
1336static bool
1337idx_find_step (tree base, tree *idx, void *data)
1338{
1339 struct ifs_ivopts_data *dta = data;
1340 struct iv *iv;
1c1205fb 1341 tree step, iv_step, lbound, off;
2f4675b4 1342 struct loop *loop = dta->ivopts_data->current_loop;
be35cf60
ZD
1343
1344 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1345 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1346 return false;
1347
1348 /* If base is a component ref, require that the offset of the reference
3a7c155d 1349 be invariant. */
be35cf60
ZD
1350 if (TREE_CODE (base) == COMPONENT_REF)
1351 {
1352 off = component_ref_field_offset (base);
1353 return expr_invariant_in_loop_p (loop, off);
1354 }
1355
1356 /* If base is array, first check whether we will be able to move the
1357 reference out of the loop (in order to take its address in strength
1358 reduction). In order for this to work we need both lower bound
1359 and step to be loop invariants. */
1360 if (TREE_CODE (base) == ARRAY_REF)
1361 {
1362 step = array_ref_element_size (base);
1363 lbound = array_ref_low_bound (base);
1364
1365 if (!expr_invariant_in_loop_p (loop, step)
1366 || !expr_invariant_in_loop_p (loop, lbound))
1367 return false;
1368 }
1369
8b11a64c
ZD
1370 if (TREE_CODE (*idx) != SSA_NAME)
1371 return true;
1372
1373 iv = get_iv (dta->ivopts_data, *idx);
1374 if (!iv)
1375 return false;
1376
ea643120
RG
1377 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1378 *&x[0], which is not folded and does not trigger the
1379 ARRAY_REF path below. */
8b11a64c
ZD
1380 *idx = iv->base;
1381
1382 if (!iv->step)
1383 return true;
1384
8b11a64c 1385 if (TREE_CODE (base) == ARRAY_REF)
2f4675b4
ZD
1386 {
1387 step = array_ref_element_size (base);
2f4675b4
ZD
1388
1389 /* We only handle addresses whose step is an integer constant. */
1390 if (TREE_CODE (step) != INTEGER_CST)
1391 return false;
2f4675b4 1392 }
8b11a64c 1393 else
5212068f 1394 /* The step for pointer arithmetics already is 1 byte. */
1c1205fb 1395 step = build_int_cst (sizetype, 1);
8b11a64c 1396
d7770457
SP
1397 /* FIXME: convert_step should not be used outside chrec_convert: fix
1398 this by calling chrec_convert. */
1e8552eb
SP
1399 iv_step = convert_step (dta->ivopts_data->current_loop,
1400 sizetype, iv->base, iv->step, dta->stmt);
8b11a64c
ZD
1401
1402 if (!iv_step)
1403 {
1404 /* The index might wrap. */
1405 return false;
1406 }
1407
1c1205fb 1408 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
8b11a64c
ZD
1409
1410 if (!*dta->step_p)
1411 *dta->step_p = step;
1412 else
1c1205fb 1413 *dta->step_p = fold_build2 (PLUS_EXPR, sizetype, *dta->step_p, step);
8b11a64c
ZD
1414
1415 return true;
1416}
1417
1418/* Records use in index IDX. Callback for for_each_index. Ivopts data
1419 object is passed to it in DATA. */
1420
1421static bool
2f4675b4 1422idx_record_use (tree base, tree *idx,
8b11a64c
ZD
1423 void *data)
1424{
1425 find_interesting_uses_op (data, *idx);
2f4675b4
ZD
1426 if (TREE_CODE (base) == ARRAY_REF)
1427 {
1428 find_interesting_uses_op (data, array_ref_element_size (base));
1429 find_interesting_uses_op (data, array_ref_low_bound (base));
1430 }
8b11a64c
ZD
1431 return true;
1432}
1433
0a915e3d
ZD
1434/* Returns true if memory reference REF may be unaligned. */
1435
1436static bool
1437may_be_unaligned_p (tree ref)
1438{
1439 tree base;
1440 tree base_type;
1441 HOST_WIDE_INT bitsize;
1442 HOST_WIDE_INT bitpos;
1443 tree toffset;
1444 enum machine_mode mode;
1445 int unsignedp, volatilep;
1446 unsigned base_align;
1447
ac182688 1448 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
607fb860 1449 thus they are not misaligned. */
ac182688
ZD
1450 if (TREE_CODE (ref) == TARGET_MEM_REF)
1451 return false;
1452
0a915e3d
ZD
1453 /* The test below is basically copy of what expr.c:normal_inner_ref
1454 does to check whether the object must be loaded by parts when
1455 STRICT_ALIGNMENT is true. */
1456 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1457 &unsignedp, &volatilep, true);
1458 base_type = TREE_TYPE (base);
1459 base_align = TYPE_ALIGN (base_type);
1460
1461 if (mode != BLKmode
1462 && (base_align < GET_MODE_ALIGNMENT (mode)
1463 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1464 || bitpos % BITS_PER_UNIT != 0))
1465 return true;
1466
1467 return false;
1468}
1469
8b11a64c
ZD
1470/* Finds addresses in *OP_P inside STMT. */
1471
1472static void
1473find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1474{
ac182688 1475 tree base = *op_p, step = NULL;
8b11a64c
ZD
1476 struct iv *civ;
1477 struct ifs_ivopts_data ifs_ivopts_data;
1478
e3cc7254
ZD
1479 /* Do not play with volatile memory references. A bit too conservative,
1480 perhaps, but safe. */
1481 if (stmt_ann (stmt)->has_volatile_ops)
1482 goto fail;
1483
8b11a64c
ZD
1484 /* Ignore bitfields for now. Not really something terribly complicated
1485 to handle. TODO. */
dd79d394
ZD
1486 if (TREE_CODE (base) == BIT_FIELD_REF
1487 || (TREE_CODE (base) == COMPONENT_REF
1488 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1))))
8b11a64c
ZD
1489 goto fail;
1490
0a915e3d
ZD
1491 if (STRICT_ALIGNMENT
1492 && may_be_unaligned_p (base))
1493 goto fail;
1494
ac182688
ZD
1495 base = unshare_expr (base);
1496
1497 if (TREE_CODE (base) == TARGET_MEM_REF)
1498 {
1499 tree type = build_pointer_type (TREE_TYPE (base));
1500 tree astep;
1501
1502 if (TMR_BASE (base)
1503 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1504 {
1505 civ = get_iv (data, TMR_BASE (base));
1506 if (!civ)
1507 goto fail;
1508
1509 TMR_BASE (base) = civ->base;
1510 step = civ->step;
1511 }
1512 if (TMR_INDEX (base)
1513 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1514 {
1515 civ = get_iv (data, TMR_INDEX (base));
1516 if (!civ)
1517 goto fail;
8b11a64c 1518
ac182688
ZD
1519 TMR_INDEX (base) = civ->base;
1520 astep = civ->step;
be35cf60 1521
ac182688
ZD
1522 if (astep)
1523 {
1524 if (TMR_STEP (base))
1525 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1526
1527 if (step)
1528 step = fold_build2 (PLUS_EXPR, type, step, astep);
1529 else
1530 step = astep;
1531 }
1532 }
1533
1534 if (zero_p (step))
1535 goto fail;
1536 base = tree_mem_ref_addr (type, base);
1537 }
1538 else
1539 {
1540 ifs_ivopts_data.ivopts_data = data;
1541 ifs_ivopts_data.stmt = stmt;
1542 ifs_ivopts_data.step_p = &step;
1543 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1544 || zero_p (step))
1545 goto fail;
1546
1547 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1548 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1549
1550 base = build_fold_addr_expr (base);
ea643120
RG
1551
1552 /* Substituting bases of IVs into the base expression might
1553 have caused folding opportunities. */
1554 if (TREE_CODE (base) == ADDR_EXPR)
1555 {
1556 tree *ref = &TREE_OPERAND (base, 0);
1557 while (handled_component_p (*ref))
1558 ref = &TREE_OPERAND (*ref, 0);
1559 if (TREE_CODE (*ref) == INDIRECT_REF)
1560 *ref = fold_indirect_ref (*ref);
1561 }
ac182688 1562 }
8b11a64c
ZD
1563
1564 civ = alloc_iv (base, step);
1565 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1566 return;
1567
1568fail:
1569 for_each_index (op_p, idx_record_use, data);
1570}
1571
1572/* Finds and records invariants used in STMT. */
1573
1574static void
1575find_invariants_stmt (struct ivopts_data *data, tree stmt)
1576{
f47c96aa
AM
1577 ssa_op_iter iter;
1578 use_operand_p use_p;
8b11a64c
ZD
1579 tree op;
1580
f47c96aa 1581 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 1582 {
f47c96aa 1583 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
1584 record_invariant (data, op, false);
1585 }
1586}
1587
1588/* Finds interesting uses of induction variables in the statement STMT. */
1589
1590static void
1591find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1592{
1593 struct iv *iv;
1594 tree op, lhs, rhs;
f47c96aa
AM
1595 ssa_op_iter iter;
1596 use_operand_p use_p;
8b11a64c
ZD
1597
1598 find_invariants_stmt (data, stmt);
1599
1600 if (TREE_CODE (stmt) == COND_EXPR)
1601 {
1602 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1603 return;
1604 }
1605
1606 if (TREE_CODE (stmt) == MODIFY_EXPR)
1607 {
1608 lhs = TREE_OPERAND (stmt, 0);
1609 rhs = TREE_OPERAND (stmt, 1);
1610
1611 if (TREE_CODE (lhs) == SSA_NAME)
1612 {
1613 /* If the statement defines an induction variable, the uses are not
1614 interesting by themselves. */
1615
1616 iv = get_iv (data, lhs);
1617
1618 if (iv && !zero_p (iv->step))
1619 return;
1620 }
1621
1622 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1623 {
6615c446 1624 case tcc_comparison:
8b11a64c
ZD
1625 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1626 return;
1627
6615c446 1628 case tcc_reference:
8b11a64c 1629 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
6615c446 1630 if (REFERENCE_CLASS_P (lhs))
8b11a64c
ZD
1631 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1632 return;
1633
1634 default: ;
1635 }
1636
6615c446 1637 if (REFERENCE_CLASS_P (lhs)
2f4675b4 1638 && is_gimple_val (rhs))
8b11a64c
ZD
1639 {
1640 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1641 find_interesting_uses_op (data, rhs);
1642 return;
1643 }
2f4675b4
ZD
1644
1645 /* TODO -- we should also handle address uses of type
1646
1647 memory = call (whatever);
1648
1649 and
1650
1651 call (memory). */
8b11a64c
ZD
1652 }
1653
1654 if (TREE_CODE (stmt) == PHI_NODE
1655 && bb_for_stmt (stmt) == data->current_loop->header)
1656 {
1657 lhs = PHI_RESULT (stmt);
1658 iv = get_iv (data, lhs);
1659
1660 if (iv && !zero_p (iv->step))
1661 return;
1662 }
1663
f47c96aa 1664 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
8b11a64c 1665 {
f47c96aa 1666 op = USE_FROM_PTR (use_p);
8b11a64c
ZD
1667
1668 if (TREE_CODE (op) != SSA_NAME)
1669 continue;
1670
1671 iv = get_iv (data, op);
1672 if (!iv)
1673 continue;
1674
1675 find_interesting_uses_op (data, op);
1676 }
1677}
1678
1679/* Finds interesting uses of induction variables outside of loops
1680 on loop exit edge EXIT. */
1681
1682static void
1683find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1684{
1685 tree phi, def;
1686
bb29d951 1687 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1688 {
1689 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
50cc9802 1690 find_interesting_uses_op (data, def);
8b11a64c
ZD
1691 }
1692}
1693
1694/* Finds uses of the induction variables that are interesting. */
1695
1696static void
1697find_interesting_uses (struct ivopts_data *data)
1698{
1699 basic_block bb;
1700 block_stmt_iterator bsi;
1701 tree phi;
1702 basic_block *body = get_loop_body (data->current_loop);
1703 unsigned i;
1704 struct version_info *info;
1705 edge e;
1706
1707 if (dump_file && (dump_flags & TDF_DETAILS))
1708 fprintf (dump_file, "Uses:\n\n");
1709
1710 for (i = 0; i < data->current_loop->num_nodes; i++)
1711 {
628f6a4e 1712 edge_iterator ei;
8b11a64c
ZD
1713 bb = body[i];
1714
628f6a4e 1715 FOR_EACH_EDGE (e, ei, bb->succs)
8b11a64c
ZD
1716 if (e->dest != EXIT_BLOCK_PTR
1717 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1718 find_interesting_uses_outside (data, e);
1719
bb29d951 1720 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1721 find_interesting_uses_stmt (data, phi);
1722 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1723 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1724 }
1725
1726 if (dump_file && (dump_flags & TDF_DETAILS))
1727 {
87c476a2
ZD
1728 bitmap_iterator bi;
1729
8b11a64c
ZD
1730 fprintf (dump_file, "\n");
1731
87c476a2 1732 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1733 {
1734 info = ver_info (data, i);
1735 if (info->inv_id)
1736 {
1737 fprintf (dump_file, " ");
1738 print_generic_expr (dump_file, info->name, TDF_SLIM);
1739 fprintf (dump_file, " is invariant (%d)%s\n",
1740 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1741 }
87c476a2 1742 }
8b11a64c
ZD
1743
1744 fprintf (dump_file, "\n");
1745 }
1746
1747 free (body);
1748}
1749
f5e2738c 1750/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
9be872b7
ZD
1751 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1752 we are at the top-level of the processed address. */
f5e2738c
ZD
1753
1754static tree
9be872b7
ZD
1755strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1756 unsigned HOST_WIDE_INT *offset)
f5e2738c 1757{
9be872b7 1758 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
f5e2738c
ZD
1759 enum tree_code code;
1760 tree type, orig_type = TREE_TYPE (expr);
1761 unsigned HOST_WIDE_INT off0, off1, st;
1762 tree orig_expr = expr;
1763
1764 STRIP_NOPS (expr);
9be872b7 1765
f5e2738c
ZD
1766 type = TREE_TYPE (expr);
1767 code = TREE_CODE (expr);
1768 *offset = 0;
1769
1770 switch (code)
1771 {
1772 case INTEGER_CST:
1773 if (!cst_and_fits_in_hwi (expr)
1774 || zero_p (expr))
1775 return orig_expr;
1776
1777 *offset = int_cst_value (expr);
ff5e9a94 1778 return build_int_cst (orig_type, 0);
f5e2738c
ZD
1779
1780 case PLUS_EXPR:
1781 case MINUS_EXPR:
1782 op0 = TREE_OPERAND (expr, 0);
1783 op1 = TREE_OPERAND (expr, 1);
1784
9be872b7
ZD
1785 op0 = strip_offset_1 (op0, false, false, &off0);
1786 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c
ZD
1787
1788 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1789 if (op0 == TREE_OPERAND (expr, 0)
1790 && op1 == TREE_OPERAND (expr, 1))
1791 return orig_expr;
1792
1793 if (zero_p (op1))
1794 expr = op0;
1795 else if (zero_p (op0))
1796 {
1797 if (code == PLUS_EXPR)
1798 expr = op1;
1799 else
9be872b7 1800 expr = fold_build1 (NEGATE_EXPR, type, op1);
f5e2738c
ZD
1801 }
1802 else
9be872b7 1803 expr = fold_build2 (code, type, op0, op1);
f5e2738c
ZD
1804
1805 return fold_convert (orig_type, expr);
1806
1807 case ARRAY_REF:
1808 if (!inside_addr)
1809 return orig_expr;
1810
1811 step = array_ref_element_size (expr);
1812 if (!cst_and_fits_in_hwi (step))
1813 break;
1814
1815 st = int_cst_value (step);
1816 op1 = TREE_OPERAND (expr, 1);
9be872b7 1817 op1 = strip_offset_1 (op1, false, false, &off1);
f5e2738c 1818 *offset = off1 * st;
9be872b7
ZD
1819
1820 if (top_compref
1821 && zero_p (op1))
1822 {
1823 /* Strip the component reference completely. */
1824 op0 = TREE_OPERAND (expr, 0);
1825 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1826 *offset += off0;
1827 return op0;
1828 }
f5e2738c
ZD
1829 break;
1830
1831 case COMPONENT_REF:
1832 if (!inside_addr)
1833 return orig_expr;
9be872b7
ZD
1834
1835 tmp = component_ref_field_offset (expr);
1836 if (top_compref
1837 && cst_and_fits_in_hwi (tmp))
1838 {
1839 /* Strip the component reference completely. */
1840 op0 = TREE_OPERAND (expr, 0);
1841 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1842 *offset = off0 + int_cst_value (tmp);
1843 return op0;
1844 }
f5e2738c
ZD
1845 break;
1846
1847 case ADDR_EXPR:
9be872b7
ZD
1848 op0 = TREE_OPERAND (expr, 0);
1849 op0 = strip_offset_1 (op0, true, true, &off0);
1850 *offset += off0;
1851
1852 if (op0 == TREE_OPERAND (expr, 0))
1853 return orig_expr;
1854
d5dc1717 1855 expr = build_fold_addr_expr (op0);
9be872b7
ZD
1856 return fold_convert (orig_type, expr);
1857
1858 case INDIRECT_REF:
1859 inside_addr = false;
f5e2738c
ZD
1860 break;
1861
1862 default:
1863 return orig_expr;
1864 }
1865
1866 /* Default handling of expressions for that we want to recurse into
1867 the first operand. */
1868 op0 = TREE_OPERAND (expr, 0);
9be872b7 1869 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
f5e2738c
ZD
1870 *offset += off0;
1871
1872 if (op0 == TREE_OPERAND (expr, 0)
1873 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1874 return orig_expr;
1875
1876 expr = copy_node (expr);
1877 TREE_OPERAND (expr, 0) = op0;
1878 if (op1)
1879 TREE_OPERAND (expr, 1) = op1;
1880
9be872b7 1881 /* Inside address, we might strip the top level component references,
0fa2e4df 1882 thus changing type of the expression. Handling of ADDR_EXPR
9be872b7
ZD
1883 will fix that. */
1884 expr = fold_convert (orig_type, expr);
1885
1886 return expr;
1887}
1888
1889/* Strips constant offsets from EXPR and stores them to OFFSET. */
1890
1891static tree
1892strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
1893{
1894 return strip_offset_1 (expr, false, false, offset);
f5e2738c
ZD
1895}
1896
d482f417
ZD
1897/* Returns variant of TYPE that can be used as base for different uses.
1898 For integer types, we return unsigned variant of the type, which
1899 avoids problems with overflows. For pointer types, we return void *. */
1900
1901static tree
1902generic_type_for (tree type)
1903{
1904 if (POINTER_TYPE_P (type))
1905 return ptr_type_node;
1906
1907 if (TYPE_UNSIGNED (type))
1908 return type;
1909
1910 return unsigned_type_for (type);
1911}
1912
9be872b7
ZD
1913/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
1914 the bitmap to that we should store it. */
1915
1916static struct ivopts_data *fd_ivopts_data;
1917static tree
1918find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
1919{
1920 bitmap *depends_on = data;
1921 struct version_info *info;
1922
1923 if (TREE_CODE (*expr_p) != SSA_NAME)
1924 return NULL_TREE;
1925 info = name_info (fd_ivopts_data, *expr_p);
1926
1927 if (!info->inv_id || info->has_nonlin_use)
1928 return NULL_TREE;
1929
1930 if (!*depends_on)
1931 *depends_on = BITMAP_ALLOC (NULL);
1932 bitmap_set_bit (*depends_on, info->inv_id);
1933
1934 return NULL_TREE;
1935}
1936
8b11a64c
ZD
1937/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1938 position to POS. If USE is not NULL, the candidate is set as related to
1939 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1940 replacement of the final value of the iv by a direct computation. */
1941
1942static struct iv_cand *
1943add_candidate_1 (struct ivopts_data *data,
1944 tree base, tree step, bool important, enum iv_position pos,
1945 struct iv_use *use, tree incremented_at)
1946{
1947 unsigned i;
1948 struct iv_cand *cand = NULL;
d482f417 1949 tree type, orig_type;
8b11a64c
ZD
1950
1951 if (base)
1952 {
d482f417
ZD
1953 orig_type = TREE_TYPE (base);
1954 type = generic_type_for (orig_type);
1955 if (type != orig_type)
8b11a64c 1956 {
8b11a64c
ZD
1957 base = fold_convert (type, base);
1958 if (step)
1959 step = fold_convert (type, step);
1960 }
1961 }
1962
1963 for (i = 0; i < n_iv_cands (data); i++)
1964 {
1965 cand = iv_cand (data, i);
1966
1967 if (cand->pos != pos)
1968 continue;
1969
1970 if (cand->incremented_at != incremented_at)
1971 continue;
1972
1973 if (!cand->iv)
1974 {
1975 if (!base && !step)
1976 break;
1977
1978 continue;
1979 }
1980
1981 if (!base && !step)
1982 continue;
1983
1984 if (!operand_equal_p (base, cand->iv->base, 0))
1985 continue;
1986
1987 if (zero_p (cand->iv->step))
1988 {
1989 if (zero_p (step))
1990 break;
1991 }
1992 else
1993 {
1994 if (step && operand_equal_p (step, cand->iv->step, 0))
1995 break;
1996 }
1997 }
1998
1999 if (i == n_iv_cands (data))
2000 {
5ed6ace5 2001 cand = XCNEW (struct iv_cand);
8b11a64c
ZD
2002 cand->id = i;
2003
2004 if (!base && !step)
2005 cand->iv = NULL;
2006 else
2007 cand->iv = alloc_iv (base, step);
2008
2009 cand->pos = pos;
2010 if (pos != IP_ORIGINAL && cand->iv)
2011 {
2012 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2013 cand->var_after = cand->var_before;
2014 }
2015 cand->important = important;
2016 cand->incremented_at = incremented_at;
69ebd99d 2017 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
8b11a64c 2018
9be872b7
ZD
2019 if (step
2020 && TREE_CODE (step) != INTEGER_CST)
2021 {
2022 fd_ivopts_data = data;
2023 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2024 }
2025
8b11a64c
ZD
2026 if (dump_file && (dump_flags & TDF_DETAILS))
2027 dump_cand (dump_file, cand);
2028 }
2029
2030 if (important && !cand->important)
2031 {
2032 cand->important = true;
2033 if (dump_file && (dump_flags & TDF_DETAILS))
2034 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2035 }
2036
2037 if (use)
2038 {
2039 bitmap_set_bit (use->related_cands, i);
2040 if (dump_file && (dump_flags & TDF_DETAILS))
2041 fprintf (dump_file, "Candidate %d is related to use %d\n",
2042 cand->id, use->id);
2043 }
2044
2045 return cand;
2046}
2047
4366cf6d
ZD
2048/* Returns true if incrementing the induction variable at the end of the LOOP
2049 is allowed.
2050
2051 The purpose is to avoid splitting latch edge with a biv increment, thus
2052 creating a jump, possibly confusing other optimization passes and leaving
2053 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2054 is not available (so we do not have a better alternative), or if the latch
2055 edge is already nonempty. */
2056
2057static bool
2058allow_ip_end_pos_p (struct loop *loop)
2059{
2060 if (!ip_normal_pos (loop))
2061 return true;
2062
2063 if (!empty_block_p (ip_end_pos (loop)))
2064 return true;
2065
2066 return false;
2067}
2068
8b11a64c
ZD
2069/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2070 position to POS. If USE is not NULL, the candidate is set as related to
2071 it. The candidate computation is scheduled on all available positions. */
2072
2073static void
2074add_candidate (struct ivopts_data *data,
2075 tree base, tree step, bool important, struct iv_use *use)
2076{
2077 if (ip_normal_pos (data->current_loop))
2078 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
4366cf6d
ZD
2079 if (ip_end_pos (data->current_loop)
2080 && allow_ip_end_pos_p (data->current_loop))
8b11a64c
ZD
2081 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
2082}
2083
39b4020c
RS
2084/* Add a standard "0 + 1 * iteration" iv candidate for a
2085 type with SIZE bits. */
2086
2087static void
2088add_standard_iv_candidates_for_size (struct ivopts_data *data,
2089 unsigned int size)
2090{
2091 tree type = lang_hooks.types.type_for_size (size, true);
2092 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2093 true, NULL);
2094}
2095
8b11a64c
ZD
2096/* Adds standard iv candidates. */
2097
2098static void
2099add_standard_iv_candidates (struct ivopts_data *data)
2100{
39b4020c 2101 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
8b11a64c 2102
39b4020c
RS
2103 /* The same for a double-integer type if it is still fast enough. */
2104 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2105 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
8b11a64c
ZD
2106}
2107
2108
2109/* Adds candidates bases on the old induction variable IV. */
2110
2111static void
2112add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2113{
2114 tree phi, def;
2115 struct iv_cand *cand;
2116
2117 add_candidate (data, iv->base, iv->step, true, NULL);
2118
2119 /* The same, but with initial value zero. */
2120 add_candidate (data,
5212068f 2121 build_int_cst (TREE_TYPE (iv->base), 0),
8b11a64c
ZD
2122 iv->step, true, NULL);
2123
2124 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2125 if (TREE_CODE (phi) == PHI_NODE)
2126 {
2127 /* Additionally record the possibility of leaving the original iv
2128 untouched. */
2129 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2130 cand = add_candidate_1 (data,
2131 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2132 SSA_NAME_DEF_STMT (def));
2133 cand->var_before = iv->ssa_name;
2134 cand->var_after = def;
2135 }
2136}
2137
2138/* Adds candidates based on the old induction variables. */
2139
2140static void
2141add_old_ivs_candidates (struct ivopts_data *data)
2142{
2143 unsigned i;
2144 struct iv *iv;
87c476a2 2145 bitmap_iterator bi;
8b11a64c 2146
87c476a2 2147 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
2148 {
2149 iv = ver_info (data, i)->iv;
2150 if (iv && iv->biv_p && !zero_p (iv->step))
2151 add_old_iv_candidates (data, iv);
87c476a2 2152 }
8b11a64c
ZD
2153}
2154
2155/* Adds candidates based on the value of the induction variable IV and USE. */
2156
2157static void
2158add_iv_value_candidates (struct ivopts_data *data,
2159 struct iv *iv, struct iv_use *use)
2160{
f5e2738c 2161 unsigned HOST_WIDE_INT offset;
9be872b7 2162 tree base;
8b11a64c 2163
9be872b7 2164 add_candidate (data, iv->base, iv->step, false, use);
be35cf60 2165
9be872b7
ZD
2166 /* The same, but with initial value zero. Make such variable important,
2167 since it is generic enough so that possibly many uses may be based
2168 on it. */
2169 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2170 iv->step, true, use);
8b11a64c
ZD
2171
2172 /* Third, try removing the constant offset. */
9be872b7 2173 base = strip_offset (iv->base, &offset);
f5e2738c
ZD
2174 if (offset)
2175 add_candidate (data, base, iv->step, false, use);
8b11a64c
ZD
2176}
2177
8b11a64c
ZD
2178/* Adds candidates based on the uses. */
2179
2180static void
2181add_derived_ivs_candidates (struct ivopts_data *data)
2182{
2183 unsigned i;
2184
2185 for (i = 0; i < n_iv_uses (data); i++)
2186 {
2187 struct iv_use *use = iv_use (data, i);
2188
2189 if (!use)
2190 continue;
2191
2192 switch (use->type)
2193 {
2194 case USE_NONLINEAR_EXPR:
2195 case USE_COMPARE:
9be872b7 2196 case USE_ADDRESS:
8b11a64c
ZD
2197 /* Just add the ivs based on the value of the iv used here. */
2198 add_iv_value_candidates (data, use->iv, use);
2199 break;
2200
8b11a64c 2201 default:
1e128c5f 2202 gcc_unreachable ();
8b11a64c
ZD
2203 }
2204 }
2205}
2206
b1b02be2
ZD
2207/* Record important candidates and add them to related_cands bitmaps
2208 if needed. */
2209
2210static void
2211record_important_candidates (struct ivopts_data *data)
2212{
2213 unsigned i;
2214 struct iv_use *use;
2215
2216 for (i = 0; i < n_iv_cands (data); i++)
2217 {
2218 struct iv_cand *cand = iv_cand (data, i);
2219
2220 if (cand->important)
2221 bitmap_set_bit (data->important_candidates, i);
2222 }
2223
2224 data->consider_all_candidates = (n_iv_cands (data)
2225 <= CONSIDER_ALL_CANDIDATES_BOUND);
2226
2227 if (data->consider_all_candidates)
2228 {
2229 /* We will not need "related_cands" bitmaps in this case,
2230 so release them to decrease peak memory consumption. */
2231 for (i = 0; i < n_iv_uses (data); i++)
2232 {
2233 use = iv_use (data, i);
8bdbfff5 2234 BITMAP_FREE (use->related_cands);
b1b02be2
ZD
2235 }
2236 }
2237 else
2238 {
2239 /* Add important candidates to the related_cands bitmaps. */
2240 for (i = 0; i < n_iv_uses (data); i++)
2241 bitmap_ior_into (iv_use (data, i)->related_cands,
2242 data->important_candidates);
2243 }
2244}
2245
8b11a64c
ZD
2246/* Finds the candidates for the induction variables. */
2247
2248static void
2249find_iv_candidates (struct ivopts_data *data)
2250{
2251 /* Add commonly used ivs. */
2252 add_standard_iv_candidates (data);
2253
2254 /* Add old induction variables. */
2255 add_old_ivs_candidates (data);
2256
2257 /* Add induction variables derived from uses. */
2258 add_derived_ivs_candidates (data);
b1b02be2
ZD
2259
2260 /* Record the important candidates. */
2261 record_important_candidates (data);
8b11a64c
ZD
2262}
2263
2264/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2265 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2266 we allocate a simple list to every use. */
2267
2268static void
2269alloc_use_cost_map (struct ivopts_data *data)
2270{
b1b02be2 2271 unsigned i, size, s, j;
8b11a64c
ZD
2272
2273 for (i = 0; i < n_iv_uses (data); i++)
2274 {
2275 struct iv_use *use = iv_use (data, i);
87c476a2 2276 bitmap_iterator bi;
8b11a64c
ZD
2277
2278 if (data->consider_all_candidates)
b1b02be2 2279 size = n_iv_cands (data);
8b11a64c
ZD
2280 else
2281 {
b1b02be2 2282 s = 0;
87c476a2
ZD
2283 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2284 {
b1b02be2 2285 s++;
87c476a2 2286 }
b1b02be2
ZD
2287
2288 /* Round up to the power of two, so that moduling by it is fast. */
2289 for (size = 1; size < s; size <<= 1)
2290 continue;
8b11a64c
ZD
2291 }
2292
b1b02be2 2293 use->n_map_members = size;
5ed6ace5 2294 use->cost_map = XCNEWVEC (struct cost_pair, size);
8b11a64c
ZD
2295 }
2296}
2297
2298/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
f5f12961
ZD
2299 on invariants DEPENDS_ON and that the value used in expressing it
2300 is VALUE.*/
8b11a64c
ZD
2301
2302static void
2303set_use_iv_cost (struct ivopts_data *data,
2304 struct iv_use *use, struct iv_cand *cand, unsigned cost,
f5f12961 2305 bitmap depends_on, tree value)
8b11a64c 2306{
b1b02be2
ZD
2307 unsigned i, s;
2308
2309 if (cost == INFTY)
8b11a64c 2310 {
8bdbfff5 2311 BITMAP_FREE (depends_on);
b1b02be2 2312 return;
8b11a64c
ZD
2313 }
2314
2315 if (data->consider_all_candidates)
2316 {
2317 use->cost_map[cand->id].cand = cand;
2318 use->cost_map[cand->id].cost = cost;
2319 use->cost_map[cand->id].depends_on = depends_on;
f5f12961 2320 use->cost_map[cand->id].value = value;
8b11a64c
ZD
2321 return;
2322 }
2323
b1b02be2
ZD
2324 /* n_map_members is a power of two, so this computes modulo. */
2325 s = cand->id & (use->n_map_members - 1);
2326 for (i = s; i < use->n_map_members; i++)
2327 if (!use->cost_map[i].cand)
2328 goto found;
2329 for (i = 0; i < s; i++)
2330 if (!use->cost_map[i].cand)
2331 goto found;
2332
2333 gcc_unreachable ();
8b11a64c 2334
b1b02be2
ZD
2335found:
2336 use->cost_map[i].cand = cand;
2337 use->cost_map[i].cost = cost;
2338 use->cost_map[i].depends_on = depends_on;
f5f12961 2339 use->cost_map[i].value = value;
8b11a64c
ZD
2340}
2341
b1b02be2 2342/* Gets cost of (USE, CANDIDATE) pair. */
8b11a64c 2343
b1b02be2
ZD
2344static struct cost_pair *
2345get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2346 struct iv_cand *cand)
8b11a64c 2347{
b1b02be2
ZD
2348 unsigned i, s;
2349 struct cost_pair *ret;
8b11a64c
ZD
2350
2351 if (!cand)
b1b02be2 2352 return NULL;
8b11a64c
ZD
2353
2354 if (data->consider_all_candidates)
8b11a64c 2355 {
b1b02be2
ZD
2356 ret = use->cost_map + cand->id;
2357 if (!ret->cand)
2358 return NULL;
8b11a64c 2359
b1b02be2 2360 return ret;
8b11a64c 2361 }
b1b02be2
ZD
2362
2363 /* n_map_members is a power of two, so this computes modulo. */
2364 s = cand->id & (use->n_map_members - 1);
2365 for (i = s; i < use->n_map_members; i++)
2366 if (use->cost_map[i].cand == cand)
2367 return use->cost_map + i;
8b11a64c 2368
b1b02be2
ZD
2369 for (i = 0; i < s; i++)
2370 if (use->cost_map[i].cand == cand)
2371 return use->cost_map + i;
2372
2373 return NULL;
8b11a64c
ZD
2374}
2375
2376/* Returns estimate on cost of computing SEQ. */
2377
2378static unsigned
2379seq_cost (rtx seq)
2380{
2381 unsigned cost = 0;
2382 rtx set;
2383
2384 for (; seq; seq = NEXT_INSN (seq))
2385 {
2386 set = single_set (seq);
2387 if (set)
2388 cost += rtx_cost (set, SET);
2389 else
2390 cost++;
2391 }
2392
2393 return cost;
2394}
2395
8679c649
JH
2396/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2397static rtx
2398produce_memory_decl_rtl (tree obj, int *regno)
2399{
2400 rtx x;
e16acfcd
NS
2401
2402 gcc_assert (obj);
8679c649
JH
2403 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2404 {
2405 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2406 x = gen_rtx_SYMBOL_REF (Pmode, name);
2407 }
2408 else
2409 x = gen_raw_REG (Pmode, (*regno)++);
2410
2411 return gen_rtx_MEM (DECL_MODE (obj), x);
2412}
2413
8b11a64c
ZD
2414/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2415 walk_tree. DATA contains the actual fake register number. */
2416
2417static tree
2418prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2419{
2420 tree obj = NULL_TREE;
2421 rtx x = NULL_RTX;
2422 int *regno = data;
2423
2424 switch (TREE_CODE (*expr_p))
2425 {
8679c649
JH
2426 case ADDR_EXPR:
2427 for (expr_p = &TREE_OPERAND (*expr_p, 0);
afe84921
RH
2428 handled_component_p (*expr_p);
2429 expr_p = &TREE_OPERAND (*expr_p, 0))
2430 continue;
8679c649 2431 obj = *expr_p;
aacd3885 2432 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
8679c649
JH
2433 x = produce_memory_decl_rtl (obj, regno);
2434 break;
2435
8b11a64c
ZD
2436 case SSA_NAME:
2437 *ws = 0;
2438 obj = SSA_NAME_VAR (*expr_p);
2439 if (!DECL_RTL_SET_P (obj))
2440 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2441 break;
2442
2443 case VAR_DECL:
2444 case PARM_DECL:
2445 case RESULT_DECL:
2446 *ws = 0;
2447 obj = *expr_p;
2448
2449 if (DECL_RTL_SET_P (obj))
2450 break;
2451
2452 if (DECL_MODE (obj) == BLKmode)
8679c649 2453 x = produce_memory_decl_rtl (obj, regno);
8b11a64c
ZD
2454 else
2455 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2456
2457 break;
2458
2459 default:
2460 break;
2461 }
2462
2463 if (x)
2464 {
69ebd99d 2465 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
8b11a64c
ZD
2466 SET_DECL_RTL (obj, x);
2467 }
2468
2469 return NULL_TREE;
2470}
2471
2472/* Determines cost of the computation of EXPR. */
2473
2474static unsigned
2475computation_cost (tree expr)
2476{
2477 rtx seq, rslt;
2478 tree type = TREE_TYPE (expr);
2479 unsigned cost;
1d27fed4
DD
2480 /* Avoid using hard regs in ways which may be unsupported. */
2481 int regno = LAST_VIRTUAL_REGISTER + 1;
8b11a64c
ZD
2482
2483 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2484 start_sequence ();
2485 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2486 seq = get_insns ();
2487 end_sequence ();
2488
2489 cost = seq_cost (seq);
2ca202e7 2490 if (MEM_P (rslt))
8b11a64c
ZD
2491 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2492
2493 return cost;
2494}
2495
2496/* Returns variable containing the value of candidate CAND at statement AT. */
2497
2498static tree
2499var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2500{
2501 if (stmt_after_increment (loop, cand, stmt))
2502 return cand->var_after;
2503 else
2504 return cand->var_before;
2505}
2506
9be872b7
ZD
2507/* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2508 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2509
d7bf3bcf 2510int
9be872b7
ZD
2511tree_int_cst_sign_bit (tree t)
2512{
2513 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2514 unsigned HOST_WIDE_INT w;
2515
2516 if (bitno < HOST_BITS_PER_WIDE_INT)
2517 w = TREE_INT_CST_LOW (t);
2518 else
2519 {
2520 w = TREE_INT_CST_HIGH (t);
2521 bitno -= HOST_BITS_PER_WIDE_INT;
2522 }
2523
2524 return (w >> bitno) & 1;
2525}
2526
2527/* If we can prove that TOP = cst * BOT for some constant cst in TYPE,
2528 return cst. Otherwise return NULL_TREE. */
2529
2530static tree
2531constant_multiple_of (tree type, tree top, tree bot)
2532{
2533 tree res, mby, p0, p1;
2534 enum tree_code code;
2535 bool negate;
2536
2537 STRIP_NOPS (top);
2538 STRIP_NOPS (bot);
2539
2540 if (operand_equal_p (top, bot, 0))
2541 return build_int_cst (type, 1);
2542
2543 code = TREE_CODE (top);
2544 switch (code)
2545 {
2546 case MULT_EXPR:
2547 mby = TREE_OPERAND (top, 1);
2548 if (TREE_CODE (mby) != INTEGER_CST)
2549 return NULL_TREE;
2550
2551 res = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2552 if (!res)
2553 return NULL_TREE;
2554
2555 return fold_binary_to_constant (MULT_EXPR, type, res,
2556 fold_convert (type, mby));
2557
2558 case PLUS_EXPR:
2559 case MINUS_EXPR:
2560 p0 = constant_multiple_of (type, TREE_OPERAND (top, 0), bot);
2561 if (!p0)
2562 return NULL_TREE;
2563 p1 = constant_multiple_of (type, TREE_OPERAND (top, 1), bot);
2564 if (!p1)
2565 return NULL_TREE;
2566
2567 return fold_binary_to_constant (code, type, p0, p1);
2568
2569 case INTEGER_CST:
2570 if (TREE_CODE (bot) != INTEGER_CST)
2571 return NULL_TREE;
2572
2573 bot = fold_convert (type, bot);
2574 top = fold_convert (type, top);
2575
2576 /* If BOT seems to be negative, try dividing by -BOT instead, and negate
2577 the result afterwards. */
2578 if (tree_int_cst_sign_bit (bot))
2579 {
2580 negate = true;
2581 bot = fold_unary_to_constant (NEGATE_EXPR, type, bot);
2582 }
2583 else
2584 negate = false;
2585
2586 /* Ditto for TOP. */
2587 if (tree_int_cst_sign_bit (top))
2588 {
2589 negate = !negate;
2590 top = fold_unary_to_constant (NEGATE_EXPR, type, top);
2591 }
2592
2593 if (!zero_p (fold_binary_to_constant (TRUNC_MOD_EXPR, type, top, bot)))
2594 return NULL_TREE;
2595
2596 res = fold_binary_to_constant (EXACT_DIV_EXPR, type, top, bot);
2597 if (negate)
2598 res = fold_unary_to_constant (NEGATE_EXPR, type, res);
2599 return res;
2600
2601 default:
2602 return NULL_TREE;
2603 }
2604}
2605
9be872b7
ZD
2606/* Sets COMB to CST. */
2607
2608static void
2609aff_combination_const (struct affine_tree_combination *comb, tree type,
2610 unsigned HOST_WIDE_INT cst)
2611{
2612 unsigned prec = TYPE_PRECISION (type);
2613
2614 comb->type = type;
2615 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2616
2617 comb->n = 0;
2618 comb->rest = NULL_TREE;
2619 comb->offset = cst & comb->mask;
2620}
2621
2622/* Sets COMB to single element ELT. */
2623
2624static void
2625aff_combination_elt (struct affine_tree_combination *comb, tree type, tree elt)
2626{
2627 unsigned prec = TYPE_PRECISION (type);
2628
2629 comb->type = type;
2630 comb->mask = (((unsigned HOST_WIDE_INT) 2 << (prec - 1)) - 1);
2631
2632 comb->n = 1;
2633 comb->elts[0] = elt;
2634 comb->coefs[0] = 1;
2635 comb->rest = NULL_TREE;
2636 comb->offset = 0;
2637}
2638
2639/* Scales COMB by SCALE. */
2640
2641static void
2642aff_combination_scale (struct affine_tree_combination *comb,
2643 unsigned HOST_WIDE_INT scale)
2644{
2645 unsigned i, j;
2646
2647 if (scale == 1)
2648 return;
2649
2650 if (scale == 0)
2651 {
2652 aff_combination_const (comb, comb->type, 0);
2653 return;
2654 }
2655
2656 comb->offset = (scale * comb->offset) & comb->mask;
2657 for (i = 0, j = 0; i < comb->n; i++)
2658 {
2659 comb->coefs[j] = (scale * comb->coefs[i]) & comb->mask;
2660 comb->elts[j] = comb->elts[i];
2661 if (comb->coefs[j] != 0)
2662 j++;
2663 }
2664 comb->n = j;
2665
2666 if (comb->rest)
2667 {
2668 if (comb->n < MAX_AFF_ELTS)
2669 {
2670 comb->coefs[comb->n] = scale;
2671 comb->elts[comb->n] = comb->rest;
2672 comb->rest = NULL_TREE;
2673 comb->n++;
2674 }
2675 else
2676 comb->rest = fold_build2 (MULT_EXPR, comb->type, comb->rest,
2677 build_int_cst_type (comb->type, scale));
2678 }
2679}
2680
2681/* Adds ELT * SCALE to COMB. */
2682
2683static void
2684aff_combination_add_elt (struct affine_tree_combination *comb, tree elt,
2685 unsigned HOST_WIDE_INT scale)
2686{
2687 unsigned i;
2688
2689 if (scale == 0)
2690 return;
2691
2692 for (i = 0; i < comb->n; i++)
2693 if (operand_equal_p (comb->elts[i], elt, 0))
2694 {
2695 comb->coefs[i] = (comb->coefs[i] + scale) & comb->mask;
2696 if (comb->coefs[i])
2697 return;
2698
2699 comb->n--;
2700 comb->coefs[i] = comb->coefs[comb->n];
2701 comb->elts[i] = comb->elts[comb->n];
98d59fe6
ZD
2702
2703 if (comb->rest)
2704 {
2705 gcc_assert (comb->n == MAX_AFF_ELTS - 1);
2706 comb->coefs[comb->n] = 1;
2707 comb->elts[comb->n] = comb->rest;
2708 comb->rest = NULL_TREE;
2709 comb->n++;
2710 }
9be872b7
ZD
2711 return;
2712 }
2713 if (comb->n < MAX_AFF_ELTS)
2714 {
2715 comb->coefs[comb->n] = scale;
2716 comb->elts[comb->n] = elt;
2717 comb->n++;
2718 return;
2719 }
2720
2721 if (scale == 1)
2722 elt = fold_convert (comb->type, elt);
2723 else
2724 elt = fold_build2 (MULT_EXPR, comb->type,
2725 fold_convert (comb->type, elt),
2726 build_int_cst_type (comb->type, scale));
2727
2728 if (comb->rest)
2729 comb->rest = fold_build2 (PLUS_EXPR, comb->type, comb->rest, elt);
2730 else
2731 comb->rest = elt;
2732}
2733
2734/* Adds COMB2 to COMB1. */
2735
2736static void
2737aff_combination_add (struct affine_tree_combination *comb1,
2738 struct affine_tree_combination *comb2)
2739{
2740 unsigned i;
2741
2742 comb1->offset = (comb1->offset + comb2->offset) & comb1->mask;
98d59fe6 2743 for (i = 0; i < comb2->n; i++)
9be872b7
ZD
2744 aff_combination_add_elt (comb1, comb2->elts[i], comb2->coefs[i]);
2745 if (comb2->rest)
2746 aff_combination_add_elt (comb1, comb2->rest, 1);
2747}
2748
2749/* Splits EXPR into an affine combination of parts. */
2750
2751static void
2752tree_to_aff_combination (tree expr, tree type,
2753 struct affine_tree_combination *comb)
2754{
2755 struct affine_tree_combination tmp;
2756 enum tree_code code;
2757 tree cst, core, toffset;
2758 HOST_WIDE_INT bitpos, bitsize;
2759 enum machine_mode mode;
2760 int unsignedp, volatilep;
2761
2762 STRIP_NOPS (expr);
2763
2764 code = TREE_CODE (expr);
2765 switch (code)
2766 {
2767 case INTEGER_CST:
2768 aff_combination_const (comb, type, int_cst_value (expr));
2769 return;
2770
2771 case PLUS_EXPR:
2772 case MINUS_EXPR:
2773 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2774 tree_to_aff_combination (TREE_OPERAND (expr, 1), type, &tmp);
2775 if (code == MINUS_EXPR)
2776 aff_combination_scale (&tmp, -1);
2777 aff_combination_add (comb, &tmp);
2778 return;
2779
2780 case MULT_EXPR:
2781 cst = TREE_OPERAND (expr, 1);
2782 if (TREE_CODE (cst) != INTEGER_CST)
2783 break;
2784 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2785 aff_combination_scale (comb, int_cst_value (cst));
2786 return;
2787
2788 case NEGATE_EXPR:
2789 tree_to_aff_combination (TREE_OPERAND (expr, 0), type, comb);
2790 aff_combination_scale (comb, -1);
2791 return;
2792
2793 case ADDR_EXPR:
2794 core = get_inner_reference (TREE_OPERAND (expr, 0), &bitsize, &bitpos,
2795 &toffset, &mode, &unsignedp, &volatilep,
2796 false);
2797 if (bitpos % BITS_PER_UNIT != 0)
2798 break;
2799 aff_combination_const (comb, type, bitpos / BITS_PER_UNIT);
d5dc1717 2800 core = build_fold_addr_expr (core);
9be872b7
ZD
2801 if (TREE_CODE (core) == ADDR_EXPR)
2802 aff_combination_add_elt (comb, core, 1);
2803 else
2804 {
2805 tree_to_aff_combination (core, type, &tmp);
2806 aff_combination_add (comb, &tmp);
2807 }
2808 if (toffset)
2809 {
2810 tree_to_aff_combination (toffset, type, &tmp);
2811 aff_combination_add (comb, &tmp);
2812 }
2813 return;
2814
2815 default:
2816 break;
2817 }
2818
2819 aff_combination_elt (comb, type, expr);
2820}
2821
2822/* Creates EXPR + ELT * SCALE in TYPE. MASK is the mask for width of TYPE. */
2823
2824static tree
2825add_elt_to_tree (tree expr, tree type, tree elt, unsigned HOST_WIDE_INT scale,
2826 unsigned HOST_WIDE_INT mask)
2827{
2828 enum tree_code code;
2829
2830 scale &= mask;
2831 elt = fold_convert (type, elt);
2832
2833 if (scale == 1)
2834 {
2835 if (!expr)
2836 return elt;
2837
2838 return fold_build2 (PLUS_EXPR, type, expr, elt);
2839 }
2840
2841 if (scale == mask)
2842 {
2843 if (!expr)
2844 return fold_build1 (NEGATE_EXPR, type, elt);
2845
2846 return fold_build2 (MINUS_EXPR, type, expr, elt);
2847 }
2848
2849 if (!expr)
2850 return fold_build2 (MULT_EXPR, type, elt,
2851 build_int_cst_type (type, scale));
2852
2853 if ((scale | (mask >> 1)) == mask)
2854 {
2855 /* Scale is negative. */
2856 code = MINUS_EXPR;
2857 scale = (-scale) & mask;
2858 }
2859 else
2860 code = PLUS_EXPR;
2861
2862 elt = fold_build2 (MULT_EXPR, type, elt,
2863 build_int_cst_type (type, scale));
2864 return fold_build2 (code, type, expr, elt);
2865}
2866
ac182688
ZD
2867/* Copies the tree elements of COMB to ensure that they are not shared. */
2868
2869static void
2870unshare_aff_combination (struct affine_tree_combination *comb)
2871{
2872 unsigned i;
2873
2874 for (i = 0; i < comb->n; i++)
2875 comb->elts[i] = unshare_expr (comb->elts[i]);
2876 if (comb->rest)
2877 comb->rest = unshare_expr (comb->rest);
2878}
2879
9be872b7
ZD
2880/* Makes tree from the affine combination COMB. */
2881
2882static tree
2883aff_combination_to_tree (struct affine_tree_combination *comb)
2884{
2885 tree type = comb->type;
2886 tree expr = comb->rest;
2887 unsigned i;
2888 unsigned HOST_WIDE_INT off, sgn;
2889
ac182688
ZD
2890 /* Handle the special case produced by get_computation_aff when
2891 the type does not fit in HOST_WIDE_INT. */
2892 if (comb->n == 0 && comb->offset == 0)
2893 return fold_convert (type, expr);
2894
9be872b7
ZD
2895 gcc_assert (comb->n == MAX_AFF_ELTS || comb->rest == NULL_TREE);
2896
2897 for (i = 0; i < comb->n; i++)
2898 expr = add_elt_to_tree (expr, type, comb->elts[i], comb->coefs[i],
2899 comb->mask);
2900
2901 if ((comb->offset | (comb->mask >> 1)) == comb->mask)
2902 {
2903 /* Offset is negative. */
2904 off = (-comb->offset) & comb->mask;
2905 sgn = comb->mask;
2906 }
2907 else
2908 {
2909 off = comb->offset;
2910 sgn = 1;
2911 }
2912 return add_elt_to_tree (expr, type, build_int_cst_type (type, off), sgn,
2913 comb->mask);
2914}
2915
8b11a64c 2916/* Determines the expression by that USE is expressed from induction variable
ac182688
ZD
2917 CAND at statement AT in LOOP. The expression is stored in a decomposed
2918 form into AFF. Returns false if USE cannot be expressed using CAND. */
8b11a64c 2919
ac182688
ZD
2920static bool
2921get_computation_aff (struct loop *loop,
2922 struct iv_use *use, struct iv_cand *cand, tree at,
2923 struct affine_tree_combination *aff)
8b11a64c 2924{
2f4675b4
ZD
2925 tree ubase = use->iv->base;
2926 tree ustep = use->iv->step;
2927 tree cbase = cand->iv->base;
2928 tree cstep = cand->iv->step;
8b11a64c
ZD
2929 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2930 tree uutype;
2931 tree expr, delta;
2932 tree ratio;
2933 unsigned HOST_WIDE_INT ustepi, cstepi;
2934 HOST_WIDE_INT ratioi;
ac182688 2935 struct affine_tree_combination cbase_aff, expr_aff;
c456a94a 2936 tree cstep_orig = cstep, ustep_orig = ustep;
8b11a64c
ZD
2937
2938 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2939 {
2940 /* We do not have a precision to express the values of use. */
ac182688 2941 return false;
8b11a64c
ZD
2942 }
2943
2944 expr = var_at_stmt (loop, cand, at);
2945
2946 if (TREE_TYPE (expr) != ctype)
2947 {
2948 /* This may happen with the original ivs. */
2949 expr = fold_convert (ctype, expr);
2950 }
2951
2952 if (TYPE_UNSIGNED (utype))
2953 uutype = utype;
2954 else
2955 {
2956 uutype = unsigned_type_for (utype);
2957 ubase = fold_convert (uutype, ubase);
2958 ustep = fold_convert (uutype, ustep);
2959 }
2960
2961 if (uutype != ctype)
2962 {
2963 expr = fold_convert (uutype, expr);
2964 cbase = fold_convert (uutype, cbase);
2965 cstep = fold_convert (uutype, cstep);
c456a94a
ZD
2966
2967 /* If the conversion is not noop, we must take it into account when
2968 considering the value of the step. */
2969 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2970 cstep_orig = cstep;
8b11a64c
ZD
2971 }
2972
c456a94a
ZD
2973 if (cst_and_fits_in_hwi (cstep_orig)
2974 && cst_and_fits_in_hwi (ustep_orig))
9be872b7 2975 {
c456a94a
ZD
2976 ustepi = int_cst_value (ustep_orig);
2977 cstepi = int_cst_value (cstep_orig);
8b11a64c 2978
9be872b7
ZD
2979 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2980 {
2981 /* TODO maybe consider case when ustep divides cstep and the ratio is
2982 a power of 2 (so that the division is fast to execute)? We would
2983 need to be much more careful with overflows etc. then. */
ac182688 2984 return false;
9be872b7 2985 }
8b11a64c 2986
9be872b7
ZD
2987 ratio = build_int_cst_type (uutype, ratioi);
2988 }
2989 else
8b11a64c 2990 {
c456a94a 2991 ratio = constant_multiple_of (uutype, ustep_orig, cstep_orig);
9be872b7 2992 if (!ratio)
ac182688 2993 return false;
9be872b7
ZD
2994
2995 /* Ratioi is only used to detect special cases when the multiplicative
2996 factor is 1 or -1, so if we cannot convert ratio to HOST_WIDE_INT,
2997 we may set it to 0. We prefer cst_and_fits_in_hwi/int_cst_value
2998 to integer_onep/integer_all_onesp, since the former ignores
2999 TREE_OVERFLOW. */
3000 if (cst_and_fits_in_hwi (ratio))
3001 ratioi = int_cst_value (ratio);
3002 else if (integer_onep (ratio))
3003 ratioi = 1;
3004 else if (integer_all_onesp (ratio))
3005 ratioi = -1;
3006 else
3007 ratioi = 0;
8b11a64c
ZD
3008 }
3009
3010 /* We may need to shift the value if we are after the increment. */
3011 if (stmt_after_increment (loop, cand, at))
987b67bc 3012 cbase = fold_build2 (PLUS_EXPR, uutype, cbase, cstep);
8b11a64c 3013
b0a168b6
ZD
3014 /* use = ubase - ratio * cbase + ratio * var.
3015
3016 In general case ubase + ratio * (var - cbase) could be better (one less
3017 multiplication), but often it is possible to eliminate redundant parts
3018 of computations from (ubase - ratio * cbase) term, and if it does not
3019 happen, fold is able to apply the distributive law to obtain this form
3020 anyway. */
8b11a64c 3021
ac182688 3022 if (TYPE_PRECISION (uutype) > HOST_BITS_PER_WIDE_INT)
8b11a64c 3023 {
ac182688
ZD
3024 /* Let's compute in trees and just return the result in AFF. This case
3025 should not be very common, and fold itself is not that bad either,
3026 so making the aff. functions more complicated to handle this case
3027 is not that urgent. */
3028 if (ratioi == 1)
3029 {
3030 delta = fold_build2 (MINUS_EXPR, uutype, ubase, cbase);
3031 expr = fold_build2 (PLUS_EXPR, uutype, expr, delta);
3032 }
3033 else if (ratioi == -1)
3034 {
3035 delta = fold_build2 (PLUS_EXPR, uutype, ubase, cbase);
3036 expr = fold_build2 (MINUS_EXPR, uutype, delta, expr);
3037 }
9be872b7
ZD
3038 else
3039 {
ac182688
ZD
3040 delta = fold_build2 (MULT_EXPR, uutype, cbase, ratio);
3041 delta = fold_build2 (MINUS_EXPR, uutype, ubase, delta);
3042 expr = fold_build2 (MULT_EXPR, uutype, ratio, expr);
3043 expr = fold_build2 (PLUS_EXPR, uutype, delta, expr);
9be872b7 3044 }
ac182688
ZD
3045
3046 aff->type = uutype;
3047 aff->n = 0;
3048 aff->offset = 0;
3049 aff->mask = 0;
3050 aff->rest = expr;
3051 return true;
8b11a64c 3052 }
8b11a64c 3053
ac182688
ZD
3054 /* If we got here, the types fits in HOST_WIDE_INT, thus it must be
3055 possible to compute ratioi. */
3056 gcc_assert (ratioi);
3057
3058 tree_to_aff_combination (ubase, uutype, aff);
3059 tree_to_aff_combination (cbase, uutype, &cbase_aff);
3060 tree_to_aff_combination (expr, uutype, &expr_aff);
3061 aff_combination_scale (&cbase_aff, -ratioi);
3062 aff_combination_scale (&expr_aff, ratioi);
3063 aff_combination_add (aff, &cbase_aff);
3064 aff_combination_add (aff, &expr_aff);
3065
3066 return true;
3067}
3068
3069/* Determines the expression by that USE is expressed from induction variable
3070 CAND at statement AT in LOOP. The computation is unshared. */
3071
3072static tree
3073get_computation_at (struct loop *loop,
3074 struct iv_use *use, struct iv_cand *cand, tree at)
3075{
3076 struct affine_tree_combination aff;
3077 tree type = TREE_TYPE (use->iv->base);
3078
3079 if (!get_computation_aff (loop, use, cand, at, &aff))
3080 return NULL_TREE;
3081 unshare_aff_combination (&aff);
3082 return fold_convert (type, aff_combination_to_tree (&aff));
8b11a64c
ZD
3083}
3084
3085/* Determines the expression by that USE is expressed from induction variable
ac182688 3086 CAND in LOOP. The computation is unshared. */
8b11a64c
ZD
3087
3088static tree
3089get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3090{
3091 return get_computation_at (loop, use, cand, use->stmt);
3092}
3093
8b11a64c
ZD
3094/* Returns cost of addition in MODE. */
3095
3096static unsigned
3097add_cost (enum machine_mode mode)
3098{
3099 static unsigned costs[NUM_MACHINE_MODES];
3100 rtx seq;
3101 unsigned cost;
3102
3103 if (costs[mode])
3104 return costs[mode];
3105
3106 start_sequence ();
3107 force_operand (gen_rtx_fmt_ee (PLUS, mode,
03daa27a
ZD
3108 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3109 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
8b11a64c
ZD
3110 NULL_RTX);
3111 seq = get_insns ();
3112 end_sequence ();
3113
3114 cost = seq_cost (seq);
3115 if (!cost)
3116 cost = 1;
3117
3118 costs[mode] = cost;
3119
3120 if (dump_file && (dump_flags & TDF_DETAILS))
3121 fprintf (dump_file, "Addition in %s costs %d\n",
3122 GET_MODE_NAME (mode), cost);
3123 return cost;
3124}
3125
3126/* Entry in a hashtable of already known costs for multiplication. */
3127struct mbc_entry
3128{
3129 HOST_WIDE_INT cst; /* The constant to multiply by. */
3130 enum machine_mode mode; /* In mode. */
3131 unsigned cost; /* The cost. */
3132};
3133
3134/* Counts hash value for the ENTRY. */
3135
3136static hashval_t
3137mbc_entry_hash (const void *entry)
3138{
3139 const struct mbc_entry *e = entry;
3140
3141 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3142}
3143
3144/* Compares the hash table entries ENTRY1 and ENTRY2. */
3145
3146static int
3147mbc_entry_eq (const void *entry1, const void *entry2)
3148{
3149 const struct mbc_entry *e1 = entry1;
3150 const struct mbc_entry *e2 = entry2;
3151
3152 return (e1->mode == e2->mode
3153 && e1->cst == e2->cst);
3154}
3155
3156/* Returns cost of multiplication by constant CST in MODE. */
3157
ac182688 3158unsigned
8b11a64c
ZD
3159multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
3160{
3161 static htab_t costs;
3162 struct mbc_entry **cached, act;
3163 rtx seq;
3164 unsigned cost;
3165
3166 if (!costs)
3167 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3168
3169 act.mode = mode;
3170 act.cst = cst;
3171 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3172 if (*cached)
3173 return (*cached)->cost;
3174
5ed6ace5 3175 *cached = XNEW (struct mbc_entry);
8b11a64c
ZD
3176 (*cached)->mode = mode;
3177 (*cached)->cst = cst;
3178
3179 start_sequence ();
03daa27a
ZD
3180 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3181 gen_int_mode (cst, mode), NULL_RTX, 0);
8b11a64c
ZD
3182 seq = get_insns ();
3183 end_sequence ();
3184
3185 cost = seq_cost (seq);
3186
3187 if (dump_file && (dump_flags & TDF_DETAILS))
3188 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3189 (int) cst, GET_MODE_NAME (mode), cost);
3190
3191 (*cached)->cost = cost;
3192
3193 return cost;
3194}
3195
ac182688
ZD
3196/* Returns true if multiplying by RATIO is allowed in address. */
3197
3198bool
3199multiplier_allowed_in_address_p (HOST_WIDE_INT ratio)
3200{
3201#define MAX_RATIO 128
3202 static sbitmap valid_mult;
3203
3204 if (!valid_mult)
3205 {
03daa27a 3206 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
ac182688
ZD
3207 rtx addr;
3208 HOST_WIDE_INT i;
3209
3210 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3211 sbitmap_zero (valid_mult);
3212 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
3213 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3214 {
3215 XEXP (addr, 1) = gen_int_mode (i, Pmode);
3216 if (memory_address_p (Pmode, addr))
3217 SET_BIT (valid_mult, i + MAX_RATIO);
3218 }
3219
3220 if (dump_file && (dump_flags & TDF_DETAILS))
3221 {
3222 fprintf (dump_file, " allowed multipliers:");
3223 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3224 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3225 fprintf (dump_file, " %d", (int) i);
3226 fprintf (dump_file, "\n");
3227 fprintf (dump_file, "\n");
3228 }
3229 }
3230
3231 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3232 return false;
3233
3234 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3235}
3236
8b11a64c
ZD
3237/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3238 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3239 variable is omitted. The created memory accesses MODE.
3240
3241 TODO -- there must be some better way. This all is quite crude. */
3242
3243static unsigned
3244get_address_cost (bool symbol_present, bool var_present,
3245 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
3246{
ac182688 3247 static bool initialized = false;
8b11a64c
ZD
3248 static HOST_WIDE_INT rat, off;
3249 static HOST_WIDE_INT min_offset, max_offset;
3250 static unsigned costs[2][2][2][2];
3251 unsigned cost, acost;
3252 rtx seq, addr, base;
3253 bool offset_p, ratio_p;
3254 rtx reg1;
3255 HOST_WIDE_INT s_offset;
3256 unsigned HOST_WIDE_INT mask;
3257 unsigned bits;
3258
ac182688 3259 if (!initialized)
8b11a64c
ZD
3260 {
3261 HOST_WIDE_INT i;
ac182688 3262 initialized = true;
8b11a64c 3263
03daa27a 3264 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
8b11a64c
ZD
3265
3266 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
3267 for (i = 1; i <= 1 << 20; i <<= 1)
3268 {
03daa27a 3269 XEXP (addr, 1) = gen_int_mode (i, Pmode);
8b11a64c
ZD
3270 if (!memory_address_p (Pmode, addr))
3271 break;
3272 }
3273 max_offset = i >> 1;
3274 off = max_offset;
3275
3276 for (i = 1; i <= 1 << 20; i <<= 1)
3277 {
03daa27a 3278 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
8b11a64c
ZD
3279 if (!memory_address_p (Pmode, addr))
3280 break;
3281 }
3282 min_offset = -(i >> 1);
3283
3284 if (dump_file && (dump_flags & TDF_DETAILS))
3285 {
3286 fprintf (dump_file, "get_address_cost:\n");
3287 fprintf (dump_file, " min offset %d\n", (int) min_offset);
3288 fprintf (dump_file, " max offset %d\n", (int) max_offset);
3289 }
3290
8b11a64c 3291 rat = 1;
ac182688
ZD
3292 for (i = 2; i <= MAX_RATIO; i++)
3293 if (multiplier_allowed_in_address_p (i))
3294 {
3295 rat = i;
3296 break;
3297 }
8b11a64c
ZD
3298 }
3299
3300 bits = GET_MODE_BITSIZE (Pmode);
3301 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3302 offset &= mask;
3303 if ((offset >> (bits - 1) & 1))
3304 offset |= ~mask;
3305 s_offset = offset;
3306
3307 cost = 0;
a85a9024
UW
3308 offset_p = (s_offset != 0
3309 && min_offset <= s_offset && s_offset <= max_offset);
8b11a64c 3310 ratio_p = (ratio != 1
ac182688 3311 && multiplier_allowed_in_address_p (ratio));
8b11a64c
ZD
3312
3313 if (ratio != 1 && !ratio_p)
3314 cost += multiply_by_cost (ratio, Pmode);
3315
3316 if (s_offset && !offset_p && !symbol_present)
3317 {
3318 cost += add_cost (Pmode);
3319 var_present = true;
3320 }
3321
3322 acost = costs[symbol_present][var_present][offset_p][ratio_p];
3323 if (!acost)
3324 {
85a43114 3325 int old_cse_not_expected;
8b11a64c
ZD
3326 acost = 0;
3327
03daa27a
ZD
3328 addr = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
3329 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
8b11a64c 3330 if (ratio_p)
03daa27a 3331 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, gen_int_mode (rat, Pmode));
8b11a64c 3332
a85a9024 3333 if (var_present)
7299dbfb 3334 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
a85a9024 3335
8b11a64c
ZD
3336 if (symbol_present)
3337 {
3338 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
3339 if (offset_p)
3340 base = gen_rtx_fmt_e (CONST, Pmode,
3341 gen_rtx_fmt_ee (PLUS, Pmode,
3342 base,
03daa27a 3343 gen_int_mode (off, Pmode)));
8b11a64c
ZD
3344 }
3345 else if (offset_p)
03daa27a 3346 base = gen_int_mode (off, Pmode);
8b11a64c
ZD
3347 else
3348 base = NULL_RTX;
3349
3350 if (base)
7299dbfb 3351 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
8b11a64c
ZD
3352
3353 start_sequence ();
85a43114
ZD
3354 /* To avoid splitting addressing modes, pretend that no cse will
3355 follow. */
3356 old_cse_not_expected = cse_not_expected;
3357 cse_not_expected = true;
8b11a64c 3358 addr = memory_address (Pmode, addr);
85a43114 3359 cse_not_expected = old_cse_not_expected;
8b11a64c
ZD
3360 seq = get_insns ();
3361 end_sequence ();
3362
3363 acost = seq_cost (seq);
3364 acost += address_cost (addr, Pmode);
3365
3366 if (!acost)
3367 acost = 1;
3368 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
3369 }
3370
3371 return cost + acost;
3372}
8b11a64c 3373
3ac01fde
ZD
3374/* Estimates cost of forcing expression EXPR into a variable. */
3375
3376unsigned
3377force_expr_to_var_cost (tree expr)
8b11a64c
ZD
3378{
3379 static bool costs_initialized = false;
3380 static unsigned integer_cost;
3381 static unsigned symbol_cost;
3382 static unsigned address_cost;
7299dbfb
ZD
3383 tree op0, op1;
3384 unsigned cost0, cost1, cost;
3385 enum machine_mode mode;
8b11a64c
ZD
3386
3387 if (!costs_initialized)
3388 {
3389 tree var = create_tmp_var_raw (integer_type_node, "test_var");
3390 rtx x = gen_rtx_MEM (DECL_MODE (var),
3391 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
3392 tree addr;
3393 tree type = build_pointer_type (integer_type_node);
3394
ff5e9a94
SP
3395 integer_cost = computation_cost (build_int_cst (integer_type_node,
3396 2000));
8b11a64c
ZD
3397
3398 SET_DECL_RTL (var, x);
3399 TREE_STATIC (var) = 1;
3400 addr = build1 (ADDR_EXPR, type, var);
3401 symbol_cost = computation_cost (addr) + 1;
3402
3403 address_cost
3404 = computation_cost (build2 (PLUS_EXPR, type,
3405 addr,
ff5e9a94 3406 build_int_cst (type, 2000))) + 1;
8b11a64c
ZD
3407 if (dump_file && (dump_flags & TDF_DETAILS))
3408 {
3ac01fde 3409 fprintf (dump_file, "force_expr_to_var_cost:\n");
8b11a64c
ZD
3410 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3411 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3412 fprintf (dump_file, " address %d\n", (int) address_cost);
3413 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3414 fprintf (dump_file, "\n");
3415 }
3416
3417 costs_initialized = true;
3418 }
3419
f5e2738c
ZD
3420 STRIP_NOPS (expr);
3421
8b11a64c
ZD
3422 if (SSA_VAR_P (expr))
3423 return 0;
3424
3425 if (TREE_INVARIANT (expr))
3426 {
3427 if (TREE_CODE (expr) == INTEGER_CST)
3428 return integer_cost;
3429
3430 if (TREE_CODE (expr) == ADDR_EXPR)
3431 {
3432 tree obj = TREE_OPERAND (expr, 0);
3433
3434 if (TREE_CODE (obj) == VAR_DECL
3435 || TREE_CODE (obj) == PARM_DECL
3436 || TREE_CODE (obj) == RESULT_DECL)
3437 return symbol_cost;
3438 }
3439
3440 return address_cost;
3441 }
3442
7299dbfb
ZD
3443 switch (TREE_CODE (expr))
3444 {
3445 case PLUS_EXPR:
3446 case MINUS_EXPR:
3447 case MULT_EXPR:
3448 op0 = TREE_OPERAND (expr, 0);
3449 op1 = TREE_OPERAND (expr, 1);
f5e2738c
ZD
3450 STRIP_NOPS (op0);
3451 STRIP_NOPS (op1);
7299dbfb
ZD
3452
3453 if (is_gimple_val (op0))
3454 cost0 = 0;
3455 else
3ac01fde 3456 cost0 = force_expr_to_var_cost (op0);
7299dbfb
ZD
3457
3458 if (is_gimple_val (op1))
3459 cost1 = 0;
3460 else
3ac01fde 3461 cost1 = force_expr_to_var_cost (op1);
7299dbfb
ZD
3462
3463 break;
3464
3465 default:
3466 /* Just an arbitrary value, FIXME. */
3467 return target_spill_cost;
3468 }
3469
3470 mode = TYPE_MODE (TREE_TYPE (expr));
3471 switch (TREE_CODE (expr))
3472 {
3473 case PLUS_EXPR:
3474 case MINUS_EXPR:
3475 cost = add_cost (mode);
3476 break;
3477
3478 case MULT_EXPR:
3479 if (cst_and_fits_in_hwi (op0))
3480 cost = multiply_by_cost (int_cst_value (op0), mode);
3481 else if (cst_and_fits_in_hwi (op1))
3482 cost = multiply_by_cost (int_cst_value (op1), mode);
3483 else
3484 return target_spill_cost;
3485 break;
3486
3487 default:
3488 gcc_unreachable ();
3489 }
3490
3491 cost += cost0;
3492 cost += cost1;
3493
3494 /* Bound the cost by target_spill_cost. The parts of complicated
3495 computations often are either loop invariant or at least can
3496 be shared between several iv uses, so letting this grow without
3497 limits would not give reasonable results. */
3498 return cost < target_spill_cost ? cost : target_spill_cost;
8b11a64c
ZD
3499}
3500
3ac01fde
ZD
3501/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3502 invariants the computation depends on. */
3503
3504static unsigned
3505force_var_cost (struct ivopts_data *data,
3506 tree expr, bitmap *depends_on)
3507{
3508 if (depends_on)
3509 {
3510 fd_ivopts_data = data;
3511 walk_tree (&expr, find_depends, depends_on, NULL);
3512 }
3513
3514 return force_expr_to_var_cost (expr);
3515}
3516
8b11a64c
ZD
3517/* Estimates cost of expressing address ADDR as var + symbol + offset. The
3518 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3519 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3520 invariants the computation depends on. */
3521
3522static unsigned
3523split_address_cost (struct ivopts_data *data,
3524 tree addr, bool *symbol_present, bool *var_present,
3525 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3526{
2f4675b4
ZD
3527 tree core;
3528 HOST_WIDE_INT bitsize;
3529 HOST_WIDE_INT bitpos;
3530 tree toffset;
3531 enum machine_mode mode;
3532 int unsignedp, volatilep;
3533
3534 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2614034e 3535 &unsignedp, &volatilep, false);
8b11a64c 3536
2f4675b4
ZD
3537 if (toffset != 0
3538 || bitpos % BITS_PER_UNIT != 0
3539 || TREE_CODE (core) != VAR_DECL)
8b11a64c
ZD
3540 {
3541 *symbol_present = false;
3542 *var_present = true;
3543 fd_ivopts_data = data;
3544 walk_tree (&addr, find_depends, depends_on, NULL);
3545 return target_spill_cost;
2f4675b4
ZD
3546 }
3547
3548 *offset += bitpos / BITS_PER_UNIT;
8b11a64c
ZD
3549 if (TREE_STATIC (core)
3550 || DECL_EXTERNAL (core))
3551 {
3552 *symbol_present = true;
3553 *var_present = false;
3554 return 0;
3555 }
3556
3557 *symbol_present = false;
3558 *var_present = true;
3559 return 0;
3560}
3561
3562/* Estimates cost of expressing difference of addresses E1 - E2 as
3563 var + symbol + offset. The value of offset is added to OFFSET,
3564 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3565 part is missing. DEPENDS_ON is a set of the invariants the computation
3566 depends on. */
3567
3568static unsigned
3569ptr_difference_cost (struct ivopts_data *data,
3570 tree e1, tree e2, bool *symbol_present, bool *var_present,
3571 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3572{
2f4675b4 3573 HOST_WIDE_INT diff = 0;
8b11a64c
ZD
3574 unsigned cost;
3575
1e128c5f 3576 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
8b11a64c 3577
7299dbfb 3578 if (ptr_difference_const (e1, e2, &diff))
8b11a64c
ZD
3579 {
3580 *offset += diff;
3581 *symbol_present = false;
3582 *var_present = false;
3583 return 0;
3584 }
3585
3586 if (e2 == integer_zero_node)
3587 return split_address_cost (data, TREE_OPERAND (e1, 0),
3588 symbol_present, var_present, offset, depends_on);
3589
3590 *symbol_present = false;
3591 *var_present = true;
3592
3593 cost = force_var_cost (data, e1, depends_on);
3594 cost += force_var_cost (data, e2, depends_on);
3595 cost += add_cost (Pmode);
3596
3597 return cost;
3598}
3599
3600/* Estimates cost of expressing difference E1 - E2 as
3601 var + symbol + offset. The value of offset is added to OFFSET,
3602 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3603 part is missing. DEPENDS_ON is a set of the invariants the computation
3604 depends on. */
3605
3606static unsigned
3607difference_cost (struct ivopts_data *data,
3608 tree e1, tree e2, bool *symbol_present, bool *var_present,
3609 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3610{
3611 unsigned cost;
3612 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
f5e2738c
ZD
3613 unsigned HOST_WIDE_INT off1, off2;
3614
9be872b7
ZD
3615 e1 = strip_offset (e1, &off1);
3616 e2 = strip_offset (e2, &off2);
f5e2738c 3617 *offset += off1 - off2;
8b11a64c 3618
f5e2738c
ZD
3619 STRIP_NOPS (e1);
3620 STRIP_NOPS (e2);
8b11a64c
ZD
3621
3622 if (TREE_CODE (e1) == ADDR_EXPR)
3623 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3624 depends_on);
3625 *symbol_present = false;
3626
3627 if (operand_equal_p (e1, e2, 0))
3628 {
3629 *var_present = false;
3630 return 0;
3631 }
3632 *var_present = true;
3633 if (zero_p (e2))
3634 return force_var_cost (data, e1, depends_on);
3635
3636 if (zero_p (e1))
3637 {
3638 cost = force_var_cost (data, e2, depends_on);
3639 cost += multiply_by_cost (-1, mode);
3640
3641 return cost;
3642 }
3643
3644 cost = force_var_cost (data, e1, depends_on);
3645 cost += force_var_cost (data, e2, depends_on);
3646 cost += add_cost (mode);
3647
3648 return cost;
3649}
3650
3651/* Determines the cost of the computation by that USE is expressed
3652 from induction variable CAND. If ADDRESS_P is true, we just need
3653 to create an address from it, otherwise we want to get it into
3654 register. A set of invariants we depend on is stored in
3655 DEPENDS_ON. AT is the statement at that the value is computed. */
3656
3657static unsigned
3658get_computation_cost_at (struct ivopts_data *data,
3659 struct iv_use *use, struct iv_cand *cand,
3660 bool address_p, bitmap *depends_on, tree at)
3661{
3662 tree ubase = use->iv->base, ustep = use->iv->step;
3663 tree cbase, cstep;
3664 tree utype = TREE_TYPE (ubase), ctype;
3665 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3666 HOST_WIDE_INT ratio, aratio;
3667 bool var_present, symbol_present;
3668 unsigned cost = 0, n_sums;
3669
3670 *depends_on = NULL;
3671
3672 /* Only consider real candidates. */
3673 if (!cand->iv)
3674 return INFTY;
3675
3676 cbase = cand->iv->base;
3677 cstep = cand->iv->step;
3678 ctype = TREE_TYPE (cbase);
3679
3680 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3681 {
3682 /* We do not have a precision to express the values of use. */
3683 return INFTY;
3684 }
3685
e6845c23
ZD
3686 if (address_p)
3687 {
3688 /* Do not try to express address of an object with computation based
3689 on address of a different object. This may cause problems in rtl
3690 level alias analysis (that does not expect this to be happening,
3691 as this is illegal in C), and would be unlikely to be useful
3692 anyway. */
3693 if (use->iv->base_object
3694 && cand->iv->base_object
3695 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3696 return INFTY;
3697 }
3698
8b11a64c
ZD
3699 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3700 {
3701 /* TODO -- add direct handling of this case. */
3702 goto fallback;
3703 }
3704
9be872b7
ZD
3705 /* CSTEPI is removed from the offset in case statement is after the
3706 increment. If the step is not constant, we use zero instead.
1e1def1c 3707 This is a bit imprecise (there is the extra addition), but
9be872b7
ZD
3708 redundancy elimination is likely to transform the code so that
3709 it uses value of the variable before increment anyway,
3710 so it is not that much unrealistic. */
3711 if (cst_and_fits_in_hwi (cstep))
3712 cstepi = int_cst_value (cstep);
3713 else
3714 cstepi = 0;
3715
3716 if (cst_and_fits_in_hwi (ustep)
3717 && cst_and_fits_in_hwi (cstep))
3718 {
3719 ustepi = int_cst_value (ustep);
3720
3721 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3722 return INFTY;
3723 }
3724 else
3725 {
3726 tree rat;
3727
3728 rat = constant_multiple_of (utype, ustep, cstep);
3729
3730 if (!rat)
3731 return INFTY;
3732
3733 if (cst_and_fits_in_hwi (rat))
3734 ratio = int_cst_value (rat);
3735 else if (integer_onep (rat))
3736 ratio = 1;
3737 else if (integer_all_onesp (rat))
3738 ratio = -1;
3739 else
3740 return INFTY;
3741 }
8b11a64c
ZD
3742
3743 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3744 or ratio == 1, it is better to handle this like
3745
3746 ubase - ratio * cbase + ratio * var
3747
3748 (also holds in the case ratio == -1, TODO. */
3749
9be872b7 3750 if (cst_and_fits_in_hwi (cbase))
8b11a64c
ZD
3751 {
3752 offset = - ratio * int_cst_value (cbase);
3753 cost += difference_cost (data,
3754 ubase, integer_zero_node,
3755 &symbol_present, &var_present, &offset,
3756 depends_on);
3757 }
3758 else if (ratio == 1)
3759 {
3760 cost += difference_cost (data,
3761 ubase, cbase,
3762 &symbol_present, &var_present, &offset,
3763 depends_on);
3764 }
3765 else
3766 {
3767 cost += force_var_cost (data, cbase, depends_on);
3768 cost += add_cost (TYPE_MODE (ctype));
3769 cost += difference_cost (data,
3770 ubase, integer_zero_node,
3771 &symbol_present, &var_present, &offset,
3772 depends_on);
3773 }
3774
3775 /* If we are after the increment, the value of the candidate is higher by
3776 one iteration. */
3777 if (stmt_after_increment (data->current_loop, cand, at))
3778 offset -= ratio * cstepi;
3779
3780 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3781 (symbol/var/const parts may be omitted). If we are looking for an address,
3782 find the cost of addressing this. */
3783 if (address_p)
7299dbfb 3784 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
8b11a64c
ZD
3785
3786 /* Otherwise estimate the costs for computing the expression. */
3787 aratio = ratio > 0 ? ratio : -ratio;
3788 if (!symbol_present && !var_present && !offset)
3789 {
3790 if (ratio != 1)
3791 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3792
3793 return cost;
3794 }
3795
3796 if (aratio != 1)
3797 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3798
3799 n_sums = 1;
3800 if (var_present
3801 /* Symbol + offset should be compile-time computable. */
3802 && (symbol_present || offset))
3803 n_sums++;
3804
3805 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3806
3807fallback:
3808 {
3809 /* Just get the expression, expand it and measure the cost. */
3810 tree comp = get_computation_at (data->current_loop, use, cand, at);
3811
3812 if (!comp)
3813 return INFTY;
3814
3815 if (address_p)
3816 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3817
3818 return computation_cost (comp);
3819 }
3820}
3821
3822/* Determines the cost of the computation by that USE is expressed
3823 from induction variable CAND. If ADDRESS_P is true, we just need
3824 to create an address from it, otherwise we want to get it into
3825 register. A set of invariants we depend on is stored in
3826 DEPENDS_ON. */
3827
3828static unsigned
3829get_computation_cost (struct ivopts_data *data,
3830 struct iv_use *use, struct iv_cand *cand,
3831 bool address_p, bitmap *depends_on)
3832{
3833 return get_computation_cost_at (data,
3834 use, cand, address_p, depends_on, use->stmt);
3835}
3836
3837/* Determines cost of basing replacement of USE on CAND in a generic
3838 expression. */
3839
b1b02be2 3840static bool
8b11a64c
ZD
3841determine_use_iv_cost_generic (struct ivopts_data *data,
3842 struct iv_use *use, struct iv_cand *cand)
3843{
3844 bitmap depends_on;
eec5fec9
ZD
3845 unsigned cost;
3846
3847 /* The simple case first -- if we need to express value of the preserved
3848 original biv, the cost is 0. This also prevents us from counting the
3849 cost of increment twice -- once at this use and once in the cost of
3850 the candidate. */
3851 if (cand->pos == IP_ORIGINAL
3852 && cand->incremented_at == use->stmt)
3853 {
f5f12961 3854 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
eec5fec9
ZD
3855 return true;
3856 }
8b11a64c 3857
eec5fec9 3858 cost = get_computation_cost (data, use, cand, false, &depends_on);
f5f12961 3859 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
b1b02be2
ZD
3860
3861 return cost != INFTY;
8b11a64c
ZD
3862}
3863
3864/* Determines cost of basing replacement of USE on CAND in an address. */
3865
b1b02be2 3866static bool
8b11a64c
ZD
3867determine_use_iv_cost_address (struct ivopts_data *data,
3868 struct iv_use *use, struct iv_cand *cand)
3869{
3870 bitmap depends_on;
3871 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3872
f5f12961 3873 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
b1b02be2
ZD
3874
3875 return cost != INFTY;
8b11a64c
ZD
3876}
3877
3878/* Computes value of induction variable IV in iteration NITER. */
3879
3880static tree
3881iv_value (struct iv *iv, tree niter)
3882{
3883 tree val;
3884 tree type = TREE_TYPE (iv->base);
3885
3886 niter = fold_convert (type, niter);
987b67bc 3887 val = fold_build2 (MULT_EXPR, type, iv->step, niter);
8b11a64c 3888
987b67bc 3889 return fold_build2 (PLUS_EXPR, type, iv->base, val);
8b11a64c
ZD
3890}
3891
3892/* Computes value of candidate CAND at position AT in iteration NITER. */
3893
3894static tree
3895cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3896{
3897 tree val = iv_value (cand->iv, niter);
3898 tree type = TREE_TYPE (cand->iv->base);
3899
3900 if (stmt_after_increment (loop, cand, at))
987b67bc 3901 val = fold_build2 (PLUS_EXPR, type, val, cand->iv->step);
8b11a64c
ZD
3902
3903 return val;
3904}
3905
ca4c3169
ZD
3906/* Returns period of induction variable iv. */
3907
3908static tree
3909iv_period (struct iv *iv)
3910{
3911 tree step = iv->step, period, type;
3912 tree pow2div;
3913
3914 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3915
3916 /* Period of the iv is gcd (step, type range). Since type range is power
3917 of two, it suffices to determine the maximum power of two that divides
3918 step. */
3919 pow2div = num_ending_zeros (step);
3920 type = unsigned_type_for (TREE_TYPE (step));
3921
3922 period = build_low_bits_mask (type,
3923 (TYPE_PRECISION (type)
3924 - tree_low_cst (pow2div, 1)));
3925
3926 return period;
3927}
3928
f5f12961
ZD
3929/* Returns the comparison operator used when eliminating the iv USE. */
3930
3931static enum tree_code
3932iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3933{
3934 struct loop *loop = data->current_loop;
3935 basic_block ex_bb;
3936 edge exit;
3937
3938 ex_bb = bb_for_stmt (use->stmt);
3939 exit = EDGE_SUCC (ex_bb, 0);
3940 if (flow_bb_inside_loop_p (loop, exit->dest))
3941 exit = EDGE_SUCC (ex_bb, 1);
3942
3943 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3944}
3945
8b11a64c 3946/* Check whether it is possible to express the condition in USE by comparison
f5f12961 3947 of candidate CAND. If so, store the value compared with to BOUND. */
8b11a64c
ZD
3948
3949static bool
ca4c3169 3950may_eliminate_iv (struct ivopts_data *data,
f5f12961 3951 struct iv_use *use, struct iv_cand *cand, tree *bound)
8b11a64c 3952{
e6845c23 3953 basic_block ex_bb;
8b11a64c 3954 edge exit;
ca4c3169
ZD
3955 tree nit, nit_type;
3956 tree wider_type, period, per_type;
3957 struct loop *loop = data->current_loop;
e6845c23 3958
9be872b7
ZD
3959 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3960 return false;
3961
e6845c23
ZD
3962 /* For now works only for exits that dominate the loop latch. TODO -- extend
3963 for other conditions inside loop body. */
3964 ex_bb = bb_for_stmt (use->stmt);
3965 if (use->stmt != last_stmt (ex_bb)
3966 || TREE_CODE (use->stmt) != COND_EXPR)
8b11a64c 3967 return false;
e6845c23 3968 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
8b11a64c
ZD
3969 return false;
3970
e6845c23
ZD
3971 exit = EDGE_SUCC (ex_bb, 0);
3972 if (flow_bb_inside_loop_p (loop, exit->dest))
3973 exit = EDGE_SUCC (ex_bb, 1);
3974 if (flow_bb_inside_loop_p (loop, exit->dest))
3975 return false;
3976
dcccd88d
ZD
3977 nit = niter_for_exit (data, exit);
3978 if (!nit)
8b11a64c
ZD
3979 return false;
3980
ca4c3169 3981 nit_type = TREE_TYPE (nit);
8b11a64c 3982
ca4c3169
ZD
3983 /* Determine whether we may use the variable to test whether niter iterations
3984 elapsed. This is the case iff the period of the induction variable is
3985 greater than the number of iterations. */
3986 period = iv_period (cand->iv);
3987 if (!period)
8b11a64c 3988 return false;
ca4c3169
ZD
3989 per_type = TREE_TYPE (period);
3990
3991 wider_type = TREE_TYPE (period);
3992 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3993 wider_type = per_type;
3994 else
3995 wider_type = nit_type;
8b11a64c 3996
987b67bc
KH
3997 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3998 fold_convert (wider_type, period),
3999 fold_convert (wider_type, nit))))
8b11a64c
ZD
4000 return false;
4001
ca4c3169 4002 *bound = cand_value_at (loop, cand, use->stmt, nit);
8b11a64c
ZD
4003 return true;
4004}
4005
4006/* Determines cost of basing replacement of USE on CAND in a condition. */
4007
b1b02be2 4008static bool
8b11a64c
ZD
4009determine_use_iv_cost_condition (struct ivopts_data *data,
4010 struct iv_use *use, struct iv_cand *cand)
4011{
f5f12961
ZD
4012 tree bound = NULL_TREE, op, cond;
4013 bitmap depends_on = NULL;
4014 unsigned cost;
8b11a64c
ZD
4015
4016 /* Only consider real candidates. */
4017 if (!cand->iv)
4018 {
f5f12961 4019 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
b1b02be2 4020 return false;
8b11a64c
ZD
4021 }
4022
f5f12961 4023 if (may_eliminate_iv (data, use, cand, &bound))
8b11a64c 4024 {
f5f12961 4025 cost = force_var_cost (data, bound, &depends_on);
8b11a64c 4026
f5f12961 4027 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
b1b02be2 4028 return cost != INFTY;
8b11a64c
ZD
4029 }
4030
4031 /* The induction variable elimination failed; just express the original
4032 giv. If it is compared with an invariant, note that we cannot get
4033 rid of it. */
f5f12961
ZD
4034 cost = get_computation_cost (data, use, cand, false, &depends_on);
4035
4036 cond = *use->op_p;
4037 if (TREE_CODE (cond) != SSA_NAME)
8b11a64c 4038 {
f5f12961
ZD
4039 op = TREE_OPERAND (cond, 0);
4040 if (TREE_CODE (op) == SSA_NAME && !zero_p (get_iv (data, op)->step))
4041 op = TREE_OPERAND (cond, 1);
4042 if (TREE_CODE (op) == SSA_NAME)
4043 {
4044 op = get_iv (data, op)->base;
4045 fd_ivopts_data = data;
4046 walk_tree (&op, find_depends, &depends_on, NULL);
4047 }
8b11a64c
ZD
4048 }
4049
f5f12961
ZD
4050 set_use_iv_cost (data, use, cand, cost, depends_on, NULL);
4051 return cost != INFTY;
8b11a64c
ZD
4052}
4053
b1b02be2
ZD
4054/* Determines cost of basing replacement of USE on CAND. Returns false
4055 if USE cannot be based on CAND. */
8b11a64c 4056
b1b02be2 4057static bool
8b11a64c
ZD
4058determine_use_iv_cost (struct ivopts_data *data,
4059 struct iv_use *use, struct iv_cand *cand)
4060{
4061 switch (use->type)
4062 {
4063 case USE_NONLINEAR_EXPR:
b1b02be2 4064 return determine_use_iv_cost_generic (data, use, cand);
8b11a64c 4065
8b11a64c 4066 case USE_ADDRESS:
b1b02be2 4067 return determine_use_iv_cost_address (data, use, cand);
8b11a64c
ZD
4068
4069 case USE_COMPARE:
b1b02be2 4070 return determine_use_iv_cost_condition (data, use, cand);
8b11a64c
ZD
4071
4072 default:
1e128c5f 4073 gcc_unreachable ();
8b11a64c
ZD
4074 }
4075}
4076
4077/* Determines costs of basing the use of the iv on an iv candidate. */
4078
4079static void
4080determine_use_iv_costs (struct ivopts_data *data)
4081{
4082 unsigned i, j;
4083 struct iv_use *use;
4084 struct iv_cand *cand;
8bdbfff5 4085 bitmap to_clear = BITMAP_ALLOC (NULL);
8b11a64c
ZD
4086
4087 alloc_use_cost_map (data);
4088
8b11a64c
ZD
4089 for (i = 0; i < n_iv_uses (data); i++)
4090 {
4091 use = iv_use (data, i);
4092
4093 if (data->consider_all_candidates)
4094 {
4095 for (j = 0; j < n_iv_cands (data); j++)
4096 {
4097 cand = iv_cand (data, j);
4098 determine_use_iv_cost (data, use, cand);
4099 }
4100 }
4101 else
4102 {
87c476a2
ZD
4103 bitmap_iterator bi;
4104
4105 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
8b11a64c
ZD
4106 {
4107 cand = iv_cand (data, j);
b1b02be2
ZD
4108 if (!determine_use_iv_cost (data, use, cand))
4109 bitmap_set_bit (to_clear, j);
87c476a2 4110 }
b1b02be2
ZD
4111
4112 /* Remove the candidates for that the cost is infinite from
4113 the list of related candidates. */
4114 bitmap_and_compl_into (use->related_cands, to_clear);
4115 bitmap_clear (to_clear);
8b11a64c
ZD
4116 }
4117 }
4118
8bdbfff5 4119 BITMAP_FREE (to_clear);
b1b02be2 4120
8b11a64c
ZD
4121 if (dump_file && (dump_flags & TDF_DETAILS))
4122 {
4123 fprintf (dump_file, "Use-candidate costs:\n");
4124
4125 for (i = 0; i < n_iv_uses (data); i++)
4126 {
4127 use = iv_use (data, i);
4128
4129 fprintf (dump_file, "Use %d:\n", i);
4130 fprintf (dump_file, " cand\tcost\tdepends on\n");
4131 for (j = 0; j < use->n_map_members; j++)
4132 {
4133 if (!use->cost_map[j].cand
4134 || use->cost_map[j].cost == INFTY)
4135 continue;
4136
4137 fprintf (dump_file, " %d\t%d\t",
4138 use->cost_map[j].cand->id,
4139 use->cost_map[j].cost);
4140 if (use->cost_map[j].depends_on)
4141 bitmap_print (dump_file,
4142 use->cost_map[j].depends_on, "","");
4143 fprintf (dump_file, "\n");
4144 }
4145
4146 fprintf (dump_file, "\n");
4147 }
4148 fprintf (dump_file, "\n");
4149 }
4150}
4151
4152/* Determines cost of the candidate CAND. */
4153
4154static void
4155determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4156{
4157 unsigned cost_base, cost_step;
4366cf6d 4158 tree base;
8b11a64c
ZD
4159
4160 if (!cand->iv)
4161 {
4162 cand->cost = 0;
4163 return;
4164 }
4165
4166 /* There are two costs associated with the candidate -- its increment
4167 and its initialization. The second is almost negligible for any loop
4168 that rolls enough, so we take it just very little into account. */
4169
4170 base = cand->iv->base;
4171 cost_base = force_var_cost (data, base, NULL);
4172 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
4173
4174 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
4175
cf5d1802
ZD
4176 /* Prefer the original iv unless we may gain something by replacing it;
4177 this is not really relevant for artificial ivs created by other
4178 passes. */
4179 if (cand->pos == IP_ORIGINAL
4180 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
8b11a64c
ZD
4181 cand->cost--;
4182
4183 /* Prefer not to insert statements into latch unless there are some
4184 already (so that we do not create unnecessary jumps). */
4366cf6d
ZD
4185 if (cand->pos == IP_END
4186 && empty_block_p (ip_end_pos (data->current_loop)))
4187 cand->cost++;
8b11a64c
ZD
4188}
4189
4190/* Determines costs of computation of the candidates. */
4191
4192static void
4193determine_iv_costs (struct ivopts_data *data)
4194{
4195 unsigned i;
4196
4197 if (dump_file && (dump_flags & TDF_DETAILS))
4198 {
4199 fprintf (dump_file, "Candidate costs:\n");
4200 fprintf (dump_file, " cand\tcost\n");
4201 }
4202
4203 for (i = 0; i < n_iv_cands (data); i++)
4204 {
4205 struct iv_cand *cand = iv_cand (data, i);
4206
4207 determine_iv_cost (data, cand);
4208
4209 if (dump_file && (dump_flags & TDF_DETAILS))
4210 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4211 }
4212
4213if (dump_file && (dump_flags & TDF_DETAILS))
4214 fprintf (dump_file, "\n");
4215}
4216
4217/* Calculates cost for having SIZE induction variables. */
4218
4219static unsigned
4220ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4221{
9a2ef6b8 4222 return global_cost_for_size (size, data->regs_used, n_iv_uses (data));
8b11a64c
ZD
4223}
4224
4225/* For each size of the induction variable set determine the penalty. */
4226
4227static void
4228determine_set_costs (struct ivopts_data *data)
4229{
4230 unsigned j, n;
4231 tree phi, op;
4232 struct loop *loop = data->current_loop;
87c476a2 4233 bitmap_iterator bi;
8b11a64c
ZD
4234
4235 /* We use the following model (definitely improvable, especially the
4236 cost function -- TODO):
4237
4238 We estimate the number of registers available (using MD data), name it A.
4239
4240 We estimate the number of registers used by the loop, name it U. This
4241 number is obtained as the number of loop phi nodes (not counting virtual
4242 registers and bivs) + the number of variables from outside of the loop.
4243
4244 We set a reserve R (free regs that are used for temporary computations,
4245 etc.). For now the reserve is a constant 3.
4246
4247 Let I be the number of induction variables.
4248
4249 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
4250 make a lot of ivs without a reason).
4251 -- if A - R < U + I <= A, the cost is I * PRES_COST
4252 -- if U + I > A, the cost is I * PRES_COST and
4253 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
4254
4255 if (dump_file && (dump_flags & TDF_DETAILS))
4256 {
4257 fprintf (dump_file, "Global costs:\n");
4258 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4259 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
4260 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
4261 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
4262 }
4263
4264 n = 0;
bb29d951 4265 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
4266 {
4267 op = PHI_RESULT (phi);
4268
4269 if (!is_gimple_reg (op))
4270 continue;
4271
4272 if (get_iv (data, op))
4273 continue;
4274
4275 n++;
4276 }
4277
87c476a2 4278 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
4279 {
4280 struct version_info *info = ver_info (data, j);
4281
4282 if (info->inv_id && info->has_nonlin_use)
4283 n++;
87c476a2 4284 }
8b11a64c 4285
9a2ef6b8 4286 data->regs_used = n;
8b11a64c
ZD
4287 if (dump_file && (dump_flags & TDF_DETAILS))
4288 fprintf (dump_file, " regs_used %d\n", n);
4289
4290 if (dump_file && (dump_flags & TDF_DETAILS))
4291 {
4292 fprintf (dump_file, " cost for size:\n");
4293 fprintf (dump_file, " ivs\tcost\n");
4294 for (j = 0; j <= 2 * target_avail_regs; j++)
4295 fprintf (dump_file, " %d\t%d\n", j,
4296 ivopts_global_cost_for_size (data, j));
4297 fprintf (dump_file, "\n");
4298 }
4299}
4300
b1b02be2 4301/* Returns true if A is a cheaper cost pair than B. */
8b11a64c 4302
b1b02be2
ZD
4303static bool
4304cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
8b11a64c 4305{
b1b02be2
ZD
4306 if (!a)
4307 return false;
8b11a64c 4308
b1b02be2
ZD
4309 if (!b)
4310 return true;
4311
4312 if (a->cost < b->cost)
4313 return true;
4314
4315 if (a->cost > b->cost)
4316 return false;
4317
4318 /* In case the costs are the same, prefer the cheaper candidate. */
4319 if (a->cand->cost < b->cand->cost)
4320 return true;
4321
4322 return false;
4323}
4324
4325/* Computes the cost field of IVS structure. */
4326
4327static void
4328iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4329{
4330 unsigned cost = 0;
4331
4332 cost += ivs->cand_use_cost;
4333 cost += ivs->cand_cost;
4334 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4335
4336 ivs->cost = cost;
4337}
4338
9be872b7
ZD
4339/* Remove invariants in set INVS to set IVS. */
4340
4341static void
4342iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4343{
4344 bitmap_iterator bi;
4345 unsigned iid;
4346
4347 if (!invs)
4348 return;
4349
4350 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4351 {
4352 ivs->n_invariant_uses[iid]--;
4353 if (ivs->n_invariant_uses[iid] == 0)
4354 ivs->n_regs--;
4355 }
4356}
4357
b1b02be2
ZD
4358/* Set USE not to be expressed by any candidate in IVS. */
4359
4360static void
4361iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4362 struct iv_use *use)
4363{
9be872b7 4364 unsigned uid = use->id, cid;
b1b02be2 4365 struct cost_pair *cp;
b1b02be2
ZD
4366
4367 cp = ivs->cand_for_use[uid];
4368 if (!cp)
4369 return;
4370 cid = cp->cand->id;
4371
4372 ivs->bad_uses++;
4373 ivs->cand_for_use[uid] = NULL;
4374 ivs->n_cand_uses[cid]--;
4375
4376 if (ivs->n_cand_uses[cid] == 0)
8b11a64c 4377 {
b1b02be2
ZD
4378 bitmap_clear_bit (ivs->cands, cid);
4379 /* Do not count the pseudocandidates. */
4380 if (cp->cand->iv)
4381 ivs->n_regs--;
36f5ada1 4382 ivs->n_cands--;
b1b02be2 4383 ivs->cand_cost -= cp->cand->cost;
9be872b7
ZD
4384
4385 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
b1b02be2
ZD
4386 }
4387
4388 ivs->cand_use_cost -= cp->cost;
4389
9be872b7
ZD
4390 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4391 iv_ca_recount_cost (data, ivs);
4392}
4393
4394/* Add invariants in set INVS to set IVS. */
80cad5fa 4395
9be872b7
ZD
4396static void
4397iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4398{
4399 bitmap_iterator bi;
4400 unsigned iid;
4401
4402 if (!invs)
4403 return;
4404
4405 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
b1b02be2 4406 {
9be872b7
ZD
4407 ivs->n_invariant_uses[iid]++;
4408 if (ivs->n_invariant_uses[iid] == 1)
4409 ivs->n_regs++;
8b11a64c 4410 }
b1b02be2
ZD
4411}
4412
4413/* Set cost pair for USE in set IVS to CP. */
4414
4415static void
4416iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4417 struct iv_use *use, struct cost_pair *cp)
4418{
9be872b7 4419 unsigned uid = use->id, cid;
b1b02be2
ZD
4420
4421 if (ivs->cand_for_use[uid] == cp)
4422 return;
4423
4424 if (ivs->cand_for_use[uid])
4425 iv_ca_set_no_cp (data, ivs, use);
4426
4427 if (cp)
8b11a64c 4428 {
b1b02be2 4429 cid = cp->cand->id;
8b11a64c 4430
b1b02be2
ZD
4431 ivs->bad_uses--;
4432 ivs->cand_for_use[uid] = cp;
4433 ivs->n_cand_uses[cid]++;
4434 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 4435 {
b1b02be2
ZD
4436 bitmap_set_bit (ivs->cands, cid);
4437 /* Do not count the pseudocandidates. */
4438 if (cp->cand->iv)
4439 ivs->n_regs++;
36f5ada1 4440 ivs->n_cands++;
b1b02be2 4441 ivs->cand_cost += cp->cand->cost;
b1b02be2 4442
9be872b7 4443 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
8b11a64c
ZD
4444 }
4445
9be872b7
ZD
4446 ivs->cand_use_cost += cp->cost;
4447 iv_ca_set_add_invariants (ivs, cp->depends_on);
b1b02be2 4448 iv_ca_recount_cost (data, ivs);
87c476a2 4449 }
b1b02be2
ZD
4450}
4451
4452/* Extend set IVS by expressing USE by some of the candidates in it
4453 if possible. */
4454
4455static void
4456iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4457 struct iv_use *use)
4458{
4459 struct cost_pair *best_cp = NULL, *cp;
4460 bitmap_iterator bi;
4461 unsigned i;
8b11a64c 4462
b1b02be2
ZD
4463 gcc_assert (ivs->upto >= use->id);
4464
4465 if (ivs->upto == use->id)
4466 {
4467 ivs->upto++;
4468 ivs->bad_uses++;
4469 }
8b11a64c 4470
b1b02be2
ZD
4471 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4472 {
4473 cp = get_use_iv_cost (data, use, iv_cand (data, i));
8b11a64c 4474
b1b02be2
ZD
4475 if (cheaper_cost_pair (cp, best_cp))
4476 best_cp = cp;
4477 }
8b11a64c 4478
b1b02be2 4479 iv_ca_set_cp (data, ivs, use, best_cp);
8b11a64c
ZD
4480}
4481
b1b02be2 4482/* Get cost for assignment IVS. */
8b11a64c
ZD
4483
4484static unsigned
b1b02be2
ZD
4485iv_ca_cost (struct iv_ca *ivs)
4486{
4487 return (ivs->bad_uses ? INFTY : ivs->cost);
4488}
4489
4490/* Returns true if all dependences of CP are among invariants in IVS. */
4491
4492static bool
4493iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
8b11a64c
ZD
4494{
4495 unsigned i;
87c476a2 4496 bitmap_iterator bi;
8b11a64c 4497
b1b02be2
ZD
4498 if (!cp->depends_on)
4499 return true;
4500
4501 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
8b11a64c 4502 {
b1b02be2
ZD
4503 if (ivs->n_invariant_uses[i] == 0)
4504 return false;
4505 }
4506
4507 return true;
4508}
4509
4510/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4511 it before NEXT_CHANGE. */
4512
4513static struct iv_ca_delta *
4514iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4515 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4516{
5ed6ace5 4517 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
b1b02be2
ZD
4518
4519 change->use = use;
4520 change->old_cp = old_cp;
4521 change->new_cp = new_cp;
4522 change->next_change = next_change;
4523
4524 return change;
4525}
4526
36f5ada1 4527/* Joins two lists of changes L1 and L2. Destructive -- old lists
6c6cfbfd 4528 are rewritten. */
36f5ada1
ZD
4529
4530static struct iv_ca_delta *
4531iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4532{
4533 struct iv_ca_delta *last;
4534
4535 if (!l2)
4536 return l1;
4537
4538 if (!l1)
4539 return l2;
4540
4541 for (last = l1; last->next_change; last = last->next_change)
4542 continue;
4543 last->next_change = l2;
4544
4545 return l1;
4546}
4547
b1b02be2
ZD
4548/* Returns candidate by that USE is expressed in IVS. */
4549
4550static struct cost_pair *
4551iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4552{
4553 return ivs->cand_for_use[use->id];
4554}
4555
36f5ada1
ZD
4556/* Reverse the list of changes DELTA, forming the inverse to it. */
4557
4558static struct iv_ca_delta *
4559iv_ca_delta_reverse (struct iv_ca_delta *delta)
4560{
4561 struct iv_ca_delta *act, *next, *prev = NULL;
4562 struct cost_pair *tmp;
4563
4564 for (act = delta; act; act = next)
4565 {
4566 next = act->next_change;
4567 act->next_change = prev;
4568 prev = act;
4569
4570 tmp = act->old_cp;
4571 act->old_cp = act->new_cp;
4572 act->new_cp = tmp;
4573 }
4574
4575 return prev;
4576}
4577
b1b02be2
ZD
4578/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4579 reverted instead. */
4580
4581static void
4582iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4583 struct iv_ca_delta *delta, bool forward)
4584{
4585 struct cost_pair *from, *to;
36f5ada1 4586 struct iv_ca_delta *act;
b1b02be2 4587
36f5ada1
ZD
4588 if (!forward)
4589 delta = iv_ca_delta_reverse (delta);
b1b02be2 4590
36f5ada1
ZD
4591 for (act = delta; act; act = act->next_change)
4592 {
4593 from = act->old_cp;
4594 to = act->new_cp;
4595 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4596 iv_ca_set_cp (data, ivs, act->use, to);
8b11a64c 4597 }
36f5ada1
ZD
4598
4599 if (!forward)
4600 iv_ca_delta_reverse (delta);
b1b02be2 4601}
8b11a64c 4602
b1b02be2 4603/* Returns true if CAND is used in IVS. */
8b11a64c 4604
b1b02be2
ZD
4605static bool
4606iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4607{
4608 return ivs->n_cand_uses[cand->id] > 0;
4609}
8b11a64c 4610
36f5ada1
ZD
4611/* Returns number of induction variable candidates in the set IVS. */
4612
4613static unsigned
4614iv_ca_n_cands (struct iv_ca *ivs)
4615{
4616 return ivs->n_cands;
4617}
4618
b1b02be2
ZD
4619/* Free the list of changes DELTA. */
4620
4621static void
4622iv_ca_delta_free (struct iv_ca_delta **delta)
4623{
4624 struct iv_ca_delta *act, *next;
4625
4626 for (act = *delta; act; act = next)
87c476a2 4627 {
b1b02be2
ZD
4628 next = act->next_change;
4629 free (act);
87c476a2 4630 }
8b11a64c 4631
b1b02be2
ZD
4632 *delta = NULL;
4633}
4634
4635/* Allocates new iv candidates assignment. */
4636
4637static struct iv_ca *
4638iv_ca_new (struct ivopts_data *data)
4639{
5ed6ace5 4640 struct iv_ca *nw = XNEW (struct iv_ca);
8b11a64c 4641
b1b02be2
ZD
4642 nw->upto = 0;
4643 nw->bad_uses = 0;
5ed6ace5
MD
4644 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4645 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
8bdbfff5 4646 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 4647 nw->n_cands = 0;
b1b02be2
ZD
4648 nw->n_regs = 0;
4649 nw->cand_use_cost = 0;
4650 nw->cand_cost = 0;
5ed6ace5 4651 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
b1b02be2
ZD
4652 nw->cost = 0;
4653
4654 return nw;
4655}
4656
4657/* Free memory occupied by the set IVS. */
4658
4659static void
4660iv_ca_free (struct iv_ca **ivs)
4661{
4662 free ((*ivs)->cand_for_use);
4663 free ((*ivs)->n_cand_uses);
8bdbfff5 4664 BITMAP_FREE ((*ivs)->cands);
b1b02be2
ZD
4665 free ((*ivs)->n_invariant_uses);
4666 free (*ivs);
4667 *ivs = NULL;
4668}
4669
4670/* Dumps IVS to FILE. */
4671
4672static void
4673iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4674{
4675 const char *pref = " invariants ";
4676 unsigned i;
4677
4678 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4679 bitmap_print (file, ivs->cands, " candidates ","\n");
4680
4681 for (i = 1; i <= data->max_inv_id; i++)
4682 if (ivs->n_invariant_uses[i])
4683 {
4684 fprintf (file, "%s%d", pref, i);
4685 pref = ", ";
4686 }
4687 fprintf (file, "\n");
4688}
4689
4690/* Try changing candidate in IVS to CAND for each use. Return cost of the
36f5ada1
ZD
4691 new set, and store differences in DELTA. Number of induction variables
4692 in the new set is stored to N_IVS. */
b1b02be2
ZD
4693
4694static unsigned
4695iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
36f5ada1
ZD
4696 struct iv_cand *cand, struct iv_ca_delta **delta,
4697 unsigned *n_ivs)
b1b02be2
ZD
4698{
4699 unsigned i, cost;
4700 struct iv_use *use;
4701 struct cost_pair *old_cp, *new_cp;
4702
4703 *delta = NULL;
4704 for (i = 0; i < ivs->upto; i++)
4705 {
4706 use = iv_use (data, i);
4707 old_cp = iv_ca_cand_for_use (ivs, use);
4708
4709 if (old_cp
4710 && old_cp->cand == cand)
4711 continue;
4712
4713 new_cp = get_use_iv_cost (data, use, cand);
4714 if (!new_cp)
4715 continue;
4716
4717 if (!iv_ca_has_deps (ivs, new_cp))
4718 continue;
4719
4720 if (!cheaper_cost_pair (new_cp, old_cp))
4721 continue;
4722
4723 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4724 }
4725
4726 iv_ca_delta_commit (data, ivs, *delta, true);
4727 cost = iv_ca_cost (ivs);
36f5ada1
ZD
4728 if (n_ivs)
4729 *n_ivs = iv_ca_n_cands (ivs);
b1b02be2 4730 iv_ca_delta_commit (data, ivs, *delta, false);
8b11a64c
ZD
4731
4732 return cost;
4733}
4734
a0eca485 4735/* Try narrowing set IVS by removing CAND. Return the cost of
b1b02be2 4736 the new set and store the differences in DELTA. */
8b11a64c
ZD
4737
4738static unsigned
b1b02be2
ZD
4739iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4740 struct iv_cand *cand, struct iv_ca_delta **delta)
8b11a64c 4741{
b1b02be2
ZD
4742 unsigned i, ci;
4743 struct iv_use *use;
4744 struct cost_pair *old_cp, *new_cp, *cp;
4745 bitmap_iterator bi;
4746 struct iv_cand *cnd;
4747 unsigned cost;
4748
4749 *delta = NULL;
4750 for (i = 0; i < n_iv_uses (data); i++)
4751 {
4752 use = iv_use (data, i);
4753
4754 old_cp = iv_ca_cand_for_use (ivs, use);
4755 if (old_cp->cand != cand)
4756 continue;
4757
4758 new_cp = NULL;
4759
4760 if (data->consider_all_candidates)
4761 {
4762 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4763 {
4764 if (ci == cand->id)
4765 continue;
4766
4767 cnd = iv_cand (data, ci);
4768
4769 cp = get_use_iv_cost (data, use, cnd);
4770 if (!cp)
4771 continue;
4772 if (!iv_ca_has_deps (ivs, cp))
4773 continue;
4774
4775 if (!cheaper_cost_pair (cp, new_cp))
4776 continue;
4777
4778 new_cp = cp;
4779 }
4780 }
4781 else
4782 {
4783 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4784 {
4785 if (ci == cand->id)
4786 continue;
4787
4788 cnd = iv_cand (data, ci);
4789
4790 cp = get_use_iv_cost (data, use, cnd);
4791 if (!cp)
4792 continue;
4793 if (!iv_ca_has_deps (ivs, cp))
4794 continue;
4795
4796 if (!cheaper_cost_pair (cp, new_cp))
4797 continue;
4798
4799 new_cp = cp;
4800 }
4801 }
4802
4803 if (!new_cp)
4804 {
4805 iv_ca_delta_free (delta);
4806 return INFTY;
4807 }
4808
4809 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4810 }
4811
4812 iv_ca_delta_commit (data, ivs, *delta, true);
4813 cost = iv_ca_cost (ivs);
4814 iv_ca_delta_commit (data, ivs, *delta, false);
4815
4816 return cost;
8b11a64c
ZD
4817}
4818
36f5ada1
ZD
4819/* Try optimizing the set of candidates IVS by removing candidates different
4820 from to EXCEPT_CAND from it. Return cost of the new set, and store
4821 differences in DELTA. */
4822
4823static unsigned
4824iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4825 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4826{
4827 bitmap_iterator bi;
4828 struct iv_ca_delta *act_delta, *best_delta;
4829 unsigned i, best_cost, acost;
4830 struct iv_cand *cand;
4831
4832 best_delta = NULL;
4833 best_cost = iv_ca_cost (ivs);
4834
4835 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4836 {
4837 cand = iv_cand (data, i);
4838
4839 if (cand == except_cand)
4840 continue;
4841
4842 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4843
4844 if (acost < best_cost)
4845 {
4846 best_cost = acost;
4847 iv_ca_delta_free (&best_delta);
4848 best_delta = act_delta;
4849 }
4850 else
4851 iv_ca_delta_free (&act_delta);
4852 }
4853
4854 if (!best_delta)
4855 {
4856 *delta = NULL;
4857 return best_cost;
4858 }
4859
4860 /* Recurse to possibly remove other unnecessary ivs. */
4861 iv_ca_delta_commit (data, ivs, best_delta, true);
4862 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4863 iv_ca_delta_commit (data, ivs, best_delta, false);
4864 *delta = iv_ca_delta_join (best_delta, *delta);
4865 return best_cost;
4866}
4867
b1b02be2 4868/* Tries to extend the sets IVS in the best possible way in order
8b11a64c
ZD
4869 to express the USE. */
4870
4871static bool
b1b02be2 4872try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
8b11a64c
ZD
4873 struct iv_use *use)
4874{
b1b02be2 4875 unsigned best_cost, act_cost;
8b11a64c 4876 unsigned i;
38b0dcb8
ZD
4877 bitmap_iterator bi;
4878 struct iv_cand *cand;
b1b02be2
ZD
4879 struct iv_ca_delta *best_delta = NULL, *act_delta;
4880 struct cost_pair *cp;
4881
4882 iv_ca_add_use (data, ivs, use);
4883 best_cost = iv_ca_cost (ivs);
8b11a64c 4884
b1b02be2
ZD
4885 cp = iv_ca_cand_for_use (ivs, use);
4886 if (cp)
4887 {
4888 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4889 iv_ca_set_no_cp (data, ivs, use);
4890 }
8b11a64c 4891
38b0dcb8
ZD
4892 /* First try important candidates. Only if it fails, try the specific ones.
4893 Rationale -- in loops with many variables the best choice often is to use
4894 just one generic biv. If we added here many ivs specific to the uses,
4895 the optimization algorithm later would be likely to get stuck in a local
4896 minimum, thus causing us to create too many ivs. The approach from
1c170b5e 4897 few ivs to more seems more likely to be successful -- starting from few
38b0dcb8
ZD
4898 ivs, replacing an expensive use by a specific iv should always be a
4899 win. */
4900 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
8b11a64c 4901 {
38b0dcb8
ZD
4902 cand = iv_cand (data, i);
4903
b1b02be2 4904 if (iv_ca_cand_used_p (ivs, cand))
8b11a64c
ZD
4905 continue;
4906
b1b02be2
ZD
4907 cp = get_use_iv_cost (data, use, cand);
4908 if (!cp)
4909 continue;
4910
4911 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 4912 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
4913 iv_ca_set_no_cp (data, ivs, use);
4914 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
8b11a64c
ZD
4915
4916 if (act_cost < best_cost)
4917 {
4918 best_cost = act_cost;
b1b02be2
ZD
4919
4920 iv_ca_delta_free (&best_delta);
4921 best_delta = act_delta;
8b11a64c 4922 }
b1b02be2
ZD
4923 else
4924 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
4925 }
4926
38b0dcb8
ZD
4927 if (best_cost == INFTY)
4928 {
4929 for (i = 0; i < use->n_map_members; i++)
4930 {
4931 cp = use->cost_map + i;
b1b02be2
ZD
4932 cand = cp->cand;
4933 if (!cand)
38b0dcb8
ZD
4934 continue;
4935
4936 /* Already tried this. */
b1b02be2
ZD
4937 if (cand->important)
4938 continue;
4939
4940 if (iv_ca_cand_used_p (ivs, cand))
38b0dcb8
ZD
4941 continue;
4942
b1b02be2
ZD
4943 act_delta = NULL;
4944 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 4945 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
4946 iv_ca_set_no_cp (data, ivs, use);
4947 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4948 cp, act_delta);
38b0dcb8
ZD
4949
4950 if (act_cost < best_cost)
4951 {
4952 best_cost = act_cost;
b1b02be2
ZD
4953
4954 if (best_delta)
4955 iv_ca_delta_free (&best_delta);
4956 best_delta = act_delta;
38b0dcb8 4957 }
b1b02be2
ZD
4958 else
4959 iv_ca_delta_free (&act_delta);
38b0dcb8
ZD
4960 }
4961 }
4962
b1b02be2
ZD
4963 iv_ca_delta_commit (data, ivs, best_delta, true);
4964 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
4965
4966 return (best_cost != INFTY);
4967}
4968
b1b02be2 4969/* Finds an initial assignment of candidates to uses. */
8b11a64c 4970
b1b02be2
ZD
4971static struct iv_ca *
4972get_initial_solution (struct ivopts_data *data)
8b11a64c 4973{
b1b02be2 4974 struct iv_ca *ivs = iv_ca_new (data);
8b11a64c
ZD
4975 unsigned i;
4976
4977 for (i = 0; i < n_iv_uses (data); i++)
b1b02be2
ZD
4978 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4979 {
4980 iv_ca_free (&ivs);
4981 return NULL;
4982 }
8b11a64c 4983
b1b02be2 4984 return ivs;
8b11a64c
ZD
4985}
4986
b1b02be2 4987/* Tries to improve set of induction variables IVS. */
8b11a64c
ZD
4988
4989static bool
b1b02be2 4990try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
8b11a64c 4991{
36f5ada1
ZD
4992 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4993 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
b1b02be2 4994 struct iv_cand *cand;
8b11a64c 4995
36f5ada1 4996 /* Try extending the set of induction variables by one. */
8b11a64c
ZD
4997 for (i = 0; i < n_iv_cands (data); i++)
4998 {
b1b02be2
ZD
4999 cand = iv_cand (data, i);
5000
5001 if (iv_ca_cand_used_p (ivs, cand))
36f5ada1
ZD
5002 continue;
5003
5004 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5005 if (!act_delta)
5006 continue;
5007
5008 /* If we successfully added the candidate and the set is small enough,
5009 try optimizing it by removing other candidates. */
5010 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5011 {
5012 iv_ca_delta_commit (data, ivs, act_delta, true);
5013 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5014 iv_ca_delta_commit (data, ivs, act_delta, false);
5015 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5016 }
8b11a64c 5017
b1b02be2 5018 if (acost < best_cost)
8b11a64c 5019 {
b1b02be2 5020 best_cost = acost;
36f5ada1 5021 iv_ca_delta_free (&best_delta);
b1b02be2 5022 best_delta = act_delta;
8b11a64c 5023 }
8b11a64c 5024 else
b1b02be2 5025 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
5026 }
5027
b1b02be2 5028 if (!best_delta)
36f5ada1
ZD
5029 {
5030 /* Try removing the candidates from the set instead. */
5031 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5032
5033 /* Nothing more we can do. */
5034 if (!best_delta)
5035 return false;
5036 }
8b11a64c 5037
b1b02be2 5038 iv_ca_delta_commit (data, ivs, best_delta, true);
36f5ada1 5039 gcc_assert (best_cost == iv_ca_cost (ivs));
b1b02be2 5040 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
5041 return true;
5042}
5043
5044/* Attempts to find the optimal set of induction variables. We do simple
5045 greedy heuristic -- we try to replace at most one candidate in the selected
5046 solution and remove the unused ivs while this improves the cost. */
5047
b1b02be2 5048static struct iv_ca *
8b11a64c
ZD
5049find_optimal_iv_set (struct ivopts_data *data)
5050{
b1b02be2
ZD
5051 unsigned i;
5052 struct iv_ca *set;
8b11a64c
ZD
5053 struct iv_use *use;
5054
b1b02be2
ZD
5055 /* Get the initial solution. */
5056 set = get_initial_solution (data);
5057 if (!set)
8b11a64c
ZD
5058 {
5059 if (dump_file && (dump_flags & TDF_DETAILS))
5060 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
8b11a64c
ZD
5061 return NULL;
5062 }
5063
5064 if (dump_file && (dump_flags & TDF_DETAILS))
5065 {
b1b02be2
ZD
5066 fprintf (dump_file, "Initial set of candidates:\n");
5067 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
5068 }
5069
b1b02be2 5070 while (try_improve_iv_set (data, set))
8b11a64c
ZD
5071 {
5072 if (dump_file && (dump_flags & TDF_DETAILS))
5073 {
b1b02be2
ZD
5074 fprintf (dump_file, "Improved to:\n");
5075 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
5076 }
5077 }
5078
5079 if (dump_file && (dump_flags & TDF_DETAILS))
b1b02be2 5080 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
8b11a64c
ZD
5081
5082 for (i = 0; i < n_iv_uses (data); i++)
5083 {
5084 use = iv_use (data, i);
b1b02be2 5085 use->selected = iv_ca_cand_for_use (set, use)->cand;
8b11a64c
ZD
5086 }
5087
8b11a64c
ZD
5088 return set;
5089}
5090
5091/* Creates a new induction variable corresponding to CAND. */
5092
5093static void
5094create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5095{
5096 block_stmt_iterator incr_pos;
5097 tree base;
5098 bool after = false;
5099
5100 if (!cand->iv)
5101 return;
5102
5103 switch (cand->pos)
5104 {
5105 case IP_NORMAL:
5106 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
5107 break;
5108
5109 case IP_END:
5110 incr_pos = bsi_last (ip_end_pos (data->current_loop));
5111 after = true;
5112 break;
5113
5114 case IP_ORIGINAL:
5115 /* Mark that the iv is preserved. */
5116 name_info (data, cand->var_before)->preserve_biv = true;
5117 name_info (data, cand->var_after)->preserve_biv = true;
5118
5119 /* Rewrite the increment so that it uses var_before directly. */
5120 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5121
5122 return;
5123 }
5124
5125 gimple_add_tmp_var (cand->var_before);
5126 add_referenced_tmp_var (cand->var_before);
5127
5128 base = unshare_expr (cand->iv->base);
5129
9be872b7
ZD
5130 create_iv (base, unshare_expr (cand->iv->step),
5131 cand->var_before, data->current_loop,
8b11a64c
ZD
5132 &incr_pos, after, &cand->var_before, &cand->var_after);
5133}
5134
5135/* Creates new induction variables described in SET. */
5136
5137static void
b1b02be2 5138create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
8b11a64c
ZD
5139{
5140 unsigned i;
5141 struct iv_cand *cand;
87c476a2 5142 bitmap_iterator bi;
8b11a64c 5143
b1b02be2 5144 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
8b11a64c
ZD
5145 {
5146 cand = iv_cand (data, i);
5147 create_new_iv (data, cand);
87c476a2 5148 }
8b11a64c
ZD
5149}
5150
5151/* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
5152 is true, remove also the ssa name defined by the statement. */
5153
5154static void
5155remove_statement (tree stmt, bool including_defined_name)
5156{
5157 if (TREE_CODE (stmt) == PHI_NODE)
5158 {
5159 if (!including_defined_name)
5160 {
5161 /* Prevent the ssa name defined by the statement from being removed. */
5162 SET_PHI_RESULT (stmt, NULL);
5163 }
d19e3ef6 5164 remove_phi_node (stmt, NULL_TREE);
8b11a64c
ZD
5165 }
5166 else
5167 {
1a1804c2 5168 block_stmt_iterator bsi = bsi_for_stmt (stmt);
8b11a64c 5169
736432ee 5170 bsi_remove (&bsi, true);
8b11a64c
ZD
5171 }
5172}
5173
5174/* Rewrites USE (definition of iv used in a nonlinear expression)
5175 using candidate CAND. */
5176
5177static void
5178rewrite_use_nonlinear_expr (struct ivopts_data *data,
5179 struct iv_use *use, struct iv_cand *cand)
5180{
3520b745 5181 tree comp;
8b11a64c
ZD
5182 tree op, stmts, tgt, ass;
5183 block_stmt_iterator bsi, pbsi;
3520b745
ZD
5184
5185 /* An important special case -- if we are asked to express value of
5186 the original iv by itself, just exit; there is no need to
5187 introduce a new computation (that might also need casting the
5188 variable to unsigned and back). */
5189 if (cand->pos == IP_ORIGINAL
7b9d4f70 5190 && cand->incremented_at == use->stmt)
3520b745 5191 {
7b9d4f70
ZD
5192 tree step, ctype, utype;
5193 enum tree_code incr_code = PLUS_EXPR;
5194
5195 gcc_assert (TREE_CODE (use->stmt) == MODIFY_EXPR);
5196 gcc_assert (TREE_OPERAND (use->stmt, 0) == cand->var_after);
5197
5198 step = cand->iv->step;
5199 ctype = TREE_TYPE (step);
5200 utype = TREE_TYPE (cand->var_after);
5201 if (TREE_CODE (step) == NEGATE_EXPR)
5202 {
5203 incr_code = MINUS_EXPR;
5204 step = TREE_OPERAND (step, 0);
5205 }
5206
5207 /* Check whether we may leave the computation unchanged.
5208 This is the case only if it does not rely on other
5209 computations in the loop -- otherwise, the computation
5210 we rely upon may be removed in remove_unused_ivs,
5211 thus leading to ICE. */
3520b745 5212 op = TREE_OPERAND (use->stmt, 1);
7b9d4f70
ZD
5213 if (TREE_CODE (op) == PLUS_EXPR
5214 || TREE_CODE (op) == MINUS_EXPR)
5215 {
5216 if (TREE_OPERAND (op, 0) == cand->var_before)
5217 op = TREE_OPERAND (op, 1);
5218 else if (TREE_CODE (op) == PLUS_EXPR
5219 && TREE_OPERAND (op, 1) == cand->var_before)
5220 op = TREE_OPERAND (op, 0);
5221 else
5222 op = NULL_TREE;
5223 }
5224 else
5225 op = NULL_TREE;
3520b745 5226
7b9d4f70
ZD
5227 if (op
5228 && (TREE_CODE (op) == INTEGER_CST
5229 || operand_equal_p (op, step, 0)))
3520b745 5230 return;
7b9d4f70
ZD
5231
5232 /* Otherwise, add the necessary computations to express
5233 the iv. */
5234 op = fold_convert (ctype, cand->var_before);
5235 comp = fold_convert (utype,
5236 build2 (incr_code, ctype, op,
5237 unshare_expr (step)));
3520b745 5238 }
7b9d4f70
ZD
5239 else
5240 comp = get_computation (data->current_loop, use, cand);
3520b745 5241
1e128c5f 5242 switch (TREE_CODE (use->stmt))
8b11a64c 5243 {
1e128c5f 5244 case PHI_NODE:
8b11a64c
ZD
5245 tgt = PHI_RESULT (use->stmt);
5246
5247 /* If we should keep the biv, do not replace it. */
5248 if (name_info (data, tgt)->preserve_biv)
5249 return;
5250
5251 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
5252 while (!bsi_end_p (pbsi)
5253 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
5254 {
5255 bsi = pbsi;
5256 bsi_next (&pbsi);
5257 }
1e128c5f
GB
5258 break;
5259
5260 case MODIFY_EXPR:
8b11a64c 5261 tgt = TREE_OPERAND (use->stmt, 0);
1a1804c2 5262 bsi = bsi_for_stmt (use->stmt);
1e128c5f
GB
5263 break;
5264
5265 default:
5266 gcc_unreachable ();
8b11a64c 5267 }
8b11a64c
ZD
5268
5269 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
5270
5271 if (TREE_CODE (use->stmt) == PHI_NODE)
5272 {
5273 if (stmts)
5274 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
5275 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
5276 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
5277 remove_statement (use->stmt, false);
5278 SSA_NAME_DEF_STMT (tgt) = ass;
5279 }
5280 else
5281 {
5282 if (stmts)
5283 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5284 TREE_OPERAND (use->stmt, 1) = op;
5285 }
5286}
5287
5288/* Replaces ssa name in index IDX by its basic variable. Callback for
5289 for_each_index. */
5290
5291static bool
be35cf60 5292idx_remove_ssa_names (tree base, tree *idx,
8b11a64c
ZD
5293 void *data ATTRIBUTE_UNUSED)
5294{
be35cf60
ZD
5295 tree *op;
5296
8b11a64c
ZD
5297 if (TREE_CODE (*idx) == SSA_NAME)
5298 *idx = SSA_NAME_VAR (*idx);
be35cf60
ZD
5299
5300 if (TREE_CODE (base) == ARRAY_REF)
5301 {
5302 op = &TREE_OPERAND (base, 2);
5303 if (*op
5304 && TREE_CODE (*op) == SSA_NAME)
5305 *op = SSA_NAME_VAR (*op);
5306 op = &TREE_OPERAND (base, 3);
5307 if (*op
5308 && TREE_CODE (*op) == SSA_NAME)
5309 *op = SSA_NAME_VAR (*op);
5310 }
5311
8b11a64c
ZD
5312 return true;
5313}
5314
5315/* Unshares REF and replaces ssa names inside it by their basic variables. */
5316
5317static tree
5318unshare_and_remove_ssa_names (tree ref)
5319{
5320 ref = unshare_expr (ref);
5321 for_each_index (&ref, idx_remove_ssa_names, NULL);
5322
5323 return ref;
5324}
5325
ac182688
ZD
5326/* Extract the alias analysis info for the memory reference REF. There are
5327 several ways how this information may be stored and what precisely is
5328 its semantics depending on the type of the reference, but there always is
5329 somewhere hidden one _DECL node that is used to determine the set of
5330 virtual operands for the reference. The code below deciphers this jungle
5331 and extracts this single useful piece of information. */
8b11a64c 5332
ac182688 5333static tree
9be7ee44 5334get_ref_tag (tree ref, tree orig)
8b11a64c 5335{
ac182688 5336 tree var = get_base_address (ref);
9be7ee44
ZD
5337 tree aref = NULL_TREE, tag, sv;
5338 HOST_WIDE_INT offset, size, maxsize;
5339
5340 for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
5341 {
5342 aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
5343 if (ref)
5344 break;
5345 }
5346
5347 if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
5348 return unshare_expr (sv);
7ccf35ed 5349
ac182688
ZD
5350 if (!var)
5351 return NULL_TREE;
be35cf60 5352
be35cf60 5353 if (TREE_CODE (var) == INDIRECT_REF)
8b11a64c 5354 {
18cd8a03
DN
5355 /* If the base is a dereference of a pointer, first check its name memory
5356 tag. If it does not have one, use its symbol memory tag. */
2360a4c1
ZD
5357 var = TREE_OPERAND (var, 0);
5358 if (TREE_CODE (var) != SSA_NAME)
5359 return NULL_TREE;
5360
ac182688
ZD
5361 if (SSA_NAME_PTR_INFO (var))
5362 {
5363 tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
5364 if (tag)
5365 return tag;
5366 }
5367
8b11a64c 5368 var = SSA_NAME_VAR (var);
18cd8a03 5369 tag = var_ann (var)->symbol_mem_tag;
2360a4c1
ZD
5370 gcc_assert (tag != NULL_TREE);
5371 return tag;
8b11a64c 5372 }
2360a4c1
ZD
5373 else
5374 {
5375 if (!DECL_P (var))
5376 return NULL_TREE;
5377
18cd8a03 5378 tag = var_ann (var)->symbol_mem_tag;
0bca51f0 5379 if (tag)
ac182688 5380 return tag;
8b11a64c 5381
ac182688
ZD
5382 return var;
5383 }
ac182688 5384}
7ccf35ed 5385
ac182688 5386/* Copies the reference information from OLD_REF to NEW_REF. */
0bca51f0 5387
ac182688
ZD
5388static void
5389copy_ref_info (tree new_ref, tree old_ref)
5390{
5391 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5392 copy_mem_ref_info (new_ref, old_ref);
5393 else
5394 {
ac182688 5395 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
9be7ee44 5396 TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
ac182688 5397 }
8b11a64c
ZD
5398}
5399
5400/* Rewrites USE (address that is an iv) using candidate CAND. */
5401
5402static void
5403rewrite_use_address (struct ivopts_data *data,
5404 struct iv_use *use, struct iv_cand *cand)
5405{
ac182688 5406 struct affine_tree_combination aff;
1a1804c2 5407 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
ac182688 5408 tree ref;
8b11a64c 5409
ac182688
ZD
5410 get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5411 unshare_aff_combination (&aff);
8b11a64c 5412
ac182688
ZD
5413 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
5414 copy_ref_info (ref, *use->op_p);
5415 *use->op_p = ref;
8b11a64c
ZD
5416}
5417
5418/* Rewrites USE (the condition such that one of the arguments is an iv) using
5419 candidate CAND. */
5420
5421static void
5422rewrite_use_compare (struct ivopts_data *data,
5423 struct iv_use *use, struct iv_cand *cand)
5424{
5425 tree comp;
5426 tree *op_p, cond, op, stmts, bound;
1a1804c2 5427 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
8b11a64c 5428 enum tree_code compare;
f5f12961 5429 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
8b11a64c 5430
f5f12961
ZD
5431 bound = cp->value;
5432 if (bound)
8b11a64c 5433 {
9e7376e5
ZD
5434 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5435 tree var_type = TREE_TYPE (var);
5436
f5f12961 5437 compare = iv_elimination_compare (data, use);
9e7376e5 5438 bound = fold_convert (var_type, bound);
8b11a64c
ZD
5439 op = force_gimple_operand (unshare_expr (bound), &stmts,
5440 true, NULL_TREE);
5441
5442 if (stmts)
5443 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5444
9e7376e5 5445 *use->op_p = build2 (compare, boolean_type_node, var, op);
f430bae8 5446 update_stmt (use->stmt);
8b11a64c
ZD
5447 return;
5448 }
5449
5450 /* The induction variable elimination failed; just express the original
5451 giv. */
ac182688 5452 comp = get_computation (data->current_loop, use, cand);
8b11a64c
ZD
5453
5454 cond = *use->op_p;
5455 op_p = &TREE_OPERAND (cond, 0);
5456 if (TREE_CODE (*op_p) != SSA_NAME
5457 || zero_p (get_iv (data, *op_p)->step))
5458 op_p = &TREE_OPERAND (cond, 1);
5459
5460 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
5461 if (stmts)
5462 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5463
5464 *op_p = op;
5465}
5466
5467/* Ensure that operand *OP_P may be used at the end of EXIT without
5468 violating loop closed ssa form. */
5469
5470static void
5471protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
5472{
5473 basic_block def_bb;
5474 struct loop *def_loop;
5475 tree phi, use;
5476
5477 use = USE_FROM_PTR (op_p);
5478 if (TREE_CODE (use) != SSA_NAME)
5479 return;
5480
5481 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
5482 if (!def_bb)
5483 return;
5484
5485 def_loop = def_bb->loop_father;
5486 if (flow_bb_inside_loop_p (def_loop, exit->dest))
5487 return;
5488
5489 /* Try finding a phi node that copies the value out of the loop. */
bb29d951 5490 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
5491 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
5492 break;
5493
5494 if (!phi)
5495 {
5496 /* Create such a phi node. */
5497 tree new_name = duplicate_ssa_name (use, NULL);
5498
5499 phi = create_phi_node (new_name, exit->dest);
5500 SSA_NAME_DEF_STMT (new_name) = phi;
d2e398df 5501 add_phi_arg (phi, use, exit);
8b11a64c
ZD
5502 }
5503
5504 SET_USE (op_p, PHI_RESULT (phi));
5505}
5506
5507/* Ensure that operands of STMT may be used at the end of EXIT without
5508 violating loop closed ssa form. */
5509
5510static void
5511protect_loop_closed_ssa_form (edge exit, tree stmt)
5512{
f47c96aa
AM
5513 ssa_op_iter iter;
5514 use_operand_p use_p;
8b11a64c 5515
f47c96aa
AM
5516 FOR_EACH_SSA_USE_OPERAND (use_p, stmt, iter, SSA_OP_ALL_USES)
5517 protect_loop_closed_ssa_form_use (exit, use_p);
8b11a64c
ZD
5518}
5519
5520/* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5521 so that they are emitted on the correct place, and so that the loop closed
5522 ssa form is preserved. */
5523
3ac01fde 5524void
8b11a64c
ZD
5525compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5526{
5527 tree_stmt_iterator tsi;
5528 block_stmt_iterator bsi;
5529 tree phi, stmt, def, next;
5530
c5cbcccf 5531 if (!single_pred_p (exit->dest))
8b11a64c
ZD
5532 split_loop_exit_edge (exit);
5533
f430bae8
AM
5534 /* Ensure there is label in exit->dest, so that we can
5535 insert after it. */
5536 tree_block_label (exit->dest);
5537 bsi = bsi_after_labels (exit->dest);
5538
8b11a64c
ZD
5539 if (TREE_CODE (stmts) == STATEMENT_LIST)
5540 {
5541 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
f430bae8 5542 {
35771d34
PB
5543 tree stmt = tsi_stmt (tsi);
5544 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
5545 protect_loop_closed_ssa_form (exit, stmt);
f430bae8 5546 }
8b11a64c
ZD
5547 }
5548 else
f430bae8 5549 {
35771d34
PB
5550 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
5551 protect_loop_closed_ssa_form (exit, stmts);
f430bae8 5552 }
8b11a64c
ZD
5553
5554 if (!op)
5555 return;
5556
5557 for (phi = phi_nodes (exit->dest); phi; phi = next)
5558 {
eaf0dc02 5559 next = PHI_CHAIN (phi);
8b11a64c
ZD
5560
5561 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5562 {
5563 def = PHI_RESULT (phi);
5564 remove_statement (phi, false);
5565 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5566 def, op);
5567 SSA_NAME_DEF_STMT (def) = stmt;
35771d34 5568 bsi_insert_before (&bsi, stmt, BSI_SAME_STMT);
8b11a64c
ZD
5569 }
5570 }
5571}
5572
8b11a64c
ZD
5573/* Rewrites USE using candidate CAND. */
5574
5575static void
5576rewrite_use (struct ivopts_data *data,
5577 struct iv_use *use, struct iv_cand *cand)
5578{
5579 switch (use->type)
5580 {
5581 case USE_NONLINEAR_EXPR:
5582 rewrite_use_nonlinear_expr (data, use, cand);
5583 break;
5584
8b11a64c
ZD
5585 case USE_ADDRESS:
5586 rewrite_use_address (data, use, cand);
5587 break;
5588
5589 case USE_COMPARE:
5590 rewrite_use_compare (data, use, cand);
5591 break;
5592
5593 default:
1e128c5f 5594 gcc_unreachable ();
8b11a64c 5595 }
758cf3f2 5596 mark_new_vars_to_rename (use->stmt);
8b11a64c
ZD
5597}
5598
5599/* Rewrite the uses using the selected induction variables. */
5600
5601static void
5602rewrite_uses (struct ivopts_data *data)
5603{
5604 unsigned i;
5605 struct iv_cand *cand;
5606 struct iv_use *use;
5607
5608 for (i = 0; i < n_iv_uses (data); i++)
5609 {
5610 use = iv_use (data, i);
5611 cand = use->selected;
1e128c5f 5612 gcc_assert (cand);
8b11a64c
ZD
5613
5614 rewrite_use (data, use, cand);
5615 }
5616}
5617
5618/* Removes the ivs that are not used after rewriting. */
5619
5620static void
5621remove_unused_ivs (struct ivopts_data *data)
5622{
5623 unsigned j;
87c476a2 5624 bitmap_iterator bi;
8b11a64c 5625
87c476a2 5626 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
5627 {
5628 struct version_info *info;
5629
5630 info = ver_info (data, j);
5631 if (info->iv
5632 && !zero_p (info->iv->step)
5633 && !info->inv_id
5634 && !info->iv->have_use_for
5635 && !info->preserve_biv)
5636 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
87c476a2 5637 }
8b11a64c
ZD
5638}
5639
5640/* Frees data allocated by the optimization of a single loop. */
5641
5642static void
5643free_loop_data (struct ivopts_data *data)
5644{
5645 unsigned i, j;
87c476a2 5646 bitmap_iterator bi;
69ebd99d 5647 tree obj;
8b11a64c 5648
ca4c3169
ZD
5649 htab_empty (data->niters);
5650
87c476a2 5651 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
5652 {
5653 struct version_info *info;
5654
5655 info = ver_info (data, i);
5656 if (info->iv)
5657 free (info->iv);
5658 info->iv = NULL;
5659 info->has_nonlin_use = false;
5660 info->preserve_biv = false;
5661 info->inv_id = 0;
87c476a2 5662 }
8b11a64c 5663 bitmap_clear (data->relevant);
b1b02be2 5664 bitmap_clear (data->important_candidates);
8b11a64c
ZD
5665
5666 for (i = 0; i < n_iv_uses (data); i++)
5667 {
5668 struct iv_use *use = iv_use (data, i);
5669
5670 free (use->iv);
8bdbfff5 5671 BITMAP_FREE (use->related_cands);
8b11a64c
ZD
5672 for (j = 0; j < use->n_map_members; j++)
5673 if (use->cost_map[j].depends_on)
8bdbfff5 5674 BITMAP_FREE (use->cost_map[j].depends_on);
8b11a64c
ZD
5675 free (use->cost_map);
5676 free (use);
5677 }
69ebd99d 5678 VEC_truncate (iv_use_p, data->iv_uses, 0);
8b11a64c
ZD
5679
5680 for (i = 0; i < n_iv_cands (data); i++)
5681 {
5682 struct iv_cand *cand = iv_cand (data, i);
5683
5684 if (cand->iv)
5685 free (cand->iv);
9be872b7
ZD
5686 if (cand->depends_on)
5687 BITMAP_FREE (cand->depends_on);
8b11a64c
ZD
5688 free (cand);
5689 }
69ebd99d 5690 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
8b11a64c
ZD
5691
5692 if (data->version_info_size < num_ssa_names)
5693 {
5694 data->version_info_size = 2 * num_ssa_names;
5695 free (data->version_info);
5ed6ace5 5696 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
8b11a64c
ZD
5697 }
5698
5699 data->max_inv_id = 0;
5700
69ebd99d
KH
5701 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5702 SET_DECL_RTL (obj, NULL_RTX);
8b11a64c 5703
69ebd99d 5704 VEC_truncate (tree, decl_rtl_to_reset, 0);
8b11a64c
ZD
5705}
5706
5707/* Finalizes data structures used by the iv optimization pass. LOOPS is the
5708 loop tree. */
5709
5710static void
9a2ef6b8 5711tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
8b11a64c 5712{
8b11a64c
ZD
5713 free_loop_data (data);
5714 free (data->version_info);
8bdbfff5
NS
5715 BITMAP_FREE (data->relevant);
5716 BITMAP_FREE (data->important_candidates);
ca4c3169 5717 htab_delete (data->niters);
8b11a64c 5718
69ebd99d
KH
5719 VEC_free (tree, heap, decl_rtl_to_reset);
5720 VEC_free (iv_use_p, heap, data->iv_uses);
5721 VEC_free (iv_cand_p, heap, data->iv_candidates);
8b11a64c
ZD
5722}
5723
5724/* Optimizes the LOOP. Returns true if anything changed. */
5725
5726static bool
5727tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5728{
5729 bool changed = false;
b1b02be2 5730 struct iv_ca *iv_ca;
8b11a64c
ZD
5731 edge exit;
5732
5733 data->current_loop = loop;
5734
5735 if (dump_file && (dump_flags & TDF_DETAILS))
5736 {
5737 fprintf (dump_file, "Processing loop %d\n", loop->num);
5738
5739 exit = single_dom_exit (loop);
5740 if (exit)
5741 {
5742 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5743 exit->src->index, exit->dest->index);
5744 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5745 fprintf (dump_file, "\n");
5746 }
5747
5748 fprintf (dump_file, "\n");
5749 }
5750
5751 /* For each ssa name determines whether it behaves as an induction variable
5752 in some loop. */
5753 if (!find_induction_variables (data))
5754 goto finish;
5755
5756 /* Finds interesting uses (item 1). */
5757 find_interesting_uses (data);
5758 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5759 goto finish;
5760
5761 /* Finds candidates for the induction variables (item 2). */
5762 find_iv_candidates (data);
5763
5764 /* Calculates the costs (item 3, part 1). */
5765 determine_use_iv_costs (data);
5766 determine_iv_costs (data);
5767 determine_set_costs (data);
5768
5769 /* Find the optimal set of induction variables (item 3, part 2). */
b1b02be2
ZD
5770 iv_ca = find_optimal_iv_set (data);
5771 if (!iv_ca)
8b11a64c
ZD
5772 goto finish;
5773 changed = true;
5774
5775 /* Create the new induction variables (item 4, part 1). */
b1b02be2
ZD
5776 create_new_ivs (data, iv_ca);
5777 iv_ca_free (&iv_ca);
8b11a64c
ZD
5778
5779 /* Rewrite the uses (item 4, part 2). */
5780 rewrite_uses (data);
5781
5782 /* Remove the ivs that are unused after rewriting. */
5783 remove_unused_ivs (data);
5784
8b11a64c
ZD
5785 /* We have changed the structure of induction variables; it might happen
5786 that definitions in the scev database refer to some of them that were
5787 eliminated. */
5788 scev_reset ();
5789
5790finish:
5791 free_loop_data (data);
5792
5793 return changed;
5794}
5795
5796/* Main entry point. Optimizes induction variables in LOOPS. */
5797
5798void
5799tree_ssa_iv_optimize (struct loops *loops)
5800{
5801 struct loop *loop;
5802 struct ivopts_data data;
5803
9a2ef6b8 5804 tree_ssa_iv_optimize_init (&data);
8b11a64c
ZD
5805
5806 /* Optimize the loops starting with the innermost ones. */
5807 loop = loops->tree_root;
5808 while (loop->inner)
5809 loop = loop->inner;
5810
8b11a64c
ZD
5811 /* Scan the loops, inner ones first. */
5812 while (loop != loops->tree_root)
5813 {
8679c649
JH
5814 if (dump_file && (dump_flags & TDF_DETAILS))
5815 flow_loop_dump (loop, dump_file, NULL, 1);
e9472263
ZD
5816
5817 tree_ssa_iv_optimize_loop (&data, loop);
8b11a64c
ZD
5818
5819 if (loop->next)
5820 {
5821 loop = loop->next;
5822 while (loop->inner)
5823 loop = loop->inner;
5824 }
5825 else
5826 loop = loop->outer;
5827 }
5828
9a2ef6b8 5829 tree_ssa_iv_optimize_finalize (&data);
8b11a64c 5830}