]> git.ipfire.org Git - thirdparty/gcc.git/blame - gcc/tree-ssa-loop-ivopts.c
tree-flow-inline.h (get_stmt_operands): Remove.
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
CommitLineData
8b11a64c 1/* Induction variable optimizations.
ad616de1 2 Copyright (C) 2003, 2004, 2005 Free Software Foundation, Inc.
8b11a64c
ZD
3
4This file is part of GCC.
5
6GCC is free software; you can redistribute it and/or modify it
7under the terms of the GNU General Public License as published by the
8Free Software Foundation; either version 2, or (at your option) any
9later version.
10
11GCC is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU General Public License
17along with GCC; see the file COPYING. If not, write to the Free
18Software Foundation, 59 Temple Place - Suite 330, Boston, MA
1902111-1307, USA. */
20
21/* This pass tries to find the optimal set of induction variables for the loop.
22 It optimizes just the basic linear induction variables (although adding
23 support for other types should not be too hard). It includes the
24 optimizations commonly known as strength reduction, induction variable
25 coalescing and induction variable elimination. It does it in the
26 following steps:
27
28 1) The interesting uses of induction variables are found. This includes
29
30 -- uses of induction variables in non-linear expressions
31 -- addresses of arrays
32 -- comparisons of induction variables
33
34 2) Candidates for the induction variables are found. This includes
35
36 -- old induction variables
37 -- the variables defined by expressions derived from the "interesting
38 uses" above
39
40 3) The optimal (w.r. to a cost function) set of variables is chosen. The
41 cost function assigns a cost to sets of induction variables and consists
42 of three parts:
43
44 -- The use costs. Each of the interesting uses chooses the best induction
45 variable in the set and adds its cost to the sum. The cost reflects
46 the time spent on modifying the induction variables value to be usable
47 for the given purpose (adding base and offset for arrays, etc.).
48 -- The variable costs. Each of the variables has a cost assigned that
49 reflects the costs associated with incrementing the value of the
50 variable. The original variables are somewhat preferred.
51 -- The set cost. Depending on the size of the set, extra cost may be
52 added to reflect register pressure.
53
54 All the costs are defined in a machine-specific way, using the target
55 hooks and machine descriptions to determine them.
56
57 4) The trees are transformed to use the new variables, the dead code is
58 removed.
59
60 All of this is done loop by loop. Doing it globally is theoretically
61 possible, it might give a better performance and it might enable us
62 to decide costs more precisely, but getting all the interactions right
63 would be complicated. */
64
65#include "config.h"
66#include "system.h"
67#include "coretypes.h"
68#include "tm.h"
69#include "tree.h"
70#include "rtl.h"
71#include "tm_p.h"
72#include "hard-reg-set.h"
73#include "basic-block.h"
74#include "output.h"
75#include "diagnostic.h"
76#include "tree-flow.h"
77#include "tree-dump.h"
78#include "timevar.h"
79#include "cfgloop.h"
80#include "varray.h"
81#include "expr.h"
82#include "tree-pass.h"
83#include "ggc.h"
84#include "insn-config.h"
85#include "recog.h"
86#include "hashtab.h"
87#include "tree-chrec.h"
88#include "tree-scalar-evolution.h"
89#include "cfgloop.h"
90#include "params.h"
39b4020c 91#include "langhooks.h"
8b11a64c
ZD
92
93/* The infinite cost. */
94#define INFTY 10000000
95
96/* The expected number of loop iterations. TODO -- use profiling instead of
97 this. */
98#define AVG_LOOP_NITER(LOOP) 5
99
8b11a64c
ZD
100
101/* Representation of the induction variable. */
102struct iv
103{
104 tree base; /* Initial value of the iv. */
e6845c23 105 tree base_object; /* A memory object to that the induction variable points. */
8b11a64c
ZD
106 tree step; /* Step of the iv (constant only). */
107 tree ssa_name; /* The ssa name with the value. */
108 bool biv_p; /* Is it a biv? */
109 bool have_use_for; /* Do we already have a use for it? */
110 unsigned use_id; /* The identifier in the use if it is the case. */
111};
112
113/* Per-ssa version information (induction variable descriptions, etc.). */
114struct version_info
115{
116 tree name; /* The ssa name. */
117 struct iv *iv; /* Induction variable description. */
118 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
119 an expression that is not an induction variable. */
120 unsigned inv_id; /* Id of an invariant. */
121 bool preserve_biv; /* For the original biv, whether to preserve it. */
122};
123
124/* Information attached to loop. */
125struct loop_data
126{
8b11a64c
ZD
127 unsigned regs_used; /* Number of registers used. */
128};
129
130/* Types of uses. */
131enum use_type
132{
133 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
134 USE_OUTER, /* The induction variable is used outside the loop. */
135 USE_ADDRESS, /* Use in an address. */
136 USE_COMPARE /* Use is a compare. */
137};
138
139/* The candidate - cost pair. */
140struct cost_pair
141{
142 struct iv_cand *cand; /* The candidate. */
143 unsigned cost; /* The cost. */
144 bitmap depends_on; /* The list of invariants that have to be
145 preserved. */
146};
147
148/* Use. */
149struct iv_use
150{
151 unsigned id; /* The id of the use. */
152 enum use_type type; /* Type of the use. */
153 struct iv *iv; /* The induction variable it is based on. */
154 tree stmt; /* Statement in that it occurs. */
155 tree *op_p; /* The place where it occurs. */
b1b02be2
ZD
156 bitmap related_cands; /* The set of "related" iv candidates, plus the common
157 important ones. */
8b11a64c
ZD
158
159 unsigned n_map_members; /* Number of candidates in the cost_map list. */
160 struct cost_pair *cost_map;
161 /* The costs wrto the iv candidates. */
162
163 struct iv_cand *selected;
164 /* The selected candidate. */
165};
166
167/* The position where the iv is computed. */
168enum iv_position
169{
170 IP_NORMAL, /* At the end, just before the exit condition. */
171 IP_END, /* At the end of the latch block. */
172 IP_ORIGINAL /* The original biv. */
173};
174
175/* The induction variable candidate. */
176struct iv_cand
177{
178 unsigned id; /* The number of the candidate. */
179 bool important; /* Whether this is an "important" candidate, i.e. such
180 that it should be considered by all uses. */
181 enum iv_position pos; /* Where it is computed. */
182 tree incremented_at; /* For original biv, the statement where it is
183 incremented. */
184 tree var_before; /* The variable used for it before increment. */
185 tree var_after; /* The variable used for it after increment. */
186 struct iv *iv; /* The value of the candidate. NULL for
187 "pseudocandidate" used to indicate the possibility
188 to replace the final value of an iv by direct
189 computation of the value. */
190 unsigned cost; /* Cost of the candidate. */
191};
192
193/* The data used by the induction variable optimizations. */
194
195struct ivopts_data
196{
197 /* The currently optimized loop. */
198 struct loop *current_loop;
199
ca4c3169
ZD
200 /* Numbers of iterations for all exits of the current loop. */
201 htab_t niters;
202
8b11a64c
ZD
203 /* The size of version_info array allocated. */
204 unsigned version_info_size;
205
206 /* The array of information for the ssa names. */
207 struct version_info *version_info;
208
209 /* The bitmap of indices in version_info whose value was changed. */
210 bitmap relevant;
211
212 /* The maximum invariant id. */
213 unsigned max_inv_id;
214
215 /* The uses of induction variables. */
216 varray_type iv_uses;
217
218 /* The candidates. */
219 varray_type iv_candidates;
220
80cad5fa
ZD
221 /* A bitmap of important candidates. */
222 bitmap important_candidates;
223
8b11a64c
ZD
224 /* Whether to consider just related and important candidates when replacing a
225 use. */
226 bool consider_all_candidates;
227};
228
b1b02be2
ZD
229/* An assignment of iv candidates to uses. */
230
231struct iv_ca
232{
233 /* The number of uses covered by the assignment. */
234 unsigned upto;
235
236 /* Number of uses that cannot be expressed by the candidates in the set. */
237 unsigned bad_uses;
238
239 /* Candidate assigned to a use, together with the related costs. */
240 struct cost_pair **cand_for_use;
241
242 /* Number of times each candidate is used. */
243 unsigned *n_cand_uses;
244
245 /* The candidates used. */
246 bitmap cands;
247
36f5ada1
ZD
248 /* The number of candidates in the set. */
249 unsigned n_cands;
250
b1b02be2
ZD
251 /* Total number of registers needed. */
252 unsigned n_regs;
253
254 /* Total cost of expressing uses. */
255 unsigned cand_use_cost;
256
257 /* Total cost of candidates. */
258 unsigned cand_cost;
259
260 /* Number of times each invariant is used. */
261 unsigned *n_invariant_uses;
262
263 /* Total cost of the assignment. */
264 unsigned cost;
265};
266
267/* Difference of two iv candidate assignments. */
268
269struct iv_ca_delta
270{
271 /* Changed use. */
272 struct iv_use *use;
273
274 /* An old assignment (for rollback purposes). */
275 struct cost_pair *old_cp;
276
277 /* A new assignment. */
278 struct cost_pair *new_cp;
279
280 /* Next change in the list. */
281 struct iv_ca_delta *next_change;
282};
283
8b11a64c
ZD
284/* Bound on number of candidates below that all candidates are considered. */
285
286#define CONSIDER_ALL_CANDIDATES_BOUND \
287 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
288
2a7e31df 289/* If there are more iv occurrences, we just give up (it is quite unlikely that
8b11a64c
ZD
290 optimizing such a loop would help, and it would take ages). */
291
292#define MAX_CONSIDERED_USES \
293 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
294
36f5ada1
ZD
295/* If there are at most this number of ivs in the set, try removing unnecessary
296 ivs from the set always. */
297
298#define ALWAYS_PRUNE_CAND_SET_BOUND \
299 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
300
8b11a64c
ZD
301/* The list of trees for that the decl_rtl field must be reset is stored
302 here. */
303
304static varray_type decl_rtl_to_reset;
305
306/* Number of uses recorded in DATA. */
307
308static inline unsigned
309n_iv_uses (struct ivopts_data *data)
310{
311 return VARRAY_ACTIVE_SIZE (data->iv_uses);
312}
313
314/* Ith use recorded in DATA. */
315
316static inline struct iv_use *
317iv_use (struct ivopts_data *data, unsigned i)
318{
319 return VARRAY_GENERIC_PTR_NOGC (data->iv_uses, i);
320}
321
322/* Number of candidates recorded in DATA. */
323
324static inline unsigned
325n_iv_cands (struct ivopts_data *data)
326{
327 return VARRAY_ACTIVE_SIZE (data->iv_candidates);
328}
329
330/* Ith candidate recorded in DATA. */
331
332static inline struct iv_cand *
333iv_cand (struct ivopts_data *data, unsigned i)
334{
335 return VARRAY_GENERIC_PTR_NOGC (data->iv_candidates, i);
336}
337
338/* The data for LOOP. */
339
340static inline struct loop_data *
341loop_data (struct loop *loop)
342{
343 return loop->aux;
344}
345
346/* The single loop exit if it dominates the latch, NULL otherwise. */
347
348static edge
349single_dom_exit (struct loop *loop)
350{
351 edge exit = loop->single_exit;
352
353 if (!exit)
354 return NULL;
355
356 if (!just_once_each_iteration_p (loop, exit->src))
357 return NULL;
358
359 return exit;
360}
361
362/* Dumps information about the induction variable IV to FILE. */
363
364extern void dump_iv (FILE *, struct iv *);
365void
366dump_iv (FILE *file, struct iv *iv)
367{
e6845c23
ZD
368 if (iv->ssa_name)
369 {
370 fprintf (file, "ssa name ");
371 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
372 fprintf (file, "\n");
373 }
8b11a64c 374
2f4675b4
ZD
375 fprintf (file, " type ");
376 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
377 fprintf (file, "\n");
378
8b11a64c
ZD
379 if (iv->step)
380 {
381 fprintf (file, " base ");
382 print_generic_expr (file, iv->base, TDF_SLIM);
383 fprintf (file, "\n");
384
385 fprintf (file, " step ");
386 print_generic_expr (file, iv->step, TDF_SLIM);
387 fprintf (file, "\n");
388 }
389 else
390 {
391 fprintf (file, " invariant ");
392 print_generic_expr (file, iv->base, TDF_SLIM);
393 fprintf (file, "\n");
394 }
395
e6845c23
ZD
396 if (iv->base_object)
397 {
398 fprintf (file, " base object ");
399 print_generic_expr (file, iv->base_object, TDF_SLIM);
400 fprintf (file, "\n");
401 }
402
8b11a64c
ZD
403 if (iv->biv_p)
404 fprintf (file, " is a biv\n");
405}
406
407/* Dumps information about the USE to FILE. */
408
409extern void dump_use (FILE *, struct iv_use *);
410void
411dump_use (FILE *file, struct iv_use *use)
412{
8b11a64c
ZD
413 fprintf (file, "use %d\n", use->id);
414
415 switch (use->type)
416 {
417 case USE_NONLINEAR_EXPR:
418 fprintf (file, " generic\n");
419 break;
420
421 case USE_OUTER:
422 fprintf (file, " outside\n");
423 break;
424
425 case USE_ADDRESS:
426 fprintf (file, " address\n");
427 break;
428
429 case USE_COMPARE:
430 fprintf (file, " compare\n");
431 break;
432
433 default:
1e128c5f 434 gcc_unreachable ();
8b11a64c
ZD
435 }
436
2f4675b4
ZD
437 fprintf (file, " in statement ");
438 print_generic_expr (file, use->stmt, TDF_SLIM);
439 fprintf (file, "\n");
440
441 fprintf (file, " at position ");
442 if (use->op_p)
443 print_generic_expr (file, *use->op_p, TDF_SLIM);
444 fprintf (file, "\n");
445
e6845c23 446 dump_iv (file, use->iv);
2f4675b4 447
eec5fec9
ZD
448 if (use->related_cands)
449 {
450 fprintf (file, " related candidates ");
451 dump_bitmap (file, use->related_cands);
452 }
8b11a64c
ZD
453}
454
455/* Dumps information about the uses to FILE. */
456
457extern void dump_uses (FILE *, struct ivopts_data *);
458void
459dump_uses (FILE *file, struct ivopts_data *data)
460{
461 unsigned i;
462 struct iv_use *use;
463
464 for (i = 0; i < n_iv_uses (data); i++)
465 {
466 use = iv_use (data, i);
467
468 dump_use (file, use);
469 fprintf (file, "\n");
470 }
471}
472
473/* Dumps information about induction variable candidate CAND to FILE. */
474
475extern void dump_cand (FILE *, struct iv_cand *);
476void
477dump_cand (FILE *file, struct iv_cand *cand)
478{
479 struct iv *iv = cand->iv;
480
481 fprintf (file, "candidate %d%s\n",
482 cand->id, cand->important ? " (important)" : "");
483
484 if (!iv)
485 {
486 fprintf (file, " final value replacement\n");
487 return;
488 }
489
490 switch (cand->pos)
491 {
492 case IP_NORMAL:
493 fprintf (file, " incremented before exit test\n");
494 break;
495
496 case IP_END:
497 fprintf (file, " incremented at end\n");
498 break;
499
500 case IP_ORIGINAL:
501 fprintf (file, " original biv\n");
502 break;
503 }
504
e6845c23 505 dump_iv (file, iv);
8b11a64c
ZD
506}
507
508/* Returns the info for ssa version VER. */
509
510static inline struct version_info *
511ver_info (struct ivopts_data *data, unsigned ver)
512{
513 return data->version_info + ver;
514}
515
516/* Returns the info for ssa name NAME. */
517
518static inline struct version_info *
519name_info (struct ivopts_data *data, tree name)
520{
521 return ver_info (data, SSA_NAME_VERSION (name));
522}
523
524/* Checks whether there exists number X such that X * B = A, counting modulo
525 2^BITS. */
526
527static bool
528divide (unsigned bits, unsigned HOST_WIDE_INT a, unsigned HOST_WIDE_INT b,
529 HOST_WIDE_INT *x)
530{
531 unsigned HOST_WIDE_INT mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
532 unsigned HOST_WIDE_INT inv, ex, val;
533 unsigned i;
534
535 a &= mask;
536 b &= mask;
537
538 /* First divide the whole equation by 2 as long as possible. */
539 while (!(a & 1) && !(b & 1))
540 {
541 a >>= 1;
542 b >>= 1;
543 bits--;
544 mask >>= 1;
545 }
546
547 if (!(b & 1))
548 {
549 /* If b is still even, a is odd and there is no such x. */
550 return false;
551 }
552
553 /* Find the inverse of b. We compute it as
554 b^(2^(bits - 1) - 1) (mod 2^bits). */
555 inv = 1;
556 ex = b;
557 for (i = 0; i < bits - 1; i++)
558 {
559 inv = (inv * ex) & mask;
560 ex = (ex * ex) & mask;
561 }
562
563 val = (a * inv) & mask;
564
1e128c5f 565 gcc_assert (((val * b) & mask) == a);
8b11a64c
ZD
566
567 if ((val >> (bits - 1)) & 1)
568 val |= ~mask;
569
570 *x = val;
571
572 return true;
573}
574
575/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
576 emitted in LOOP. */
577
578static bool
579stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
580{
581 basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);
582
1e128c5f 583 gcc_assert (bb);
8b11a64c
ZD
584
585 if (sbb == loop->latch)
586 return true;
587
588 if (sbb != bb)
589 return false;
590
591 return stmt == last_stmt (bb);
592}
593
594/* Returns true if STMT if after the place where the original induction
595 variable CAND is incremented. */
596
597static bool
598stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
599{
600 basic_block cand_bb = bb_for_stmt (cand->incremented_at);
601 basic_block stmt_bb = bb_for_stmt (stmt);
602 block_stmt_iterator bsi;
603
604 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
605 return false;
606
607 if (stmt_bb != cand_bb)
608 return true;
609
610 /* Scan the block from the end, since the original ivs are usually
611 incremented at the end of the loop body. */
612 for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
613 {
614 if (bsi_stmt (bsi) == cand->incremented_at)
615 return false;
616 if (bsi_stmt (bsi) == stmt)
617 return true;
618 }
619}
620
621/* Returns true if STMT if after the place where the induction variable
622 CAND is incremented in LOOP. */
623
624static bool
625stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
626{
627 switch (cand->pos)
628 {
629 case IP_END:
630 return false;
631
632 case IP_NORMAL:
633 return stmt_after_ip_normal_pos (loop, stmt);
634
635 case IP_ORIGINAL:
636 return stmt_after_ip_original_pos (cand, stmt);
637
638 default:
1e128c5f 639 gcc_unreachable ();
8b11a64c
ZD
640 }
641}
642
ca4c3169
ZD
643/* Element of the table in that we cache the numbers of iterations obtained
644 from exits of the loop. */
645
646struct nfe_cache_elt
647{
648 /* The edge for that the number of iterations is cached. */
649 edge exit;
650
0388d40a 651 /* True if the # of iterations was successfully determined. */
ca4c3169
ZD
652 bool valid_p;
653
654 /* Description of # of iterations. */
655 struct tree_niter_desc niter;
656};
657
658/* Hash function for nfe_cache_elt E. */
659
660static hashval_t
661nfe_hash (const void *e)
662{
663 const struct nfe_cache_elt *elt = e;
664
665 return htab_hash_pointer (elt->exit);
666}
667
668/* Equality function for nfe_cache_elt E1 and edge E2. */
669
670static int
671nfe_eq (const void *e1, const void *e2)
672{
673 const struct nfe_cache_elt *elt1 = e1;
674
675 return elt1->exit == e2;
676}
677
678/* Returns structure describing number of iterations determined from
679 EXIT of DATA->current_loop, or NULL if something goes wrong. */
680
681static struct tree_niter_desc *
682niter_for_exit (struct ivopts_data *data, edge exit)
683{
684 struct nfe_cache_elt *nfe_desc;
685 PTR *slot;
686
687 slot = htab_find_slot_with_hash (data->niters, exit,
688 htab_hash_pointer (exit),
689 INSERT);
690
691 if (!*slot)
692 {
693 nfe_desc = xmalloc (sizeof (struct nfe_cache_elt));
694 nfe_desc->exit = exit;
695 nfe_desc->valid_p = number_of_iterations_exit (data->current_loop,
696 exit, &nfe_desc->niter);
697 *slot = nfe_desc;
698 }
699 else
700 nfe_desc = *slot;
701
702 if (!nfe_desc->valid_p)
703 return NULL;
704
705 return &nfe_desc->niter;
706}
707
708/* Returns structure describing number of iterations determined from
709 single dominating exit of DATA->current_loop, or NULL if something
710 goes wrong. */
711
712static struct tree_niter_desc *
713niter_for_single_dom_exit (struct ivopts_data *data)
714{
715 edge exit = single_dom_exit (data->current_loop);
716
717 if (!exit)
718 return NULL;
719
720 return niter_for_exit (data, exit);
721}
722
8b11a64c
ZD
723/* Initializes data structures used by the iv optimization pass, stored
724 in DATA. LOOPS is the loop tree. */
725
726static void
727tree_ssa_iv_optimize_init (struct loops *loops, struct ivopts_data *data)
728{
729 unsigned i;
730
731 data->version_info_size = 2 * num_ssa_names;
732 data->version_info = xcalloc (data->version_info_size,
733 sizeof (struct version_info));
8bdbfff5
NS
734 data->relevant = BITMAP_ALLOC (NULL);
735 data->important_candidates = BITMAP_ALLOC (NULL);
8b11a64c 736 data->max_inv_id = 0;
ca4c3169 737 data->niters = htab_create (10, nfe_hash, nfe_eq, free);
8b11a64c
ZD
738
739 for (i = 1; i < loops->num; i++)
740 if (loops->parray[i])
741 loops->parray[i]->aux = xcalloc (1, sizeof (struct loop_data));
742
743 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_uses, 20, "iv_uses");
744 VARRAY_GENERIC_PTR_NOGC_INIT (data->iv_candidates, 20, "iv_candidates");
745 VARRAY_GENERIC_PTR_NOGC_INIT (decl_rtl_to_reset, 20, "decl_rtl_to_reset");
746}
747
e6845c23
ZD
748/* Returns a memory object to that EXPR points. In case we are able to
749 determine that it does not point to any such object, NULL is returned. */
750
751static tree
752determine_base_object (tree expr)
753{
754 enum tree_code code = TREE_CODE (expr);
755 tree base, obj, op0, op1;
756
757 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
758 return NULL_TREE;
759
760 switch (code)
761 {
762 case INTEGER_CST:
763 return NULL_TREE;
764
765 case ADDR_EXPR:
766 obj = TREE_OPERAND (expr, 0);
767 base = get_base_address (obj);
768
769 if (!base)
f5e2738c 770 return expr;
e6845c23 771
7299dbfb 772 if (TREE_CODE (base) == INDIRECT_REF)
f5e2738c 773 return determine_base_object (TREE_OPERAND (base, 0));
7299dbfb 774
e6845c23
ZD
775 return fold (build1 (ADDR_EXPR, ptr_type_node, base));
776
777 case PLUS_EXPR:
778 case MINUS_EXPR:
779 op0 = determine_base_object (TREE_OPERAND (expr, 0));
780 op1 = determine_base_object (TREE_OPERAND (expr, 1));
781
782 if (!op1)
783 return op0;
784
785 if (!op0)
786 return (code == PLUS_EXPR
787 ? op1
788 : fold (build1 (NEGATE_EXPR, ptr_type_node, op1)));
789
790 return fold (build (code, ptr_type_node, op0, op1));
791
f5e2738c
ZD
792 case NOP_EXPR:
793 case CONVERT_EXPR:
794 return determine_base_object (TREE_OPERAND (expr, 0));
795
e6845c23
ZD
796 default:
797 return fold_convert (ptr_type_node, expr);
798 }
799}
800
8b11a64c
ZD
801/* Allocates an induction variable with given initial value BASE and step STEP
802 for loop LOOP. */
803
804static struct iv *
805alloc_iv (tree base, tree step)
806{
807 struct iv *iv = xcalloc (1, sizeof (struct iv));
808
809 if (step && integer_zerop (step))
810 step = NULL_TREE;
811
812 iv->base = base;
e6845c23 813 iv->base_object = determine_base_object (base);
8b11a64c
ZD
814 iv->step = step;
815 iv->biv_p = false;
816 iv->have_use_for = false;
817 iv->use_id = 0;
818 iv->ssa_name = NULL_TREE;
819
820 return iv;
821}
822
823/* Sets STEP and BASE for induction variable IV. */
824
825static void
826set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
827{
828 struct version_info *info = name_info (data, iv);
829
1e128c5f 830 gcc_assert (!info->iv);
8b11a64c
ZD
831
832 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
833 info->iv = alloc_iv (base, step);
834 info->iv->ssa_name = iv;
835}
836
837/* Finds induction variable declaration for VAR. */
838
839static struct iv *
840get_iv (struct ivopts_data *data, tree var)
841{
842 basic_block bb;
843
844 if (!name_info (data, var)->iv)
845 {
846 bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
847
848 if (!bb
849 || !flow_bb_inside_loop_p (data->current_loop, bb))
850 set_iv (data, var, var, NULL_TREE);
851 }
852
853 return name_info (data, var)->iv;
854}
855
856/* Determines the step of a biv defined in PHI. */
857
858static tree
859determine_biv_step (tree phi)
860{
861 struct loop *loop = bb_for_stmt (phi)->loop_father;
862 tree name = PHI_RESULT (phi), base, step;
863 tree type = TREE_TYPE (name);
864
865 if (!is_gimple_reg (name))
866 return NULL_TREE;
867
868 if (!simple_iv (loop, phi, name, &base, &step))
869 return NULL_TREE;
870
871 if (!step)
5212068f 872 return build_int_cst (type, 0);
8b11a64c
ZD
873
874 return step;
875}
876
be35cf60
ZD
877/* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
878
879static bool
880abnormal_ssa_name_p (tree exp)
881{
882 if (!exp)
883 return false;
884
885 if (TREE_CODE (exp) != SSA_NAME)
886 return false;
887
888 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
889}
890
891/* Returns false if BASE or INDEX contains a ssa name that occurs in an
8b11a64c
ZD
892 abnormal phi node. Callback for for_each_index. */
893
894static bool
be35cf60 895idx_contains_abnormal_ssa_name_p (tree base, tree *index,
8b11a64c
ZD
896 void *data ATTRIBUTE_UNUSED)
897{
be35cf60
ZD
898 if (TREE_CODE (base) == ARRAY_REF)
899 {
900 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
901 return false;
902 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
903 return false;
904 }
8b11a64c 905
be35cf60 906 return !abnormal_ssa_name_p (*index);
8b11a64c
ZD
907}
908
909/* Returns true if EXPR contains a ssa name that occurs in an
910 abnormal phi node. */
911
912static bool
913contains_abnormal_ssa_name_p (tree expr)
914{
915 enum tree_code code = TREE_CODE (expr);
6615c446 916 enum tree_code_class class = TREE_CODE_CLASS (code);
8b11a64c
ZD
917
918 if (code == SSA_NAME)
919 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
920
921 if (code == INTEGER_CST
922 || is_gimple_min_invariant (expr))
923 return false;
924
925 if (code == ADDR_EXPR)
6efa2c71 926 return !for_each_index (&TREE_OPERAND (expr, 0),
8b11a64c
ZD
927 idx_contains_abnormal_ssa_name_p,
928 NULL);
929
930 switch (class)
931 {
6615c446
JO
932 case tcc_binary:
933 case tcc_comparison:
8b11a64c
ZD
934 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
935 return true;
936
937 /* Fallthru. */
6615c446 938 case tcc_unary:
8b11a64c
ZD
939 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
940 return true;
941
942 break;
943
944 default:
1e128c5f 945 gcc_unreachable ();
8b11a64c
ZD
946 }
947
948 return false;
949}
950
951/* Finds basic ivs. */
952
953static bool
954find_bivs (struct ivopts_data *data)
955{
956 tree phi, step, type, base;
957 bool found = false;
958 struct loop *loop = data->current_loop;
959
bb29d951 960 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
961 {
962 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
963 continue;
964
965 step = determine_biv_step (phi);
966
967 if (!step)
968 continue;
969 if (cst_and_fits_in_hwi (step)
970 && int_cst_value (step) == 0)
971 continue;
972
973 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
974 if (contains_abnormal_ssa_name_p (base))
975 continue;
976
977 type = TREE_TYPE (PHI_RESULT (phi));
978 base = fold_convert (type, base);
979 step = fold_convert (type, step);
980
981 /* FIXME: We do not handle induction variables whose step does
982 not satisfy cst_and_fits_in_hwi. */
983 if (!cst_and_fits_in_hwi (step))
984 continue;
985
986 set_iv (data, PHI_RESULT (phi), base, step);
987 found = true;
988 }
989
990 return found;
991}
992
993/* Marks basic ivs. */
994
995static void
996mark_bivs (struct ivopts_data *data)
997{
998 tree phi, var;
999 struct iv *iv, *incr_iv;
1000 struct loop *loop = data->current_loop;
1001 basic_block incr_bb;
1002
bb29d951 1003 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1004 {
1005 iv = get_iv (data, PHI_RESULT (phi));
1006 if (!iv)
1007 continue;
1008
1009 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1010 incr_iv = get_iv (data, var);
1011 if (!incr_iv)
1012 continue;
1013
1014 /* If the increment is in the subloop, ignore it. */
1015 incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
1016 if (incr_bb->loop_father != data->current_loop
1017 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1018 continue;
1019
1020 iv->biv_p = true;
1021 incr_iv->biv_p = true;
1022 }
1023}
1024
1025/* Checks whether STMT defines a linear induction variable and stores its
1026 parameters to BASE and STEP. */
1027
1028static bool
1029find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt,
1030 tree *base, tree *step)
1031{
1032 tree lhs;
1033 struct loop *loop = data->current_loop;
1034
1035 *base = NULL_TREE;
1036 *step = NULL_TREE;
1037
1038 if (TREE_CODE (stmt) != MODIFY_EXPR)
1039 return false;
1040
1041 lhs = TREE_OPERAND (stmt, 0);
1042 if (TREE_CODE (lhs) != SSA_NAME)
1043 return false;
1044
1045 if (!simple_iv (loop, stmt, TREE_OPERAND (stmt, 1), base, step))
1046 return false;
1047
1048 /* FIXME: We do not handle induction variables whose step does
1049 not satisfy cst_and_fits_in_hwi. */
1050 if (!zero_p (*step)
1051 && !cst_and_fits_in_hwi (*step))
1052 return false;
1053
1054 if (contains_abnormal_ssa_name_p (*base))
1055 return false;
1056
1057 return true;
1058}
1059
1060/* Finds general ivs in statement STMT. */
1061
1062static void
1063find_givs_in_stmt (struct ivopts_data *data, tree stmt)
1064{
1065 tree base, step;
1066
1067 if (!find_givs_in_stmt_scev (data, stmt, &base, &step))
1068 return;
1069
1070 set_iv (data, TREE_OPERAND (stmt, 0), base, step);
1071}
1072
1073/* Finds general ivs in basic block BB. */
1074
1075static void
1076find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1077{
1078 block_stmt_iterator bsi;
1079
1080 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1081 find_givs_in_stmt (data, bsi_stmt (bsi));
1082}
1083
1084/* Finds general ivs. */
1085
1086static void
1087find_givs (struct ivopts_data *data)
1088{
1089 struct loop *loop = data->current_loop;
1090 basic_block *body = get_loop_body_in_dom_order (loop);
1091 unsigned i;
1092
1093 for (i = 0; i < loop->num_nodes; i++)
1094 find_givs_in_bb (data, body[i]);
1095 free (body);
1096}
1097
8b11a64c
ZD
1098/* For each ssa name defined in LOOP determines whether it is an induction
1099 variable and if so, its initial value and step. */
1100
1101static bool
1102find_induction_variables (struct ivopts_data *data)
1103{
1104 unsigned i;
87c476a2 1105 bitmap_iterator bi;
8b11a64c
ZD
1106
1107 if (!find_bivs (data))
1108 return false;
1109
1110 find_givs (data);
1111 mark_bivs (data);
8b11a64c
ZD
1112
1113 if (dump_file && (dump_flags & TDF_DETAILS))
1114 {
ca4c3169
ZD
1115 struct tree_niter_desc *niter;
1116
1117 niter = niter_for_single_dom_exit (data);
1118
1119 if (niter)
8b11a64c
ZD
1120 {
1121 fprintf (dump_file, " number of iterations ");
ca4c3169 1122 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
8b11a64c
ZD
1123 fprintf (dump_file, "\n");
1124
1125 fprintf (dump_file, " may be zero if ");
ca4c3169 1126 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
8b11a64c
ZD
1127 fprintf (dump_file, "\n");
1128 fprintf (dump_file, "\n");
1129 };
1130
1131 fprintf (dump_file, "Induction variables:\n\n");
1132
87c476a2 1133 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1134 {
1135 if (ver_info (data, i)->iv)
1136 dump_iv (dump_file, ver_info (data, i)->iv);
87c476a2 1137 }
8b11a64c
ZD
1138 }
1139
1140 return true;
1141}
1142
1143/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV. */
1144
1145static struct iv_use *
1146record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1147 tree stmt, enum use_type use_type)
1148{
1149 struct iv_use *use = xcalloc (1, sizeof (struct iv_use));
1150
1151 use->id = n_iv_uses (data);
1152 use->type = use_type;
1153 use->iv = iv;
1154 use->stmt = stmt;
1155 use->op_p = use_p;
8bdbfff5 1156 use->related_cands = BITMAP_ALLOC (NULL);
8b11a64c 1157
e6845c23
ZD
1158 /* To avoid showing ssa name in the dumps, if it was not reset by the
1159 caller. */
1160 iv->ssa_name = NULL_TREE;
1161
8b11a64c
ZD
1162 if (dump_file && (dump_flags & TDF_DETAILS))
1163 dump_use (dump_file, use);
1164
1165 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_uses, use);
1166
1167 return use;
1168}
1169
1170/* Checks whether OP is a loop-level invariant and if so, records it.
1171 NONLINEAR_USE is true if the invariant is used in a way we do not
1172 handle specially. */
1173
1174static void
1175record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1176{
1177 basic_block bb;
1178 struct version_info *info;
1179
1180 if (TREE_CODE (op) != SSA_NAME
1181 || !is_gimple_reg (op))
1182 return;
1183
1184 bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
1185 if (bb
1186 && flow_bb_inside_loop_p (data->current_loop, bb))
1187 return;
1188
1189 info = name_info (data, op);
1190 info->name = op;
1191 info->has_nonlin_use |= nonlinear_use;
1192 if (!info->inv_id)
1193 info->inv_id = ++data->max_inv_id;
1194 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1195}
1196
1197/* Checks whether the use OP is interesting and if so, records it
1198 as TYPE. */
1199
1200static struct iv_use *
1201find_interesting_uses_outer_or_nonlin (struct ivopts_data *data, tree op,
1202 enum use_type type)
1203{
1204 struct iv *iv;
1205 struct iv *civ;
1206 tree stmt;
1207 struct iv_use *use;
1208
1209 if (TREE_CODE (op) != SSA_NAME)
1210 return NULL;
1211
1212 iv = get_iv (data, op);
1213 if (!iv)
1214 return NULL;
1215
1216 if (iv->have_use_for)
1217 {
1218 use = iv_use (data, iv->use_id);
1219
1e128c5f
GB
1220 gcc_assert (use->type == USE_NONLINEAR_EXPR
1221 || use->type == USE_OUTER);
8b11a64c
ZD
1222
1223 if (type == USE_NONLINEAR_EXPR)
1224 use->type = USE_NONLINEAR_EXPR;
1225 return use;
1226 }
1227
1228 if (zero_p (iv->step))
1229 {
1230 record_invariant (data, op, true);
1231 return NULL;
1232 }
1233 iv->have_use_for = true;
1234
1235 civ = xmalloc (sizeof (struct iv));
1236 *civ = *iv;
1237
1238 stmt = SSA_NAME_DEF_STMT (op);
1e128c5f
GB
1239 gcc_assert (TREE_CODE (stmt) == PHI_NODE
1240 || TREE_CODE (stmt) == MODIFY_EXPR);
8b11a64c
ZD
1241
1242 use = record_use (data, NULL, civ, stmt, type);
1243 iv->use_id = use->id;
1244
1245 return use;
1246}
1247
1248/* Checks whether the use OP is interesting and if so, records it. */
1249
1250static struct iv_use *
1251find_interesting_uses_op (struct ivopts_data *data, tree op)
1252{
1253 return find_interesting_uses_outer_or_nonlin (data, op, USE_NONLINEAR_EXPR);
1254}
1255
1256/* Records a definition of induction variable OP that is used outside of the
1257 loop. */
1258
1259static struct iv_use *
1260find_interesting_uses_outer (struct ivopts_data *data, tree op)
1261{
1262 return find_interesting_uses_outer_or_nonlin (data, op, USE_OUTER);
1263}
1264
1265/* Checks whether the condition *COND_P in STMT is interesting
1266 and if so, records it. */
1267
1268static void
1269find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
1270{
1271 tree *op0_p;
1272 tree *op1_p;
1273 struct iv *iv0 = NULL, *iv1 = NULL, *civ;
1274 struct iv const_iv;
1275 tree zero = integer_zero_node;
1276
1277 const_iv.step = NULL_TREE;
1278
1279 if (integer_zerop (*cond_p)
1280 || integer_nonzerop (*cond_p))
1281 return;
1282
1283 if (TREE_CODE (*cond_p) == SSA_NAME)
1284 {
1285 op0_p = cond_p;
1286 op1_p = &zero;
1287 }
1288 else
1289 {
1290 op0_p = &TREE_OPERAND (*cond_p, 0);
1291 op1_p = &TREE_OPERAND (*cond_p, 1);
1292 }
1293
1294 if (TREE_CODE (*op0_p) == SSA_NAME)
1295 iv0 = get_iv (data, *op0_p);
1296 else
1297 iv0 = &const_iv;
1298
1299 if (TREE_CODE (*op1_p) == SSA_NAME)
1300 iv1 = get_iv (data, *op1_p);
1301 else
1302 iv1 = &const_iv;
1303
1304 if (/* When comparing with non-invariant value, we may not do any senseful
1305 induction variable elimination. */
1306 (!iv0 || !iv1)
1307 /* Eliminating condition based on two ivs would be nontrivial.
1308 ??? TODO -- it is not really important to handle this case. */
1309 || (!zero_p (iv0->step) && !zero_p (iv1->step)))
1310 {
1311 find_interesting_uses_op (data, *op0_p);
1312 find_interesting_uses_op (data, *op1_p);
1313 return;
1314 }
1315
1316 if (zero_p (iv0->step) && zero_p (iv1->step))
1317 {
1318 /* If both are invariants, this is a work for unswitching. */
1319 return;
1320 }
1321
1322 civ = xmalloc (sizeof (struct iv));
1323 *civ = zero_p (iv0->step) ? *iv1: *iv0;
1324 record_use (data, cond_p, civ, stmt, USE_COMPARE);
1325}
1326
be35cf60
ZD
1327/* Returns true if expression EXPR is obviously invariant in LOOP,
1328 i.e. if all its operands are defined outside of the LOOP. */
1329
feb075f4 1330bool
be35cf60
ZD
1331expr_invariant_in_loop_p (struct loop *loop, tree expr)
1332{
1333 basic_block def_bb;
1334 unsigned i, len;
1335
1336 if (is_gimple_min_invariant (expr))
1337 return true;
1338
1339 if (TREE_CODE (expr) == SSA_NAME)
1340 {
1341 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
1342 if (def_bb
1343 && flow_bb_inside_loop_p (loop, def_bb))
1344 return false;
1345
1346 return true;
1347 }
1348
1349 if (!EXPR_P (expr))
1350 return false;
1351
54e4aedb 1352 len = TREE_CODE_LENGTH (TREE_CODE (expr));
be35cf60
ZD
1353 for (i = 0; i < len; i++)
1354 if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1355 return false;
1356
1357 return true;
1358}
1359
8b11a64c
ZD
1360/* Cumulates the steps of indices into DATA and replaces their values with the
1361 initial ones. Returns false when the value of the index cannot be determined.
1362 Callback for for_each_index. */
1363
1364struct ifs_ivopts_data
1365{
1366 struct ivopts_data *ivopts_data;
1367 tree stmt;
1368 tree *step_p;
1369};
1370
1371static bool
1372idx_find_step (tree base, tree *idx, void *data)
1373{
1374 struct ifs_ivopts_data *dta = data;
1375 struct iv *iv;
be35cf60 1376 tree step, type, iv_type, iv_step, lbound, off;
2f4675b4 1377 struct loop *loop = dta->ivopts_data->current_loop;
be35cf60
ZD
1378
1379 if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
1380 || TREE_CODE (base) == ALIGN_INDIRECT_REF)
1381 return false;
1382
1383 /* If base is a component ref, require that the offset of the reference
3a7c155d 1384 be invariant. */
be35cf60
ZD
1385 if (TREE_CODE (base) == COMPONENT_REF)
1386 {
1387 off = component_ref_field_offset (base);
1388 return expr_invariant_in_loop_p (loop, off);
1389 }
1390
1391 /* If base is array, first check whether we will be able to move the
1392 reference out of the loop (in order to take its address in strength
1393 reduction). In order for this to work we need both lower bound
1394 and step to be loop invariants. */
1395 if (TREE_CODE (base) == ARRAY_REF)
1396 {
1397 step = array_ref_element_size (base);
1398 lbound = array_ref_low_bound (base);
1399
1400 if (!expr_invariant_in_loop_p (loop, step)
1401 || !expr_invariant_in_loop_p (loop, lbound))
1402 return false;
1403 }
1404
8b11a64c
ZD
1405 if (TREE_CODE (*idx) != SSA_NAME)
1406 return true;
1407
1408 iv = get_iv (dta->ivopts_data, *idx);
1409 if (!iv)
1410 return false;
1411
1412 *idx = iv->base;
1413
1414 if (!iv->step)
1415 return true;
1416
1417 iv_type = TREE_TYPE (iv->base);
1418 type = build_pointer_type (TREE_TYPE (base));
1419 if (TREE_CODE (base) == ARRAY_REF)
2f4675b4
ZD
1420 {
1421 step = array_ref_element_size (base);
2f4675b4
ZD
1422
1423 /* We only handle addresses whose step is an integer constant. */
1424 if (TREE_CODE (step) != INTEGER_CST)
1425 return false;
2f4675b4 1426 }
8b11a64c 1427 else
5212068f
NS
1428 /* The step for pointer arithmetics already is 1 byte. */
1429 step = build_int_cst (type, 1);
8b11a64c
ZD
1430
1431 if (TYPE_PRECISION (iv_type) < TYPE_PRECISION (type))
1432 iv_step = can_count_iv_in_wider_type (dta->ivopts_data->current_loop,
1433 type, iv->base, iv->step, dta->stmt);
1434 else
1435 iv_step = fold_convert (iv_type, iv->step);
1436
1437 if (!iv_step)
1438 {
1439 /* The index might wrap. */
1440 return false;
1441 }
1442
b52d5eaa 1443 step = fold_binary_to_constant (MULT_EXPR, type, step, iv_step);
8b11a64c
ZD
1444
1445 if (!*dta->step_p)
1446 *dta->step_p = step;
1447 else
b52d5eaa
RS
1448 *dta->step_p = fold_binary_to_constant (PLUS_EXPR, type,
1449 *dta->step_p, step);
8b11a64c
ZD
1450
1451 return true;
1452}
1453
1454/* Records use in index IDX. Callback for for_each_index. Ivopts data
1455 object is passed to it in DATA. */
1456
1457static bool
2f4675b4 1458idx_record_use (tree base, tree *idx,
8b11a64c
ZD
1459 void *data)
1460{
1461 find_interesting_uses_op (data, *idx);
2f4675b4
ZD
1462 if (TREE_CODE (base) == ARRAY_REF)
1463 {
1464 find_interesting_uses_op (data, array_ref_element_size (base));
1465 find_interesting_uses_op (data, array_ref_low_bound (base));
1466 }
8b11a64c
ZD
1467 return true;
1468}
1469
0a915e3d
ZD
1470/* Returns true if memory reference REF may be unaligned. */
1471
1472static bool
1473may_be_unaligned_p (tree ref)
1474{
1475 tree base;
1476 tree base_type;
1477 HOST_WIDE_INT bitsize;
1478 HOST_WIDE_INT bitpos;
1479 tree toffset;
1480 enum machine_mode mode;
1481 int unsignedp, volatilep;
1482 unsigned base_align;
1483
1484 /* The test below is basically copy of what expr.c:normal_inner_ref
1485 does to check whether the object must be loaded by parts when
1486 STRICT_ALIGNMENT is true. */
1487 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1488 &unsignedp, &volatilep, true);
1489 base_type = TREE_TYPE (base);
1490 base_align = TYPE_ALIGN (base_type);
1491
1492 if (mode != BLKmode
1493 && (base_align < GET_MODE_ALIGNMENT (mode)
1494 || bitpos % GET_MODE_ALIGNMENT (mode) != 0
1495 || bitpos % BITS_PER_UNIT != 0))
1496 return true;
1497
1498 return false;
1499}
1500
8b11a64c
ZD
1501/* Finds addresses in *OP_P inside STMT. */
1502
1503static void
1504find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
1505{
1506 tree base = unshare_expr (*op_p), step = NULL;
1507 struct iv *civ;
1508 struct ifs_ivopts_data ifs_ivopts_data;
1509
1510 /* Ignore bitfields for now. Not really something terribly complicated
1511 to handle. TODO. */
1512 if (TREE_CODE (base) == COMPONENT_REF
1513 && DECL_NONADDRESSABLE_P (TREE_OPERAND (base, 1)))
1514 goto fail;
1515
0a915e3d
ZD
1516 if (STRICT_ALIGNMENT
1517 && may_be_unaligned_p (base))
1518 goto fail;
1519
8b11a64c
ZD
1520 ifs_ivopts_data.ivopts_data = data;
1521 ifs_ivopts_data.stmt = stmt;
1522 ifs_ivopts_data.step_p = &step;
1523 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1524 || zero_p (step))
1525 goto fail;
1526
be35cf60
ZD
1527 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
1528 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
1529
1530 if (TREE_CODE (base) == INDIRECT_REF)
8b11a64c
ZD
1531 base = TREE_OPERAND (base, 0);
1532 else
1533 base = build_addr (base);
1534
1535 civ = alloc_iv (base, step);
1536 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1537 return;
1538
1539fail:
1540 for_each_index (op_p, idx_record_use, data);
1541}
1542
1543/* Finds and records invariants used in STMT. */
1544
1545static void
1546find_invariants_stmt (struct ivopts_data *data, tree stmt)
1547{
1548 use_optype uses = NULL;
1549 unsigned i, n;
1550 tree op;
1551
1552 if (TREE_CODE (stmt) == PHI_NODE)
1553 n = PHI_NUM_ARGS (stmt);
1554 else
1555 {
8b11a64c
ZD
1556 uses = STMT_USE_OPS (stmt);
1557 n = NUM_USES (uses);
1558 }
1559
1560 for (i = 0; i < n; i++)
1561 {
1562 if (TREE_CODE (stmt) == PHI_NODE)
1563 op = PHI_ARG_DEF (stmt, i);
1564 else
1565 op = USE_OP (uses, i);
1566
1567 record_invariant (data, op, false);
1568 }
1569}
1570
1571/* Finds interesting uses of induction variables in the statement STMT. */
1572
1573static void
1574find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
1575{
1576 struct iv *iv;
1577 tree op, lhs, rhs;
1578 use_optype uses = NULL;
1579 unsigned i, n;
1580
1581 find_invariants_stmt (data, stmt);
1582
1583 if (TREE_CODE (stmt) == COND_EXPR)
1584 {
1585 find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
1586 return;
1587 }
1588
1589 if (TREE_CODE (stmt) == MODIFY_EXPR)
1590 {
1591 lhs = TREE_OPERAND (stmt, 0);
1592 rhs = TREE_OPERAND (stmt, 1);
1593
1594 if (TREE_CODE (lhs) == SSA_NAME)
1595 {
1596 /* If the statement defines an induction variable, the uses are not
1597 interesting by themselves. */
1598
1599 iv = get_iv (data, lhs);
1600
1601 if (iv && !zero_p (iv->step))
1602 return;
1603 }
1604
1605 switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
1606 {
6615c446 1607 case tcc_comparison:
8b11a64c
ZD
1608 find_interesting_uses_cond (data, stmt, &TREE_OPERAND (stmt, 1));
1609 return;
1610
6615c446 1611 case tcc_reference:
8b11a64c 1612 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 1));
6615c446 1613 if (REFERENCE_CLASS_P (lhs))
8b11a64c
ZD
1614 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1615 return;
1616
1617 default: ;
1618 }
1619
6615c446 1620 if (REFERENCE_CLASS_P (lhs)
2f4675b4 1621 && is_gimple_val (rhs))
8b11a64c
ZD
1622 {
1623 find_interesting_uses_address (data, stmt, &TREE_OPERAND (stmt, 0));
1624 find_interesting_uses_op (data, rhs);
1625 return;
1626 }
2f4675b4
ZD
1627
1628 /* TODO -- we should also handle address uses of type
1629
1630 memory = call (whatever);
1631
1632 and
1633
1634 call (memory). */
8b11a64c
ZD
1635 }
1636
1637 if (TREE_CODE (stmt) == PHI_NODE
1638 && bb_for_stmt (stmt) == data->current_loop->header)
1639 {
1640 lhs = PHI_RESULT (stmt);
1641 iv = get_iv (data, lhs);
1642
1643 if (iv && !zero_p (iv->step))
1644 return;
1645 }
1646
1647 if (TREE_CODE (stmt) == PHI_NODE)
1648 n = PHI_NUM_ARGS (stmt);
1649 else
1650 {
1651 uses = STMT_USE_OPS (stmt);
1652 n = NUM_USES (uses);
1653 }
1654
1655 for (i = 0; i < n; i++)
1656 {
1657 if (TREE_CODE (stmt) == PHI_NODE)
1658 op = PHI_ARG_DEF (stmt, i);
1659 else
1660 op = USE_OP (uses, i);
1661
1662 if (TREE_CODE (op) != SSA_NAME)
1663 continue;
1664
1665 iv = get_iv (data, op);
1666 if (!iv)
1667 continue;
1668
1669 find_interesting_uses_op (data, op);
1670 }
1671}
1672
1673/* Finds interesting uses of induction variables outside of loops
1674 on loop exit edge EXIT. */
1675
1676static void
1677find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1678{
1679 tree phi, def;
1680
bb29d951 1681 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1682 {
1683 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1684 find_interesting_uses_outer (data, def);
1685 }
1686}
1687
1688/* Finds uses of the induction variables that are interesting. */
1689
1690static void
1691find_interesting_uses (struct ivopts_data *data)
1692{
1693 basic_block bb;
1694 block_stmt_iterator bsi;
1695 tree phi;
1696 basic_block *body = get_loop_body (data->current_loop);
1697 unsigned i;
1698 struct version_info *info;
1699 edge e;
1700
1701 if (dump_file && (dump_flags & TDF_DETAILS))
1702 fprintf (dump_file, "Uses:\n\n");
1703
1704 for (i = 0; i < data->current_loop->num_nodes; i++)
1705 {
628f6a4e 1706 edge_iterator ei;
8b11a64c
ZD
1707 bb = body[i];
1708
628f6a4e 1709 FOR_EACH_EDGE (e, ei, bb->succs)
8b11a64c
ZD
1710 if (e->dest != EXIT_BLOCK_PTR
1711 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1712 find_interesting_uses_outside (data, e);
1713
bb29d951 1714 for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
1715 find_interesting_uses_stmt (data, phi);
1716 for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
1717 find_interesting_uses_stmt (data, bsi_stmt (bsi));
1718 }
1719
1720 if (dump_file && (dump_flags & TDF_DETAILS))
1721 {
87c476a2
ZD
1722 bitmap_iterator bi;
1723
8b11a64c
ZD
1724 fprintf (dump_file, "\n");
1725
87c476a2 1726 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
1727 {
1728 info = ver_info (data, i);
1729 if (info->inv_id)
1730 {
1731 fprintf (dump_file, " ");
1732 print_generic_expr (dump_file, info->name, TDF_SLIM);
1733 fprintf (dump_file, " is invariant (%d)%s\n",
1734 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1735 }
87c476a2 1736 }
8b11a64c
ZD
1737
1738 fprintf (dump_file, "\n");
1739 }
1740
1741 free (body);
1742}
1743
f5e2738c
ZD
1744/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1745 is true, assume we are inside an address. */
1746
1747static tree
1748strip_offset (tree expr, bool inside_addr, unsigned HOST_WIDE_INT *offset)
1749{
1750 tree op0 = NULL_TREE, op1 = NULL_TREE, step;
1751 enum tree_code code;
1752 tree type, orig_type = TREE_TYPE (expr);
1753 unsigned HOST_WIDE_INT off0, off1, st;
1754 tree orig_expr = expr;
1755
1756 STRIP_NOPS (expr);
1757 type = TREE_TYPE (expr);
1758 code = TREE_CODE (expr);
1759 *offset = 0;
1760
1761 switch (code)
1762 {
1763 case INTEGER_CST:
1764 if (!cst_and_fits_in_hwi (expr)
1765 || zero_p (expr))
1766 return orig_expr;
1767
1768 *offset = int_cst_value (expr);
1769 return build_int_cst_type (orig_type, 0);
1770
1771 case PLUS_EXPR:
1772 case MINUS_EXPR:
1773 op0 = TREE_OPERAND (expr, 0);
1774 op1 = TREE_OPERAND (expr, 1);
1775
1776 op0 = strip_offset (op0, false, &off0);
1777 op1 = strip_offset (op1, false, &off1);
1778
1779 *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
1780 if (op0 == TREE_OPERAND (expr, 0)
1781 && op1 == TREE_OPERAND (expr, 1))
1782 return orig_expr;
1783
1784 if (zero_p (op1))
1785 expr = op0;
1786 else if (zero_p (op0))
1787 {
1788 if (code == PLUS_EXPR)
1789 expr = op1;
1790 else
1791 expr = build1 (NEGATE_EXPR, type, op1);
1792 }
1793 else
1794 expr = build2 (code, type, op0, op1);
1795
1796 return fold_convert (orig_type, expr);
1797
1798 case ARRAY_REF:
1799 if (!inside_addr)
1800 return orig_expr;
1801
1802 step = array_ref_element_size (expr);
1803 if (!cst_and_fits_in_hwi (step))
1804 break;
1805
1806 st = int_cst_value (step);
1807 op1 = TREE_OPERAND (expr, 1);
1808 op1 = strip_offset (op1, false, &off1);
1809 *offset = off1 * st;
1810 break;
1811
1812 case COMPONENT_REF:
1813 if (!inside_addr)
1814 return orig_expr;
1815 break;
1816
1817 case ADDR_EXPR:
1818 inside_addr = true;
1819 break;
1820
1821 default:
1822 return orig_expr;
1823 }
1824
1825 /* Default handling of expressions for that we want to recurse into
1826 the first operand. */
1827 op0 = TREE_OPERAND (expr, 0);
1828 op0 = strip_offset (op0, inside_addr, &off0);
1829 *offset += off0;
1830
1831 if (op0 == TREE_OPERAND (expr, 0)
1832 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
1833 return orig_expr;
1834
1835 expr = copy_node (expr);
1836 TREE_OPERAND (expr, 0) = op0;
1837 if (op1)
1838 TREE_OPERAND (expr, 1) = op1;
1839
1840 return fold_convert (orig_type, expr);
1841}
1842
d482f417
ZD
1843/* Returns variant of TYPE that can be used as base for different uses.
1844 For integer types, we return unsigned variant of the type, which
1845 avoids problems with overflows. For pointer types, we return void *. */
1846
1847static tree
1848generic_type_for (tree type)
1849{
1850 if (POINTER_TYPE_P (type))
1851 return ptr_type_node;
1852
1853 if (TYPE_UNSIGNED (type))
1854 return type;
1855
1856 return unsigned_type_for (type);
1857}
1858
8b11a64c
ZD
1859/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1860 position to POS. If USE is not NULL, the candidate is set as related to
1861 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
1862 replacement of the final value of the iv by a direct computation. */
1863
1864static struct iv_cand *
1865add_candidate_1 (struct ivopts_data *data,
1866 tree base, tree step, bool important, enum iv_position pos,
1867 struct iv_use *use, tree incremented_at)
1868{
1869 unsigned i;
1870 struct iv_cand *cand = NULL;
d482f417 1871 tree type, orig_type;
8b11a64c
ZD
1872
1873 if (base)
1874 {
d482f417
ZD
1875 orig_type = TREE_TYPE (base);
1876 type = generic_type_for (orig_type);
1877 if (type != orig_type)
8b11a64c 1878 {
8b11a64c
ZD
1879 base = fold_convert (type, base);
1880 if (step)
1881 step = fold_convert (type, step);
1882 }
1883 }
1884
1885 for (i = 0; i < n_iv_cands (data); i++)
1886 {
1887 cand = iv_cand (data, i);
1888
1889 if (cand->pos != pos)
1890 continue;
1891
1892 if (cand->incremented_at != incremented_at)
1893 continue;
1894
1895 if (!cand->iv)
1896 {
1897 if (!base && !step)
1898 break;
1899
1900 continue;
1901 }
1902
1903 if (!base && !step)
1904 continue;
1905
1906 if (!operand_equal_p (base, cand->iv->base, 0))
1907 continue;
1908
1909 if (zero_p (cand->iv->step))
1910 {
1911 if (zero_p (step))
1912 break;
1913 }
1914 else
1915 {
1916 if (step && operand_equal_p (step, cand->iv->step, 0))
1917 break;
1918 }
1919 }
1920
1921 if (i == n_iv_cands (data))
1922 {
1923 cand = xcalloc (1, sizeof (struct iv_cand));
1924 cand->id = i;
1925
1926 if (!base && !step)
1927 cand->iv = NULL;
1928 else
1929 cand->iv = alloc_iv (base, step);
1930
1931 cand->pos = pos;
1932 if (pos != IP_ORIGINAL && cand->iv)
1933 {
1934 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
1935 cand->var_after = cand->var_before;
1936 }
1937 cand->important = important;
1938 cand->incremented_at = incremented_at;
1939 VARRAY_PUSH_GENERIC_PTR_NOGC (data->iv_candidates, cand);
1940
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1942 dump_cand (dump_file, cand);
1943 }
1944
1945 if (important && !cand->important)
1946 {
1947 cand->important = true;
1948 if (dump_file && (dump_flags & TDF_DETAILS))
1949 fprintf (dump_file, "Candidate %d is important\n", cand->id);
1950 }
1951
1952 if (use)
1953 {
1954 bitmap_set_bit (use->related_cands, i);
1955 if (dump_file && (dump_flags & TDF_DETAILS))
1956 fprintf (dump_file, "Candidate %d is related to use %d\n",
1957 cand->id, use->id);
1958 }
1959
1960 return cand;
1961}
1962
4366cf6d
ZD
1963/* Returns true if incrementing the induction variable at the end of the LOOP
1964 is allowed.
1965
1966 The purpose is to avoid splitting latch edge with a biv increment, thus
1967 creating a jump, possibly confusing other optimization passes and leaving
1968 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
1969 is not available (so we do not have a better alternative), or if the latch
1970 edge is already nonempty. */
1971
1972static bool
1973allow_ip_end_pos_p (struct loop *loop)
1974{
1975 if (!ip_normal_pos (loop))
1976 return true;
1977
1978 if (!empty_block_p (ip_end_pos (loop)))
1979 return true;
1980
1981 return false;
1982}
1983
8b11a64c
ZD
1984/* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
1985 position to POS. If USE is not NULL, the candidate is set as related to
1986 it. The candidate computation is scheduled on all available positions. */
1987
1988static void
1989add_candidate (struct ivopts_data *data,
1990 tree base, tree step, bool important, struct iv_use *use)
1991{
1992 if (ip_normal_pos (data->current_loop))
1993 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
4366cf6d
ZD
1994 if (ip_end_pos (data->current_loop)
1995 && allow_ip_end_pos_p (data->current_loop))
8b11a64c
ZD
1996 add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
1997}
1998
39b4020c
RS
1999/* Add a standard "0 + 1 * iteration" iv candidate for a
2000 type with SIZE bits. */
2001
2002static void
2003add_standard_iv_candidates_for_size (struct ivopts_data *data,
2004 unsigned int size)
2005{
2006 tree type = lang_hooks.types.type_for_size (size, true);
2007 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2008 true, NULL);
2009}
2010
8b11a64c
ZD
2011/* Adds standard iv candidates. */
2012
2013static void
2014add_standard_iv_candidates (struct ivopts_data *data)
2015{
39b4020c 2016 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
8b11a64c 2017
39b4020c
RS
2018 /* The same for a double-integer type if it is still fast enough. */
2019 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2020 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
8b11a64c
ZD
2021}
2022
2023
2024/* Adds candidates bases on the old induction variable IV. */
2025
2026static void
2027add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2028{
2029 tree phi, def;
2030 struct iv_cand *cand;
2031
2032 add_candidate (data, iv->base, iv->step, true, NULL);
2033
2034 /* The same, but with initial value zero. */
2035 add_candidate (data,
5212068f 2036 build_int_cst (TREE_TYPE (iv->base), 0),
8b11a64c
ZD
2037 iv->step, true, NULL);
2038
2039 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2040 if (TREE_CODE (phi) == PHI_NODE)
2041 {
2042 /* Additionally record the possibility of leaving the original iv
2043 untouched. */
2044 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2045 cand = add_candidate_1 (data,
2046 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2047 SSA_NAME_DEF_STMT (def));
2048 cand->var_before = iv->ssa_name;
2049 cand->var_after = def;
2050 }
2051}
2052
2053/* Adds candidates based on the old induction variables. */
2054
2055static void
2056add_old_ivs_candidates (struct ivopts_data *data)
2057{
2058 unsigned i;
2059 struct iv *iv;
87c476a2 2060 bitmap_iterator bi;
8b11a64c 2061
87c476a2 2062 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
2063 {
2064 iv = ver_info (data, i)->iv;
2065 if (iv && iv->biv_p && !zero_p (iv->step))
2066 add_old_iv_candidates (data, iv);
87c476a2 2067 }
8b11a64c
ZD
2068}
2069
2070/* Adds candidates based on the value of the induction variable IV and USE. */
2071
2072static void
2073add_iv_value_candidates (struct ivopts_data *data,
2074 struct iv *iv, struct iv_use *use)
2075{
2076 add_candidate (data, iv->base, iv->step, false, use);
2077
2078 /* The same, but with initial value zero. */
5212068f 2079 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
8b11a64c
ZD
2080 iv->step, false, use);
2081}
2082
2083/* Adds candidates based on the address IV and USE. */
2084
2085static void
2086add_address_candidates (struct ivopts_data *data,
2087 struct iv *iv, struct iv_use *use)
2088{
f5e2738c
ZD
2089 tree base, abase;
2090 unsigned HOST_WIDE_INT offset;
8b11a64c
ZD
2091
2092 /* First, the trivial choices. */
2093 add_iv_value_candidates (data, iv, use);
2094
2095 /* Second, try removing the COMPONENT_REFs. */
2096 if (TREE_CODE (iv->base) == ADDR_EXPR)
2097 {
2098 base = TREE_OPERAND (iv->base, 0);
2099 while (TREE_CODE (base) == COMPONENT_REF
2100 || (TREE_CODE (base) == ARRAY_REF
2101 && TREE_CODE (TREE_OPERAND (base, 1)) == INTEGER_CST))
2102 base = TREE_OPERAND (base, 0);
2103
2104 if (base != TREE_OPERAND (iv->base, 0))
2105 {
be35cf60
ZD
2106 gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
2107 gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);
2108
2109 if (TREE_CODE (base) == INDIRECT_REF)
8b11a64c
ZD
2110 base = TREE_OPERAND (base, 0);
2111 else
2112 base = build_addr (base);
2113 add_candidate (data, base, iv->step, false, use);
2114 }
2115 }
2116
2117 /* Third, try removing the constant offset. */
2118 abase = iv->base;
f5e2738c
ZD
2119 base = strip_offset (abase, false, &offset);
2120 if (offset)
2121 add_candidate (data, base, iv->step, false, use);
8b11a64c
ZD
2122}
2123
2124/* Possibly adds pseudocandidate for replacing the final value of USE by
2125 a direct computation. */
2126
2127static void
2128add_iv_outer_candidates (struct ivopts_data *data, struct iv_use *use)
2129{
2130 struct tree_niter_desc *niter;
8b11a64c
ZD
2131
2132 /* We must know where we exit the loop and how many times does it roll. */
ca4c3169
ZD
2133 niter = niter_for_single_dom_exit (data);
2134 if (!niter
2135 || !zero_p (niter->may_be_zero))
8b11a64c
ZD
2136 return;
2137
2138 add_candidate_1 (data, NULL, NULL, false, IP_NORMAL, use, NULL_TREE);
2139}
2140
2141/* Adds candidates based on the uses. */
2142
2143static void
2144add_derived_ivs_candidates (struct ivopts_data *data)
2145{
2146 unsigned i;
2147
2148 for (i = 0; i < n_iv_uses (data); i++)
2149 {
2150 struct iv_use *use = iv_use (data, i);
2151
2152 if (!use)
2153 continue;
2154
2155 switch (use->type)
2156 {
2157 case USE_NONLINEAR_EXPR:
2158 case USE_COMPARE:
2159 /* Just add the ivs based on the value of the iv used here. */
2160 add_iv_value_candidates (data, use->iv, use);
2161 break;
2162
2163 case USE_OUTER:
2164 add_iv_value_candidates (data, use->iv, use);
2165
2166 /* Additionally, add the pseudocandidate for the possibility to
2167 replace the final value by a direct computation. */
2168 add_iv_outer_candidates (data, use);
2169 break;
2170
2171 case USE_ADDRESS:
2172 add_address_candidates (data, use->iv, use);
2173 break;
2174
2175 default:
1e128c5f 2176 gcc_unreachable ();
8b11a64c
ZD
2177 }
2178 }
2179}
2180
b1b02be2
ZD
2181/* Record important candidates and add them to related_cands bitmaps
2182 if needed. */
2183
2184static void
2185record_important_candidates (struct ivopts_data *data)
2186{
2187 unsigned i;
2188 struct iv_use *use;
2189
2190 for (i = 0; i < n_iv_cands (data); i++)
2191 {
2192 struct iv_cand *cand = iv_cand (data, i);
2193
2194 if (cand->important)
2195 bitmap_set_bit (data->important_candidates, i);
2196 }
2197
2198 data->consider_all_candidates = (n_iv_cands (data)
2199 <= CONSIDER_ALL_CANDIDATES_BOUND);
2200
2201 if (data->consider_all_candidates)
2202 {
2203 /* We will not need "related_cands" bitmaps in this case,
2204 so release them to decrease peak memory consumption. */
2205 for (i = 0; i < n_iv_uses (data); i++)
2206 {
2207 use = iv_use (data, i);
8bdbfff5 2208 BITMAP_FREE (use->related_cands);
b1b02be2
ZD
2209 }
2210 }
2211 else
2212 {
2213 /* Add important candidates to the related_cands bitmaps. */
2214 for (i = 0; i < n_iv_uses (data); i++)
2215 bitmap_ior_into (iv_use (data, i)->related_cands,
2216 data->important_candidates);
2217 }
2218}
2219
8b11a64c
ZD
2220/* Finds the candidates for the induction variables. */
2221
2222static void
2223find_iv_candidates (struct ivopts_data *data)
2224{
2225 /* Add commonly used ivs. */
2226 add_standard_iv_candidates (data);
2227
2228 /* Add old induction variables. */
2229 add_old_ivs_candidates (data);
2230
2231 /* Add induction variables derived from uses. */
2232 add_derived_ivs_candidates (data);
b1b02be2
ZD
2233
2234 /* Record the important candidates. */
2235 record_important_candidates (data);
8b11a64c
ZD
2236}
2237
2238/* Allocates the data structure mapping the (use, candidate) pairs to costs.
2239 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2240 we allocate a simple list to every use. */
2241
2242static void
2243alloc_use_cost_map (struct ivopts_data *data)
2244{
b1b02be2 2245 unsigned i, size, s, j;
8b11a64c
ZD
2246
2247 for (i = 0; i < n_iv_uses (data); i++)
2248 {
2249 struct iv_use *use = iv_use (data, i);
87c476a2 2250 bitmap_iterator bi;
8b11a64c
ZD
2251
2252 if (data->consider_all_candidates)
b1b02be2 2253 size = n_iv_cands (data);
8b11a64c
ZD
2254 else
2255 {
b1b02be2 2256 s = 0;
87c476a2
ZD
2257 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2258 {
b1b02be2 2259 s++;
87c476a2 2260 }
b1b02be2
ZD
2261
2262 /* Round up to the power of two, so that moduling by it is fast. */
2263 for (size = 1; size < s; size <<= 1)
2264 continue;
8b11a64c
ZD
2265 }
2266
b1b02be2 2267 use->n_map_members = size;
8b11a64c
ZD
2268 use->cost_map = xcalloc (size, sizeof (struct cost_pair));
2269 }
2270}
2271
2272/* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2273 on invariants DEPENDS_ON. */
2274
2275static void
2276set_use_iv_cost (struct ivopts_data *data,
2277 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2278 bitmap depends_on)
2279{
b1b02be2
ZD
2280 unsigned i, s;
2281
2282 if (cost == INFTY)
8b11a64c 2283 {
8bdbfff5 2284 BITMAP_FREE (depends_on);
b1b02be2 2285 return;
8b11a64c
ZD
2286 }
2287
2288 if (data->consider_all_candidates)
2289 {
2290 use->cost_map[cand->id].cand = cand;
2291 use->cost_map[cand->id].cost = cost;
2292 use->cost_map[cand->id].depends_on = depends_on;
2293 return;
2294 }
2295
b1b02be2
ZD
2296 /* n_map_members is a power of two, so this computes modulo. */
2297 s = cand->id & (use->n_map_members - 1);
2298 for (i = s; i < use->n_map_members; i++)
2299 if (!use->cost_map[i].cand)
2300 goto found;
2301 for (i = 0; i < s; i++)
2302 if (!use->cost_map[i].cand)
2303 goto found;
2304
2305 gcc_unreachable ();
8b11a64c 2306
b1b02be2
ZD
2307found:
2308 use->cost_map[i].cand = cand;
2309 use->cost_map[i].cost = cost;
2310 use->cost_map[i].depends_on = depends_on;
8b11a64c
ZD
2311}
2312
b1b02be2 2313/* Gets cost of (USE, CANDIDATE) pair. */
8b11a64c 2314
b1b02be2
ZD
2315static struct cost_pair *
2316get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2317 struct iv_cand *cand)
8b11a64c 2318{
b1b02be2
ZD
2319 unsigned i, s;
2320 struct cost_pair *ret;
8b11a64c
ZD
2321
2322 if (!cand)
b1b02be2 2323 return NULL;
8b11a64c
ZD
2324
2325 if (data->consider_all_candidates)
8b11a64c 2326 {
b1b02be2
ZD
2327 ret = use->cost_map + cand->id;
2328 if (!ret->cand)
2329 return NULL;
8b11a64c 2330
b1b02be2 2331 return ret;
8b11a64c 2332 }
b1b02be2
ZD
2333
2334 /* n_map_members is a power of two, so this computes modulo. */
2335 s = cand->id & (use->n_map_members - 1);
2336 for (i = s; i < use->n_map_members; i++)
2337 if (use->cost_map[i].cand == cand)
2338 return use->cost_map + i;
8b11a64c 2339
b1b02be2
ZD
2340 for (i = 0; i < s; i++)
2341 if (use->cost_map[i].cand == cand)
2342 return use->cost_map + i;
2343
2344 return NULL;
8b11a64c
ZD
2345}
2346
2347/* Returns estimate on cost of computing SEQ. */
2348
2349static unsigned
2350seq_cost (rtx seq)
2351{
2352 unsigned cost = 0;
2353 rtx set;
2354
2355 for (; seq; seq = NEXT_INSN (seq))
2356 {
2357 set = single_set (seq);
2358 if (set)
2359 cost += rtx_cost (set, SET);
2360 else
2361 cost++;
2362 }
2363
2364 return cost;
2365}
2366
8679c649
JH
2367/* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
2368static rtx
2369produce_memory_decl_rtl (tree obj, int *regno)
2370{
2371 rtx x;
e16acfcd
NS
2372
2373 gcc_assert (obj);
8679c649
JH
2374 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2375 {
2376 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2377 x = gen_rtx_SYMBOL_REF (Pmode, name);
2378 }
2379 else
2380 x = gen_raw_REG (Pmode, (*regno)++);
2381
2382 return gen_rtx_MEM (DECL_MODE (obj), x);
2383}
2384
8b11a64c
ZD
2385/* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
2386 walk_tree. DATA contains the actual fake register number. */
2387
2388static tree
2389prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2390{
2391 tree obj = NULL_TREE;
2392 rtx x = NULL_RTX;
2393 int *regno = data;
2394
2395 switch (TREE_CODE (*expr_p))
2396 {
8679c649
JH
2397 case ADDR_EXPR:
2398 for (expr_p = &TREE_OPERAND (*expr_p, 0);
afe84921
RH
2399 handled_component_p (*expr_p);
2400 expr_p = &TREE_OPERAND (*expr_p, 0))
2401 continue;
8679c649
JH
2402 obj = *expr_p;
2403 if (DECL_P (obj))
2404 x = produce_memory_decl_rtl (obj, regno);
2405 break;
2406
8b11a64c
ZD
2407 case SSA_NAME:
2408 *ws = 0;
2409 obj = SSA_NAME_VAR (*expr_p);
2410 if (!DECL_RTL_SET_P (obj))
2411 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2412 break;
2413
2414 case VAR_DECL:
2415 case PARM_DECL:
2416 case RESULT_DECL:
2417 *ws = 0;
2418 obj = *expr_p;
2419
2420 if (DECL_RTL_SET_P (obj))
2421 break;
2422
2423 if (DECL_MODE (obj) == BLKmode)
8679c649 2424 x = produce_memory_decl_rtl (obj, regno);
8b11a64c
ZD
2425 else
2426 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2427
2428 break;
2429
2430 default:
2431 break;
2432 }
2433
2434 if (x)
2435 {
2436 VARRAY_PUSH_GENERIC_PTR_NOGC (decl_rtl_to_reset, obj);
2437 SET_DECL_RTL (obj, x);
2438 }
2439
2440 return NULL_TREE;
2441}
2442
2443/* Determines cost of the computation of EXPR. */
2444
2445static unsigned
2446computation_cost (tree expr)
2447{
2448 rtx seq, rslt;
2449 tree type = TREE_TYPE (expr);
2450 unsigned cost;
1d27fed4
DD
2451 /* Avoid using hard regs in ways which may be unsupported. */
2452 int regno = LAST_VIRTUAL_REGISTER + 1;
8b11a64c
ZD
2453
2454 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2455 start_sequence ();
2456 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2457 seq = get_insns ();
2458 end_sequence ();
2459
2460 cost = seq_cost (seq);
2461 if (GET_CODE (rslt) == MEM)
2462 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2463
2464 return cost;
2465}
2466
2467/* Returns variable containing the value of candidate CAND at statement AT. */
2468
2469static tree
2470var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2471{
2472 if (stmt_after_increment (loop, cand, stmt))
2473 return cand->var_after;
2474 else
2475 return cand->var_before;
2476}
2477
2478/* Determines the expression by that USE is expressed from induction variable
2479 CAND at statement AT in LOOP. */
2480
2481static tree
2482get_computation_at (struct loop *loop,
2483 struct iv_use *use, struct iv_cand *cand, tree at)
2484{
2f4675b4
ZD
2485 tree ubase = use->iv->base;
2486 tree ustep = use->iv->step;
2487 tree cbase = cand->iv->base;
2488 tree cstep = cand->iv->step;
8b11a64c
ZD
2489 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2490 tree uutype;
2491 tree expr, delta;
2492 tree ratio;
2493 unsigned HOST_WIDE_INT ustepi, cstepi;
2494 HOST_WIDE_INT ratioi;
2495
2496 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2497 {
2498 /* We do not have a precision to express the values of use. */
2499 return NULL_TREE;
2500 }
2501
2502 expr = var_at_stmt (loop, cand, at);
2503
2504 if (TREE_TYPE (expr) != ctype)
2505 {
2506 /* This may happen with the original ivs. */
2507 expr = fold_convert (ctype, expr);
2508 }
2509
2510 if (TYPE_UNSIGNED (utype))
2511 uutype = utype;
2512 else
2513 {
2514 uutype = unsigned_type_for (utype);
2515 ubase = fold_convert (uutype, ubase);
2516 ustep = fold_convert (uutype, ustep);
2517 }
2518
2519 if (uutype != ctype)
2520 {
2521 expr = fold_convert (uutype, expr);
2522 cbase = fold_convert (uutype, cbase);
2523 cstep = fold_convert (uutype, cstep);
2524 }
2525
2526 if (!cst_and_fits_in_hwi (cstep)
2527 || !cst_and_fits_in_hwi (ustep))
2528 return NULL_TREE;
2529
2530 ustepi = int_cst_value (ustep);
2531 cstepi = int_cst_value (cstep);
2532
2533 if (!divide (TYPE_PRECISION (uutype), ustepi, cstepi, &ratioi))
2534 {
2535 /* TODO maybe consider case when ustep divides cstep and the ratio is
2536 a power of 2 (so that the division is fast to execute)? We would
2537 need to be much more careful with overflows etc. then. */
2538 return NULL_TREE;
2539 }
2540
2541 /* We may need to shift the value if we are after the increment. */
2542 if (stmt_after_increment (loop, cand, at))
2543 cbase = fold (build2 (PLUS_EXPR, uutype, cbase, cstep));
2544
b0a168b6
ZD
2545 /* use = ubase - ratio * cbase + ratio * var.
2546
2547 In general case ubase + ratio * (var - cbase) could be better (one less
2548 multiplication), but often it is possible to eliminate redundant parts
2549 of computations from (ubase - ratio * cbase) term, and if it does not
2550 happen, fold is able to apply the distributive law to obtain this form
2551 anyway. */
8b11a64c
ZD
2552
2553 if (ratioi == 1)
2554 {
2555 delta = fold (build2 (MINUS_EXPR, uutype, ubase, cbase));
2556 expr = fold (build2 (PLUS_EXPR, uutype, expr, delta));
2557 }
2558 else if (ratioi == -1)
2559 {
2560 delta = fold (build2 (PLUS_EXPR, uutype, ubase, cbase));
2561 expr = fold (build2 (MINUS_EXPR, uutype, delta, expr));
2562 }
b0a168b6 2563 else
8b11a64c
ZD
2564 {
2565 ratio = build_int_cst_type (uutype, ratioi);
2566 delta = fold (build2 (MULT_EXPR, uutype, ratio, cbase));
2567 delta = fold (build2 (MINUS_EXPR, uutype, ubase, delta));
2568 expr = fold (build2 (MULT_EXPR, uutype, ratio, expr));
2569 expr = fold (build2 (PLUS_EXPR, uutype, delta, expr));
2570 }
8b11a64c
ZD
2571
2572 return fold_convert (utype, expr);
2573}
2574
2575/* Determines the expression by that USE is expressed from induction variable
2576 CAND in LOOP. */
2577
2578static tree
2579get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2580{
2581 return get_computation_at (loop, use, cand, use->stmt);
2582}
2583
8b11a64c
ZD
2584/* Returns cost of addition in MODE. */
2585
2586static unsigned
2587add_cost (enum machine_mode mode)
2588{
2589 static unsigned costs[NUM_MACHINE_MODES];
2590 rtx seq;
2591 unsigned cost;
2592
2593 if (costs[mode])
2594 return costs[mode];
2595
2596 start_sequence ();
2597 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2598 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER),
2599 gen_raw_REG (mode, FIRST_PSEUDO_REGISTER + 1)),
2600 NULL_RTX);
2601 seq = get_insns ();
2602 end_sequence ();
2603
2604 cost = seq_cost (seq);
2605 if (!cost)
2606 cost = 1;
2607
2608 costs[mode] = cost;
2609
2610 if (dump_file && (dump_flags & TDF_DETAILS))
2611 fprintf (dump_file, "Addition in %s costs %d\n",
2612 GET_MODE_NAME (mode), cost);
2613 return cost;
2614}
2615
2616/* Entry in a hashtable of already known costs for multiplication. */
2617struct mbc_entry
2618{
2619 HOST_WIDE_INT cst; /* The constant to multiply by. */
2620 enum machine_mode mode; /* In mode. */
2621 unsigned cost; /* The cost. */
2622};
2623
2624/* Counts hash value for the ENTRY. */
2625
2626static hashval_t
2627mbc_entry_hash (const void *entry)
2628{
2629 const struct mbc_entry *e = entry;
2630
2631 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2632}
2633
2634/* Compares the hash table entries ENTRY1 and ENTRY2. */
2635
2636static int
2637mbc_entry_eq (const void *entry1, const void *entry2)
2638{
2639 const struct mbc_entry *e1 = entry1;
2640 const struct mbc_entry *e2 = entry2;
2641
2642 return (e1->mode == e2->mode
2643 && e1->cst == e2->cst);
2644}
2645
2646/* Returns cost of multiplication by constant CST in MODE. */
2647
2648static unsigned
2649multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2650{
2651 static htab_t costs;
2652 struct mbc_entry **cached, act;
2653 rtx seq;
2654 unsigned cost;
2655
2656 if (!costs)
2657 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2658
2659 act.mode = mode;
2660 act.cst = cst;
2661 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2662 if (*cached)
2663 return (*cached)->cost;
2664
2665 *cached = xmalloc (sizeof (struct mbc_entry));
2666 (*cached)->mode = mode;
2667 (*cached)->cst = cst;
2668
2669 start_sequence ();
2670 expand_mult (mode, gen_raw_REG (mode, FIRST_PSEUDO_REGISTER), GEN_INT (cst),
2671 NULL_RTX, 0);
2672 seq = get_insns ();
2673 end_sequence ();
2674
2675 cost = seq_cost (seq);
2676
2677 if (dump_file && (dump_flags & TDF_DETAILS))
2678 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2679 (int) cst, GET_MODE_NAME (mode), cost);
2680
2681 (*cached)->cost = cost;
2682
2683 return cost;
2684}
2685
2686/* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2687 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2688 variable is omitted. The created memory accesses MODE.
2689
2690 TODO -- there must be some better way. This all is quite crude. */
2691
2692static unsigned
2693get_address_cost (bool symbol_present, bool var_present,
2694 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio)
2695{
2696#define MAX_RATIO 128
2697 static sbitmap valid_mult;
2698 static HOST_WIDE_INT rat, off;
2699 static HOST_WIDE_INT min_offset, max_offset;
2700 static unsigned costs[2][2][2][2];
2701 unsigned cost, acost;
2702 rtx seq, addr, base;
2703 bool offset_p, ratio_p;
2704 rtx reg1;
2705 HOST_WIDE_INT s_offset;
2706 unsigned HOST_WIDE_INT mask;
2707 unsigned bits;
2708
2709 if (!valid_mult)
2710 {
2711 HOST_WIDE_INT i;
2712
2713 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2714
2715 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2716 for (i = 1; i <= 1 << 20; i <<= 1)
2717 {
2718 XEXP (addr, 1) = GEN_INT (i);
2719 if (!memory_address_p (Pmode, addr))
2720 break;
2721 }
2722 max_offset = i >> 1;
2723 off = max_offset;
2724
2725 for (i = 1; i <= 1 << 20; i <<= 1)
2726 {
2727 XEXP (addr, 1) = GEN_INT (-i);
2728 if (!memory_address_p (Pmode, addr))
2729 break;
2730 }
2731 min_offset = -(i >> 1);
2732
2733 if (dump_file && (dump_flags & TDF_DETAILS))
2734 {
2735 fprintf (dump_file, "get_address_cost:\n");
2736 fprintf (dump_file, " min offset %d\n", (int) min_offset);
2737 fprintf (dump_file, " max offset %d\n", (int) max_offset);
2738 }
2739
2740 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
2741 sbitmap_zero (valid_mult);
2742 rat = 1;
2743 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2744 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2745 {
2746 XEXP (addr, 1) = GEN_INT (i);
2747 if (memory_address_p (Pmode, addr))
2748 {
2749 SET_BIT (valid_mult, i + MAX_RATIO);
2750 rat = i;
2751 }
2752 }
2753
2754 if (dump_file && (dump_flags & TDF_DETAILS))
2755 {
2756 fprintf (dump_file, " allowed multipliers:");
2757 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2758 if (TEST_BIT (valid_mult, i + MAX_RATIO))
2759 fprintf (dump_file, " %d", (int) i);
2760 fprintf (dump_file, "\n");
2761 fprintf (dump_file, "\n");
2762 }
2763 }
2764
2765 bits = GET_MODE_BITSIZE (Pmode);
2766 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
2767 offset &= mask;
2768 if ((offset >> (bits - 1) & 1))
2769 offset |= ~mask;
2770 s_offset = offset;
2771
2772 cost = 0;
a85a9024
UW
2773 offset_p = (s_offset != 0
2774 && min_offset <= s_offset && s_offset <= max_offset);
8b11a64c
ZD
2775 ratio_p = (ratio != 1
2776 && -MAX_RATIO <= ratio && ratio <= MAX_RATIO
2777 && TEST_BIT (valid_mult, ratio + MAX_RATIO));
2778
2779 if (ratio != 1 && !ratio_p)
2780 cost += multiply_by_cost (ratio, Pmode);
2781
2782 if (s_offset && !offset_p && !symbol_present)
2783 {
2784 cost += add_cost (Pmode);
2785 var_present = true;
2786 }
2787
2788 acost = costs[symbol_present][var_present][offset_p][ratio_p];
2789 if (!acost)
2790 {
2791 acost = 0;
2792
2793 addr = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER);
2794 reg1 = gen_raw_REG (Pmode, FIRST_PSEUDO_REGISTER + 1);
2795 if (ratio_p)
2796 addr = gen_rtx_fmt_ee (MULT, Pmode, addr, GEN_INT (rat));
2797
a85a9024 2798 if (var_present)
7299dbfb 2799 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
a85a9024 2800
8b11a64c
ZD
2801 if (symbol_present)
2802 {
2803 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2804 if (offset_p)
2805 base = gen_rtx_fmt_e (CONST, Pmode,
2806 gen_rtx_fmt_ee (PLUS, Pmode,
2807 base,
2808 GEN_INT (off)));
8b11a64c
ZD
2809 }
2810 else if (offset_p)
2811 base = GEN_INT (off);
2812 else
2813 base = NULL_RTX;
2814
2815 if (base)
7299dbfb 2816 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
8b11a64c
ZD
2817
2818 start_sequence ();
2819 addr = memory_address (Pmode, addr);
2820 seq = get_insns ();
2821 end_sequence ();
2822
2823 acost = seq_cost (seq);
2824 acost += address_cost (addr, Pmode);
2825
2826 if (!acost)
2827 acost = 1;
2828 costs[symbol_present][var_present][offset_p][ratio_p] = acost;
2829 }
2830
2831 return cost + acost;
2832}
2833
2834/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2835 the bitmap to that we should store it. */
2836
2837static struct ivopts_data *fd_ivopts_data;
2838static tree
2839find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2840{
2841 bitmap *depends_on = data;
2842 struct version_info *info;
2843
2844 if (TREE_CODE (*expr_p) != SSA_NAME)
2845 return NULL_TREE;
2846 info = name_info (fd_ivopts_data, *expr_p);
2847
2848 if (!info->inv_id || info->has_nonlin_use)
2849 return NULL_TREE;
2850
2851 if (!*depends_on)
8bdbfff5 2852 *depends_on = BITMAP_ALLOC (NULL);
8b11a64c
ZD
2853 bitmap_set_bit (*depends_on, info->inv_id);
2854
2855 return NULL_TREE;
2856}
2857
7299dbfb 2858/* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
8b11a64c
ZD
2859 invariants the computation depends on. */
2860
2861static unsigned
2862force_var_cost (struct ivopts_data *data,
2863 tree expr, bitmap *depends_on)
2864{
2865 static bool costs_initialized = false;
2866 static unsigned integer_cost;
2867 static unsigned symbol_cost;
2868 static unsigned address_cost;
7299dbfb
ZD
2869 tree op0, op1;
2870 unsigned cost0, cost1, cost;
2871 enum machine_mode mode;
8b11a64c
ZD
2872
2873 if (!costs_initialized)
2874 {
2875 tree var = create_tmp_var_raw (integer_type_node, "test_var");
2876 rtx x = gen_rtx_MEM (DECL_MODE (var),
2877 gen_rtx_SYMBOL_REF (Pmode, "test_var"));
2878 tree addr;
2879 tree type = build_pointer_type (integer_type_node);
2880
2881 integer_cost = computation_cost (build_int_cst_type (integer_type_node,
2882 2000));
2883
2884 SET_DECL_RTL (var, x);
2885 TREE_STATIC (var) = 1;
2886 addr = build1 (ADDR_EXPR, type, var);
2887 symbol_cost = computation_cost (addr) + 1;
2888
2889 address_cost
2890 = computation_cost (build2 (PLUS_EXPR, type,
2891 addr,
2892 build_int_cst_type (type, 2000))) + 1;
2893 if (dump_file && (dump_flags & TDF_DETAILS))
2894 {
2895 fprintf (dump_file, "force_var_cost:\n");
2896 fprintf (dump_file, " integer %d\n", (int) integer_cost);
2897 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
2898 fprintf (dump_file, " address %d\n", (int) address_cost);
2899 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
2900 fprintf (dump_file, "\n");
2901 }
2902
2903 costs_initialized = true;
2904 }
2905
f5e2738c
ZD
2906 STRIP_NOPS (expr);
2907
8b11a64c
ZD
2908 if (depends_on)
2909 {
2910 fd_ivopts_data = data;
2911 walk_tree (&expr, find_depends, depends_on, NULL);
2912 }
2913
2914 if (SSA_VAR_P (expr))
2915 return 0;
2916
2917 if (TREE_INVARIANT (expr))
2918 {
2919 if (TREE_CODE (expr) == INTEGER_CST)
2920 return integer_cost;
2921
2922 if (TREE_CODE (expr) == ADDR_EXPR)
2923 {
2924 tree obj = TREE_OPERAND (expr, 0);
2925
2926 if (TREE_CODE (obj) == VAR_DECL
2927 || TREE_CODE (obj) == PARM_DECL
2928 || TREE_CODE (obj) == RESULT_DECL)
2929 return symbol_cost;
2930 }
2931
2932 return address_cost;
2933 }
2934
7299dbfb
ZD
2935 switch (TREE_CODE (expr))
2936 {
2937 case PLUS_EXPR:
2938 case MINUS_EXPR:
2939 case MULT_EXPR:
2940 op0 = TREE_OPERAND (expr, 0);
2941 op1 = TREE_OPERAND (expr, 1);
f5e2738c
ZD
2942 STRIP_NOPS (op0);
2943 STRIP_NOPS (op1);
7299dbfb
ZD
2944
2945 if (is_gimple_val (op0))
2946 cost0 = 0;
2947 else
2948 cost0 = force_var_cost (data, op0, NULL);
2949
2950 if (is_gimple_val (op1))
2951 cost1 = 0;
2952 else
2953 cost1 = force_var_cost (data, op1, NULL);
2954
2955 break;
2956
2957 default:
2958 /* Just an arbitrary value, FIXME. */
2959 return target_spill_cost;
2960 }
2961
2962 mode = TYPE_MODE (TREE_TYPE (expr));
2963 switch (TREE_CODE (expr))
2964 {
2965 case PLUS_EXPR:
2966 case MINUS_EXPR:
2967 cost = add_cost (mode);
2968 break;
2969
2970 case MULT_EXPR:
2971 if (cst_and_fits_in_hwi (op0))
2972 cost = multiply_by_cost (int_cst_value (op0), mode);
2973 else if (cst_and_fits_in_hwi (op1))
2974 cost = multiply_by_cost (int_cst_value (op1), mode);
2975 else
2976 return target_spill_cost;
2977 break;
2978
2979 default:
2980 gcc_unreachable ();
2981 }
2982
2983 cost += cost0;
2984 cost += cost1;
2985
2986 /* Bound the cost by target_spill_cost. The parts of complicated
2987 computations often are either loop invariant or at least can
2988 be shared between several iv uses, so letting this grow without
2989 limits would not give reasonable results. */
2990 return cost < target_spill_cost ? cost : target_spill_cost;
8b11a64c
ZD
2991}
2992
8b11a64c
ZD
2993/* Estimates cost of expressing address ADDR as var + symbol + offset. The
2994 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
2995 to false if the corresponding part is missing. DEPENDS_ON is a set of the
2996 invariants the computation depends on. */
2997
2998static unsigned
2999split_address_cost (struct ivopts_data *data,
3000 tree addr, bool *symbol_present, bool *var_present,
3001 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3002{
2f4675b4
ZD
3003 tree core;
3004 HOST_WIDE_INT bitsize;
3005 HOST_WIDE_INT bitpos;
3006 tree toffset;
3007 enum machine_mode mode;
3008 int unsignedp, volatilep;
3009
3010 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
2614034e 3011 &unsignedp, &volatilep, false);
8b11a64c 3012
2f4675b4
ZD
3013 if (toffset != 0
3014 || bitpos % BITS_PER_UNIT != 0
3015 || TREE_CODE (core) != VAR_DECL)
8b11a64c
ZD
3016 {
3017 *symbol_present = false;
3018 *var_present = true;
3019 fd_ivopts_data = data;
3020 walk_tree (&addr, find_depends, depends_on, NULL);
3021 return target_spill_cost;
2f4675b4
ZD
3022 }
3023
3024 *offset += bitpos / BITS_PER_UNIT;
8b11a64c
ZD
3025 if (TREE_STATIC (core)
3026 || DECL_EXTERNAL (core))
3027 {
3028 *symbol_present = true;
3029 *var_present = false;
3030 return 0;
3031 }
3032
3033 *symbol_present = false;
3034 *var_present = true;
3035 return 0;
3036}
3037
3038/* Estimates cost of expressing difference of addresses E1 - E2 as
3039 var + symbol + offset. The value of offset is added to OFFSET,
3040 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3041 part is missing. DEPENDS_ON is a set of the invariants the computation
3042 depends on. */
3043
3044static unsigned
3045ptr_difference_cost (struct ivopts_data *data,
3046 tree e1, tree e2, bool *symbol_present, bool *var_present,
3047 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3048{
2f4675b4 3049 HOST_WIDE_INT diff = 0;
8b11a64c
ZD
3050 unsigned cost;
3051
1e128c5f 3052 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
8b11a64c 3053
7299dbfb 3054 if (ptr_difference_const (e1, e2, &diff))
8b11a64c
ZD
3055 {
3056 *offset += diff;
3057 *symbol_present = false;
3058 *var_present = false;
3059 return 0;
3060 }
3061
3062 if (e2 == integer_zero_node)
3063 return split_address_cost (data, TREE_OPERAND (e1, 0),
3064 symbol_present, var_present, offset, depends_on);
3065
3066 *symbol_present = false;
3067 *var_present = true;
3068
3069 cost = force_var_cost (data, e1, depends_on);
3070 cost += force_var_cost (data, e2, depends_on);
3071 cost += add_cost (Pmode);
3072
3073 return cost;
3074}
3075
3076/* Estimates cost of expressing difference E1 - E2 as
3077 var + symbol + offset. The value of offset is added to OFFSET,
3078 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3079 part is missing. DEPENDS_ON is a set of the invariants the computation
3080 depends on. */
3081
3082static unsigned
3083difference_cost (struct ivopts_data *data,
3084 tree e1, tree e2, bool *symbol_present, bool *var_present,
3085 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3086{
3087 unsigned cost;
3088 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
f5e2738c
ZD
3089 unsigned HOST_WIDE_INT off1, off2;
3090
3091 e1 = strip_offset (e1, false, &off1);
3092 e2 = strip_offset (e2, false, &off2);
3093 *offset += off1 - off2;
8b11a64c 3094
f5e2738c
ZD
3095 STRIP_NOPS (e1);
3096 STRIP_NOPS (e2);
8b11a64c
ZD
3097
3098 if (TREE_CODE (e1) == ADDR_EXPR)
3099 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3100 depends_on);
3101 *symbol_present = false;
3102
3103 if (operand_equal_p (e1, e2, 0))
3104 {
3105 *var_present = false;
3106 return 0;
3107 }
3108 *var_present = true;
3109 if (zero_p (e2))
3110 return force_var_cost (data, e1, depends_on);
3111
3112 if (zero_p (e1))
3113 {
3114 cost = force_var_cost (data, e2, depends_on);
3115 cost += multiply_by_cost (-1, mode);
3116
3117 return cost;
3118 }
3119
3120 cost = force_var_cost (data, e1, depends_on);
3121 cost += force_var_cost (data, e2, depends_on);
3122 cost += add_cost (mode);
3123
3124 return cost;
3125}
3126
3127/* Determines the cost of the computation by that USE is expressed
3128 from induction variable CAND. If ADDRESS_P is true, we just need
3129 to create an address from it, otherwise we want to get it into
3130 register. A set of invariants we depend on is stored in
3131 DEPENDS_ON. AT is the statement at that the value is computed. */
3132
3133static unsigned
3134get_computation_cost_at (struct ivopts_data *data,
3135 struct iv_use *use, struct iv_cand *cand,
3136 bool address_p, bitmap *depends_on, tree at)
3137{
3138 tree ubase = use->iv->base, ustep = use->iv->step;
3139 tree cbase, cstep;
3140 tree utype = TREE_TYPE (ubase), ctype;
3141 unsigned HOST_WIDE_INT ustepi, cstepi, offset = 0;
3142 HOST_WIDE_INT ratio, aratio;
3143 bool var_present, symbol_present;
3144 unsigned cost = 0, n_sums;
3145
3146 *depends_on = NULL;
3147
3148 /* Only consider real candidates. */
3149 if (!cand->iv)
3150 return INFTY;
3151
3152 cbase = cand->iv->base;
3153 cstep = cand->iv->step;
3154 ctype = TREE_TYPE (cbase);
3155
3156 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3157 {
3158 /* We do not have a precision to express the values of use. */
3159 return INFTY;
3160 }
3161
e6845c23
ZD
3162 if (address_p)
3163 {
3164 /* Do not try to express address of an object with computation based
3165 on address of a different object. This may cause problems in rtl
3166 level alias analysis (that does not expect this to be happening,
3167 as this is illegal in C), and would be unlikely to be useful
3168 anyway. */
3169 if (use->iv->base_object
3170 && cand->iv->base_object
3171 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3172 return INFTY;
3173 }
3174
8b11a64c
ZD
3175 if (!cst_and_fits_in_hwi (ustep)
3176 || !cst_and_fits_in_hwi (cstep))
3177 return INFTY;
3178
3179 if (TREE_CODE (ubase) == INTEGER_CST
3180 && !cst_and_fits_in_hwi (ubase))
3181 goto fallback;
3182
3183 if (TREE_CODE (cbase) == INTEGER_CST
3184 && !cst_and_fits_in_hwi (cbase))
3185 goto fallback;
3186
3187 ustepi = int_cst_value (ustep);
3188 cstepi = int_cst_value (cstep);
3189
3190 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3191 {
3192 /* TODO -- add direct handling of this case. */
3193 goto fallback;
3194 }
3195
3196 if (!divide (TYPE_PRECISION (utype), ustepi, cstepi, &ratio))
3197 return INFTY;
3198
3199 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3200 or ratio == 1, it is better to handle this like
3201
3202 ubase - ratio * cbase + ratio * var
3203
3204 (also holds in the case ratio == -1, TODO. */
3205
3206 if (TREE_CODE (cbase) == INTEGER_CST)
3207 {
3208 offset = - ratio * int_cst_value (cbase);
3209 cost += difference_cost (data,
3210 ubase, integer_zero_node,
3211 &symbol_present, &var_present, &offset,
3212 depends_on);
3213 }
3214 else if (ratio == 1)
3215 {
3216 cost += difference_cost (data,
3217 ubase, cbase,
3218 &symbol_present, &var_present, &offset,
3219 depends_on);
3220 }
3221 else
3222 {
3223 cost += force_var_cost (data, cbase, depends_on);
3224 cost += add_cost (TYPE_MODE (ctype));
3225 cost += difference_cost (data,
3226 ubase, integer_zero_node,
3227 &symbol_present, &var_present, &offset,
3228 depends_on);
3229 }
3230
3231 /* If we are after the increment, the value of the candidate is higher by
3232 one iteration. */
3233 if (stmt_after_increment (data->current_loop, cand, at))
3234 offset -= ratio * cstepi;
3235
3236 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3237 (symbol/var/const parts may be omitted). If we are looking for an address,
3238 find the cost of addressing this. */
3239 if (address_p)
7299dbfb 3240 return cost + get_address_cost (symbol_present, var_present, offset, ratio);
8b11a64c
ZD
3241
3242 /* Otherwise estimate the costs for computing the expression. */
3243 aratio = ratio > 0 ? ratio : -ratio;
3244 if (!symbol_present && !var_present && !offset)
3245 {
3246 if (ratio != 1)
3247 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3248
3249 return cost;
3250 }
3251
3252 if (aratio != 1)
3253 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3254
3255 n_sums = 1;
3256 if (var_present
3257 /* Symbol + offset should be compile-time computable. */
3258 && (symbol_present || offset))
3259 n_sums++;
3260
3261 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3262
3263fallback:
3264 {
3265 /* Just get the expression, expand it and measure the cost. */
3266 tree comp = get_computation_at (data->current_loop, use, cand, at);
3267
3268 if (!comp)
3269 return INFTY;
3270
3271 if (address_p)
3272 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3273
3274 return computation_cost (comp);
3275 }
3276}
3277
3278/* Determines the cost of the computation by that USE is expressed
3279 from induction variable CAND. If ADDRESS_P is true, we just need
3280 to create an address from it, otherwise we want to get it into
3281 register. A set of invariants we depend on is stored in
3282 DEPENDS_ON. */
3283
3284static unsigned
3285get_computation_cost (struct ivopts_data *data,
3286 struct iv_use *use, struct iv_cand *cand,
3287 bool address_p, bitmap *depends_on)
3288{
3289 return get_computation_cost_at (data,
3290 use, cand, address_p, depends_on, use->stmt);
3291}
3292
3293/* Determines cost of basing replacement of USE on CAND in a generic
3294 expression. */
3295
b1b02be2 3296static bool
8b11a64c
ZD
3297determine_use_iv_cost_generic (struct ivopts_data *data,
3298 struct iv_use *use, struct iv_cand *cand)
3299{
3300 bitmap depends_on;
eec5fec9
ZD
3301 unsigned cost;
3302
3303 /* The simple case first -- if we need to express value of the preserved
3304 original biv, the cost is 0. This also prevents us from counting the
3305 cost of increment twice -- once at this use and once in the cost of
3306 the candidate. */
3307 if (cand->pos == IP_ORIGINAL
3308 && cand->incremented_at == use->stmt)
3309 {
3310 set_use_iv_cost (data, use, cand, 0, NULL);
3311 return true;
3312 }
8b11a64c 3313
eec5fec9 3314 cost = get_computation_cost (data, use, cand, false, &depends_on);
8b11a64c 3315 set_use_iv_cost (data, use, cand, cost, depends_on);
b1b02be2
ZD
3316
3317 return cost != INFTY;
8b11a64c
ZD
3318}
3319
3320/* Determines cost of basing replacement of USE on CAND in an address. */
3321
b1b02be2 3322static bool
8b11a64c
ZD
3323determine_use_iv_cost_address (struct ivopts_data *data,
3324 struct iv_use *use, struct iv_cand *cand)
3325{
3326 bitmap depends_on;
3327 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3328
3329 set_use_iv_cost (data, use, cand, cost, depends_on);
b1b02be2
ZD
3330
3331 return cost != INFTY;
8b11a64c
ZD
3332}
3333
3334/* Computes value of induction variable IV in iteration NITER. */
3335
3336static tree
3337iv_value (struct iv *iv, tree niter)
3338{
3339 tree val;
3340 tree type = TREE_TYPE (iv->base);
3341
3342 niter = fold_convert (type, niter);
2f4675b4 3343 val = fold (build2 (MULT_EXPR, type, iv->step, niter));
8b11a64c
ZD
3344
3345 return fold (build2 (PLUS_EXPR, type, iv->base, val));
3346}
3347
3348/* Computes value of candidate CAND at position AT in iteration NITER. */
3349
3350static tree
3351cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter)
3352{
3353 tree val = iv_value (cand->iv, niter);
3354 tree type = TREE_TYPE (cand->iv->base);
3355
3356 if (stmt_after_increment (loop, cand, at))
3357 val = fold (build2 (PLUS_EXPR, type, val, cand->iv->step));
3358
3359 return val;
3360}
3361
ca4c3169
ZD
3362/* Returns period of induction variable iv. */
3363
3364static tree
3365iv_period (struct iv *iv)
3366{
3367 tree step = iv->step, period, type;
3368 tree pow2div;
3369
3370 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3371
3372 /* Period of the iv is gcd (step, type range). Since type range is power
3373 of two, it suffices to determine the maximum power of two that divides
3374 step. */
3375 pow2div = num_ending_zeros (step);
3376 type = unsigned_type_for (TREE_TYPE (step));
3377
3378 period = build_low_bits_mask (type,
3379 (TYPE_PRECISION (type)
3380 - tree_low_cst (pow2div, 1)));
3381
3382 return period;
3383}
3384
8b11a64c
ZD
3385/* Check whether it is possible to express the condition in USE by comparison
3386 of candidate CAND. If so, store the comparison code to COMPARE and the
3387 value compared with to BOUND. */
3388
3389static bool
ca4c3169 3390may_eliminate_iv (struct ivopts_data *data,
8b11a64c
ZD
3391 struct iv_use *use, struct iv_cand *cand,
3392 enum tree_code *compare, tree *bound)
3393{
e6845c23 3394 basic_block ex_bb;
8b11a64c 3395 edge exit;
ca4c3169
ZD
3396 struct tree_niter_desc *niter;
3397 tree nit, nit_type;
3398 tree wider_type, period, per_type;
3399 struct loop *loop = data->current_loop;
e6845c23
ZD
3400
3401 /* For now works only for exits that dominate the loop latch. TODO -- extend
3402 for other conditions inside loop body. */
3403 ex_bb = bb_for_stmt (use->stmt);
3404 if (use->stmt != last_stmt (ex_bb)
3405 || TREE_CODE (use->stmt) != COND_EXPR)
8b11a64c 3406 return false;
e6845c23 3407 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
8b11a64c
ZD
3408 return false;
3409
e6845c23
ZD
3410 exit = EDGE_SUCC (ex_bb, 0);
3411 if (flow_bb_inside_loop_p (loop, exit->dest))
3412 exit = EDGE_SUCC (ex_bb, 1);
3413 if (flow_bb_inside_loop_p (loop, exit->dest))
3414 return false;
3415
ca4c3169
ZD
3416 niter = niter_for_exit (data, exit);
3417 if (!niter
3418 || !zero_p (niter->may_be_zero))
8b11a64c
ZD
3419 return false;
3420
ca4c3169
ZD
3421 nit = niter->niter;
3422 nit_type = TREE_TYPE (nit);
8b11a64c 3423
ca4c3169
ZD
3424 /* Determine whether we may use the variable to test whether niter iterations
3425 elapsed. This is the case iff the period of the induction variable is
3426 greater than the number of iterations. */
3427 period = iv_period (cand->iv);
3428 if (!period)
8b11a64c 3429 return false;
ca4c3169
ZD
3430 per_type = TREE_TYPE (period);
3431
3432 wider_type = TREE_TYPE (period);
3433 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3434 wider_type = per_type;
3435 else
3436 wider_type = nit_type;
8b11a64c 3437
ca4c3169
ZD
3438 if (!integer_nonzerop (fold (build2 (GE_EXPR, boolean_type_node,
3439 fold_convert (wider_type, period),
3440 fold_convert (wider_type, nit)))))
8b11a64c
ZD
3441 return false;
3442
ca4c3169
ZD
3443 if (exit->flags & EDGE_TRUE_VALUE)
3444 *compare = EQ_EXPR;
3445 else
3446 *compare = NE_EXPR;
3447
3448 *bound = cand_value_at (loop, cand, use->stmt, nit);
8b11a64c
ZD
3449 return true;
3450}
3451
3452/* Determines cost of basing replacement of USE on CAND in a condition. */
3453
b1b02be2 3454static bool
8b11a64c
ZD
3455determine_use_iv_cost_condition (struct ivopts_data *data,
3456 struct iv_use *use, struct iv_cand *cand)
3457{
3458 tree bound;
3459 enum tree_code compare;
3460
3461 /* Only consider real candidates. */
3462 if (!cand->iv)
3463 {
3464 set_use_iv_cost (data, use, cand, INFTY, NULL);
b1b02be2 3465 return false;
8b11a64c
ZD
3466 }
3467
ca4c3169 3468 if (may_eliminate_iv (data, use, cand, &compare, &bound))
8b11a64c
ZD
3469 {
3470 bitmap depends_on = NULL;
3471 unsigned cost = force_var_cost (data, bound, &depends_on);
3472
3473 set_use_iv_cost (data, use, cand, cost, depends_on);
b1b02be2 3474 return cost != INFTY;
8b11a64c
ZD
3475 }
3476
3477 /* The induction variable elimination failed; just express the original
3478 giv. If it is compared with an invariant, note that we cannot get
3479 rid of it. */
3480 if (TREE_CODE (*use->op_p) == SSA_NAME)
3481 record_invariant (data, *use->op_p, true);
3482 else
3483 {
3484 record_invariant (data, TREE_OPERAND (*use->op_p, 0), true);
3485 record_invariant (data, TREE_OPERAND (*use->op_p, 1), true);
3486 }
3487
b1b02be2 3488 return determine_use_iv_cost_generic (data, use, cand);
8b11a64c
ZD
3489}
3490
3491/* Checks whether it is possible to replace the final value of USE by
3492 a direct computation. If so, the formula is stored to *VALUE. */
3493
3494static bool
ca4c3169
ZD
3495may_replace_final_value (struct ivopts_data *data, struct iv_use *use,
3496 tree *value)
8b11a64c 3497{
ca4c3169 3498 struct loop *loop = data->current_loop;
8b11a64c
ZD
3499 edge exit;
3500 struct tree_niter_desc *niter;
3501
3502 exit = single_dom_exit (loop);
3503 if (!exit)
3504 return false;
3505
1e128c5f
GB
3506 gcc_assert (dominated_by_p (CDI_DOMINATORS, exit->src,
3507 bb_for_stmt (use->stmt)));
8b11a64c 3508
ca4c3169
ZD
3509 niter = niter_for_single_dom_exit (data);
3510 if (!niter
3511 || !zero_p (niter->may_be_zero))
8b11a64c
ZD
3512 return false;
3513
3514 *value = iv_value (use->iv, niter->niter);
3515
3516 return true;
3517}
3518
3519/* Determines cost of replacing final value of USE using CAND. */
3520
b1b02be2 3521static bool
8b11a64c
ZD
3522determine_use_iv_cost_outer (struct ivopts_data *data,
3523 struct iv_use *use, struct iv_cand *cand)
3524{
3525 bitmap depends_on;
3526 unsigned cost;
3527 edge exit;
3528 tree value;
3529 struct loop *loop = data->current_loop;
eec5fec9
ZD
3530
3531 /* The simple case first -- if we need to express value of the preserved
3532 original biv, the cost is 0. This also prevents us from counting the
3533 cost of increment twice -- once at this use and once in the cost of
3534 the candidate. */
3535 if (cand->pos == IP_ORIGINAL
3536 && cand->incremented_at == use->stmt)
3537 {
3538 set_use_iv_cost (data, use, cand, 0, NULL);
3539 return true;
3540 }
3541
8b11a64c
ZD
3542 if (!cand->iv)
3543 {
ca4c3169 3544 if (!may_replace_final_value (data, use, &value))
8b11a64c
ZD
3545 {
3546 set_use_iv_cost (data, use, cand, INFTY, NULL);
b1b02be2 3547 return false;
8b11a64c
ZD
3548 }
3549
3550 depends_on = NULL;
3551 cost = force_var_cost (data, value, &depends_on);
3552
3553 cost /= AVG_LOOP_NITER (loop);
3554
3555 set_use_iv_cost (data, use, cand, cost, depends_on);
b1b02be2 3556 return cost != INFTY;
8b11a64c
ZD
3557 }
3558
3559 exit = single_dom_exit (loop);
3560 if (exit)
3561 {
3562 /* If there is just a single exit, we may use value of the candidate
3563 after we take it to determine the value of use. */
3564 cost = get_computation_cost_at (data, use, cand, false, &depends_on,
3565 last_stmt (exit->src));
3566 if (cost != INFTY)
3567 cost /= AVG_LOOP_NITER (loop);
3568 }
3569 else
3570 {
3571 /* Otherwise we just need to compute the iv. */
3572 cost = get_computation_cost (data, use, cand, false, &depends_on);
3573 }
3574
3575 set_use_iv_cost (data, use, cand, cost, depends_on);
b1b02be2
ZD
3576
3577 return cost != INFTY;
8b11a64c
ZD
3578}
3579
b1b02be2
ZD
3580/* Determines cost of basing replacement of USE on CAND. Returns false
3581 if USE cannot be based on CAND. */
8b11a64c 3582
b1b02be2 3583static bool
8b11a64c
ZD
3584determine_use_iv_cost (struct ivopts_data *data,
3585 struct iv_use *use, struct iv_cand *cand)
3586{
3587 switch (use->type)
3588 {
3589 case USE_NONLINEAR_EXPR:
b1b02be2 3590 return determine_use_iv_cost_generic (data, use, cand);
8b11a64c
ZD
3591
3592 case USE_OUTER:
b1b02be2 3593 return determine_use_iv_cost_outer (data, use, cand);
8b11a64c
ZD
3594
3595 case USE_ADDRESS:
b1b02be2 3596 return determine_use_iv_cost_address (data, use, cand);
8b11a64c
ZD
3597
3598 case USE_COMPARE:
b1b02be2 3599 return determine_use_iv_cost_condition (data, use, cand);
8b11a64c
ZD
3600
3601 default:
1e128c5f 3602 gcc_unreachable ();
8b11a64c
ZD
3603 }
3604}
3605
3606/* Determines costs of basing the use of the iv on an iv candidate. */
3607
3608static void
3609determine_use_iv_costs (struct ivopts_data *data)
3610{
3611 unsigned i, j;
3612 struct iv_use *use;
3613 struct iv_cand *cand;
8bdbfff5 3614 bitmap to_clear = BITMAP_ALLOC (NULL);
8b11a64c
ZD
3615
3616 alloc_use_cost_map (data);
3617
8b11a64c
ZD
3618 for (i = 0; i < n_iv_uses (data); i++)
3619 {
3620 use = iv_use (data, i);
3621
3622 if (data->consider_all_candidates)
3623 {
3624 for (j = 0; j < n_iv_cands (data); j++)
3625 {
3626 cand = iv_cand (data, j);
3627 determine_use_iv_cost (data, use, cand);
3628 }
3629 }
3630 else
3631 {
87c476a2
ZD
3632 bitmap_iterator bi;
3633
3634 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
8b11a64c
ZD
3635 {
3636 cand = iv_cand (data, j);
b1b02be2
ZD
3637 if (!determine_use_iv_cost (data, use, cand))
3638 bitmap_set_bit (to_clear, j);
87c476a2 3639 }
b1b02be2
ZD
3640
3641 /* Remove the candidates for that the cost is infinite from
3642 the list of related candidates. */
3643 bitmap_and_compl_into (use->related_cands, to_clear);
3644 bitmap_clear (to_clear);
8b11a64c
ZD
3645 }
3646 }
3647
8bdbfff5 3648 BITMAP_FREE (to_clear);
b1b02be2 3649
8b11a64c
ZD
3650 if (dump_file && (dump_flags & TDF_DETAILS))
3651 {
3652 fprintf (dump_file, "Use-candidate costs:\n");
3653
3654 for (i = 0; i < n_iv_uses (data); i++)
3655 {
3656 use = iv_use (data, i);
3657
3658 fprintf (dump_file, "Use %d:\n", i);
3659 fprintf (dump_file, " cand\tcost\tdepends on\n");
3660 for (j = 0; j < use->n_map_members; j++)
3661 {
3662 if (!use->cost_map[j].cand
3663 || use->cost_map[j].cost == INFTY)
3664 continue;
3665
3666 fprintf (dump_file, " %d\t%d\t",
3667 use->cost_map[j].cand->id,
3668 use->cost_map[j].cost);
3669 if (use->cost_map[j].depends_on)
3670 bitmap_print (dump_file,
3671 use->cost_map[j].depends_on, "","");
3672 fprintf (dump_file, "\n");
3673 }
3674
3675 fprintf (dump_file, "\n");
3676 }
3677 fprintf (dump_file, "\n");
3678 }
3679}
3680
3681/* Determines cost of the candidate CAND. */
3682
3683static void
3684determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3685{
3686 unsigned cost_base, cost_step;
4366cf6d 3687 tree base;
8b11a64c
ZD
3688
3689 if (!cand->iv)
3690 {
3691 cand->cost = 0;
3692 return;
3693 }
3694
3695 /* There are two costs associated with the candidate -- its increment
3696 and its initialization. The second is almost negligible for any loop
3697 that rolls enough, so we take it just very little into account. */
3698
3699 base = cand->iv->base;
3700 cost_base = force_var_cost (data, base, NULL);
3701 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3702
3703 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3704
cf5d1802
ZD
3705 /* Prefer the original iv unless we may gain something by replacing it;
3706 this is not really relevant for artificial ivs created by other
3707 passes. */
3708 if (cand->pos == IP_ORIGINAL
3709 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
8b11a64c
ZD
3710 cand->cost--;
3711
3712 /* Prefer not to insert statements into latch unless there are some
3713 already (so that we do not create unnecessary jumps). */
4366cf6d
ZD
3714 if (cand->pos == IP_END
3715 && empty_block_p (ip_end_pos (data->current_loop)))
3716 cand->cost++;
8b11a64c
ZD
3717}
3718
3719/* Determines costs of computation of the candidates. */
3720
3721static void
3722determine_iv_costs (struct ivopts_data *data)
3723{
3724 unsigned i;
3725
3726 if (dump_file && (dump_flags & TDF_DETAILS))
3727 {
3728 fprintf (dump_file, "Candidate costs:\n");
3729 fprintf (dump_file, " cand\tcost\n");
3730 }
3731
3732 for (i = 0; i < n_iv_cands (data); i++)
3733 {
3734 struct iv_cand *cand = iv_cand (data, i);
3735
3736 determine_iv_cost (data, cand);
3737
3738 if (dump_file && (dump_flags & TDF_DETAILS))
3739 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3740 }
3741
3742if (dump_file && (dump_flags & TDF_DETAILS))
3743 fprintf (dump_file, "\n");
3744}
3745
3746/* Calculates cost for having SIZE induction variables. */
3747
3748static unsigned
3749ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3750{
3751 return global_cost_for_size (size,
3752 loop_data (data->current_loop)->regs_used,
3753 n_iv_uses (data));
3754}
3755
3756/* For each size of the induction variable set determine the penalty. */
3757
3758static void
3759determine_set_costs (struct ivopts_data *data)
3760{
3761 unsigned j, n;
3762 tree phi, op;
3763 struct loop *loop = data->current_loop;
87c476a2 3764 bitmap_iterator bi;
8b11a64c
ZD
3765
3766 /* We use the following model (definitely improvable, especially the
3767 cost function -- TODO):
3768
3769 We estimate the number of registers available (using MD data), name it A.
3770
3771 We estimate the number of registers used by the loop, name it U. This
3772 number is obtained as the number of loop phi nodes (not counting virtual
3773 registers and bivs) + the number of variables from outside of the loop.
3774
3775 We set a reserve R (free regs that are used for temporary computations,
3776 etc.). For now the reserve is a constant 3.
3777
3778 Let I be the number of induction variables.
3779
3780 -- if U + I + R <= A, the cost is I * SMALL_COST (just not to encourage
3781 make a lot of ivs without a reason).
3782 -- if A - R < U + I <= A, the cost is I * PRES_COST
3783 -- if U + I > A, the cost is I * PRES_COST and
3784 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
3785
3786 if (dump_file && (dump_flags & TDF_DETAILS))
3787 {
3788 fprintf (dump_file, "Global costs:\n");
3789 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3790 fprintf (dump_file, " target_small_cost %d\n", target_small_cost);
3791 fprintf (dump_file, " target_pres_cost %d\n", target_pres_cost);
3792 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3793 }
3794
3795 n = 0;
bb29d951 3796 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
3797 {
3798 op = PHI_RESULT (phi);
3799
3800 if (!is_gimple_reg (op))
3801 continue;
3802
3803 if (get_iv (data, op))
3804 continue;
3805
3806 n++;
3807 }
3808
87c476a2 3809 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
3810 {
3811 struct version_info *info = ver_info (data, j);
3812
3813 if (info->inv_id && info->has_nonlin_use)
3814 n++;
87c476a2 3815 }
8b11a64c
ZD
3816
3817 loop_data (loop)->regs_used = n;
3818 if (dump_file && (dump_flags & TDF_DETAILS))
3819 fprintf (dump_file, " regs_used %d\n", n);
3820
3821 if (dump_file && (dump_flags & TDF_DETAILS))
3822 {
3823 fprintf (dump_file, " cost for size:\n");
3824 fprintf (dump_file, " ivs\tcost\n");
3825 for (j = 0; j <= 2 * target_avail_regs; j++)
3826 fprintf (dump_file, " %d\t%d\n", j,
3827 ivopts_global_cost_for_size (data, j));
3828 fprintf (dump_file, "\n");
3829 }
3830}
3831
b1b02be2 3832/* Returns true if A is a cheaper cost pair than B. */
8b11a64c 3833
b1b02be2
ZD
3834static bool
3835cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
8b11a64c 3836{
b1b02be2
ZD
3837 if (!a)
3838 return false;
8b11a64c 3839
b1b02be2
ZD
3840 if (!b)
3841 return true;
3842
3843 if (a->cost < b->cost)
3844 return true;
3845
3846 if (a->cost > b->cost)
3847 return false;
3848
3849 /* In case the costs are the same, prefer the cheaper candidate. */
3850 if (a->cand->cost < b->cand->cost)
3851 return true;
3852
3853 return false;
3854}
3855
3856/* Computes the cost field of IVS structure. */
3857
3858static void
3859iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
3860{
3861 unsigned cost = 0;
3862
3863 cost += ivs->cand_use_cost;
3864 cost += ivs->cand_cost;
3865 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
3866
3867 ivs->cost = cost;
3868}
3869
3870/* Set USE not to be expressed by any candidate in IVS. */
3871
3872static void
3873iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
3874 struct iv_use *use)
3875{
3876 unsigned uid = use->id, cid, iid;
3877 bitmap deps;
3878 struct cost_pair *cp;
3879 bitmap_iterator bi;
3880
3881 cp = ivs->cand_for_use[uid];
3882 if (!cp)
3883 return;
3884 cid = cp->cand->id;
3885
3886 ivs->bad_uses++;
3887 ivs->cand_for_use[uid] = NULL;
3888 ivs->n_cand_uses[cid]--;
3889
3890 if (ivs->n_cand_uses[cid] == 0)
8b11a64c 3891 {
b1b02be2
ZD
3892 bitmap_clear_bit (ivs->cands, cid);
3893 /* Do not count the pseudocandidates. */
3894 if (cp->cand->iv)
3895 ivs->n_regs--;
36f5ada1 3896 ivs->n_cands--;
b1b02be2
ZD
3897 ivs->cand_cost -= cp->cand->cost;
3898 }
3899
3900 ivs->cand_use_cost -= cp->cost;
3901
3902 deps = cp->depends_on;
80cad5fa 3903
b1b02be2
ZD
3904 if (deps)
3905 {
3906 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
3907 {
3908 ivs->n_invariant_uses[iid]--;
3909 if (ivs->n_invariant_uses[iid] == 0)
3910 ivs->n_regs--;
3911 }
8b11a64c
ZD
3912 }
3913
b1b02be2
ZD
3914 iv_ca_recount_cost (data, ivs);
3915}
3916
3917/* Set cost pair for USE in set IVS to CP. */
3918
3919static void
3920iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
3921 struct iv_use *use, struct cost_pair *cp)
3922{
3923 unsigned uid = use->id, cid, iid;
3924 bitmap deps;
3925 bitmap_iterator bi;
3926
3927 if (ivs->cand_for_use[uid] == cp)
3928 return;
3929
3930 if (ivs->cand_for_use[uid])
3931 iv_ca_set_no_cp (data, ivs, use);
3932
3933 if (cp)
8b11a64c 3934 {
b1b02be2 3935 cid = cp->cand->id;
8b11a64c 3936
b1b02be2
ZD
3937 ivs->bad_uses--;
3938 ivs->cand_for_use[uid] = cp;
3939 ivs->n_cand_uses[cid]++;
3940 if (ivs->n_cand_uses[cid] == 1)
8b11a64c 3941 {
b1b02be2
ZD
3942 bitmap_set_bit (ivs->cands, cid);
3943 /* Do not count the pseudocandidates. */
3944 if (cp->cand->iv)
3945 ivs->n_regs++;
36f5ada1 3946 ivs->n_cands++;
b1b02be2 3947 ivs->cand_cost += cp->cand->cost;
8b11a64c
ZD
3948 }
3949
b1b02be2
ZD
3950 ivs->cand_use_cost += cp->cost;
3951
3952 deps = cp->depends_on;
3953
3954 if (deps)
8b11a64c 3955 {
b1b02be2 3956 EXECUTE_IF_SET_IN_BITMAP (deps, 0, iid, bi)
87c476a2 3957 {
b1b02be2
ZD
3958 ivs->n_invariant_uses[iid]++;
3959 if (ivs->n_invariant_uses[iid] == 1)
3960 ivs->n_regs++;
87c476a2 3961 }
8b11a64c
ZD
3962 }
3963
b1b02be2 3964 iv_ca_recount_cost (data, ivs);
87c476a2 3965 }
b1b02be2
ZD
3966}
3967
3968/* Extend set IVS by expressing USE by some of the candidates in it
3969 if possible. */
3970
3971static void
3972iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
3973 struct iv_use *use)
3974{
3975 struct cost_pair *best_cp = NULL, *cp;
3976 bitmap_iterator bi;
3977 unsigned i;
8b11a64c 3978
b1b02be2
ZD
3979 gcc_assert (ivs->upto >= use->id);
3980
3981 if (ivs->upto == use->id)
3982 {
3983 ivs->upto++;
3984 ivs->bad_uses++;
3985 }
8b11a64c 3986
b1b02be2
ZD
3987 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
3988 {
3989 cp = get_use_iv_cost (data, use, iv_cand (data, i));
8b11a64c 3990
b1b02be2
ZD
3991 if (cheaper_cost_pair (cp, best_cp))
3992 best_cp = cp;
3993 }
8b11a64c 3994
b1b02be2 3995 iv_ca_set_cp (data, ivs, use, best_cp);
8b11a64c
ZD
3996}
3997
b1b02be2 3998/* Get cost for assignment IVS. */
8b11a64c
ZD
3999
4000static unsigned
b1b02be2
ZD
4001iv_ca_cost (struct iv_ca *ivs)
4002{
4003 return (ivs->bad_uses ? INFTY : ivs->cost);
4004}
4005
4006/* Returns true if all dependences of CP are among invariants in IVS. */
4007
4008static bool
4009iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
8b11a64c
ZD
4010{
4011 unsigned i;
87c476a2 4012 bitmap_iterator bi;
8b11a64c 4013
b1b02be2
ZD
4014 if (!cp->depends_on)
4015 return true;
4016
4017 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
8b11a64c 4018 {
b1b02be2
ZD
4019 if (ivs->n_invariant_uses[i] == 0)
4020 return false;
4021 }
4022
4023 return true;
4024}
4025
4026/* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
4027 it before NEXT_CHANGE. */
4028
4029static struct iv_ca_delta *
4030iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4031 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4032{
4033 struct iv_ca_delta *change = xmalloc (sizeof (struct iv_ca_delta));
4034
4035 change->use = use;
4036 change->old_cp = old_cp;
4037 change->new_cp = new_cp;
4038 change->next_change = next_change;
4039
4040 return change;
4041}
4042
36f5ada1 4043/* Joins two lists of changes L1 and L2. Destructive -- old lists
6c6cfbfd 4044 are rewritten. */
36f5ada1
ZD
4045
4046static struct iv_ca_delta *
4047iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4048{
4049 struct iv_ca_delta *last;
4050
4051 if (!l2)
4052 return l1;
4053
4054 if (!l1)
4055 return l2;
4056
4057 for (last = l1; last->next_change; last = last->next_change)
4058 continue;
4059 last->next_change = l2;
4060
4061 return l1;
4062}
4063
b1b02be2
ZD
4064/* Returns candidate by that USE is expressed in IVS. */
4065
4066static struct cost_pair *
4067iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4068{
4069 return ivs->cand_for_use[use->id];
4070}
4071
36f5ada1
ZD
4072/* Reverse the list of changes DELTA, forming the inverse to it. */
4073
4074static struct iv_ca_delta *
4075iv_ca_delta_reverse (struct iv_ca_delta *delta)
4076{
4077 struct iv_ca_delta *act, *next, *prev = NULL;
4078 struct cost_pair *tmp;
4079
4080 for (act = delta; act; act = next)
4081 {
4082 next = act->next_change;
4083 act->next_change = prev;
4084 prev = act;
4085
4086 tmp = act->old_cp;
4087 act->old_cp = act->new_cp;
4088 act->new_cp = tmp;
4089 }
4090
4091 return prev;
4092}
4093
b1b02be2
ZD
4094/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4095 reverted instead. */
4096
4097static void
4098iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4099 struct iv_ca_delta *delta, bool forward)
4100{
4101 struct cost_pair *from, *to;
36f5ada1 4102 struct iv_ca_delta *act;
b1b02be2 4103
36f5ada1
ZD
4104 if (!forward)
4105 delta = iv_ca_delta_reverse (delta);
b1b02be2 4106
36f5ada1
ZD
4107 for (act = delta; act; act = act->next_change)
4108 {
4109 from = act->old_cp;
4110 to = act->new_cp;
4111 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4112 iv_ca_set_cp (data, ivs, act->use, to);
8b11a64c 4113 }
36f5ada1
ZD
4114
4115 if (!forward)
4116 iv_ca_delta_reverse (delta);
b1b02be2 4117}
8b11a64c 4118
b1b02be2 4119/* Returns true if CAND is used in IVS. */
8b11a64c 4120
b1b02be2
ZD
4121static bool
4122iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4123{
4124 return ivs->n_cand_uses[cand->id] > 0;
4125}
8b11a64c 4126
36f5ada1
ZD
4127/* Returns number of induction variable candidates in the set IVS. */
4128
4129static unsigned
4130iv_ca_n_cands (struct iv_ca *ivs)
4131{
4132 return ivs->n_cands;
4133}
4134
b1b02be2
ZD
4135/* Free the list of changes DELTA. */
4136
4137static void
4138iv_ca_delta_free (struct iv_ca_delta **delta)
4139{
4140 struct iv_ca_delta *act, *next;
4141
4142 for (act = *delta; act; act = next)
87c476a2 4143 {
b1b02be2
ZD
4144 next = act->next_change;
4145 free (act);
87c476a2 4146 }
8b11a64c 4147
b1b02be2
ZD
4148 *delta = NULL;
4149}
4150
4151/* Allocates new iv candidates assignment. */
4152
4153static struct iv_ca *
4154iv_ca_new (struct ivopts_data *data)
4155{
4156 struct iv_ca *nw = xmalloc (sizeof (struct iv_ca));
8b11a64c 4157
b1b02be2
ZD
4158 nw->upto = 0;
4159 nw->bad_uses = 0;
4160 nw->cand_for_use = xcalloc (n_iv_uses (data), sizeof (struct cost_pair *));
4161 nw->n_cand_uses = xcalloc (n_iv_cands (data), sizeof (unsigned));
8bdbfff5 4162 nw->cands = BITMAP_ALLOC (NULL);
36f5ada1 4163 nw->n_cands = 0;
b1b02be2
ZD
4164 nw->n_regs = 0;
4165 nw->cand_use_cost = 0;
4166 nw->cand_cost = 0;
4167 nw->n_invariant_uses = xcalloc (data->max_inv_id + 1, sizeof (unsigned));
4168 nw->cost = 0;
4169
4170 return nw;
4171}
4172
4173/* Free memory occupied by the set IVS. */
4174
4175static void
4176iv_ca_free (struct iv_ca **ivs)
4177{
4178 free ((*ivs)->cand_for_use);
4179 free ((*ivs)->n_cand_uses);
8bdbfff5 4180 BITMAP_FREE ((*ivs)->cands);
b1b02be2
ZD
4181 free ((*ivs)->n_invariant_uses);
4182 free (*ivs);
4183 *ivs = NULL;
4184}
4185
4186/* Dumps IVS to FILE. */
4187
4188static void
4189iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4190{
4191 const char *pref = " invariants ";
4192 unsigned i;
4193
4194 fprintf (file, " cost %d\n", iv_ca_cost (ivs));
4195 bitmap_print (file, ivs->cands, " candidates ","\n");
4196
4197 for (i = 1; i <= data->max_inv_id; i++)
4198 if (ivs->n_invariant_uses[i])
4199 {
4200 fprintf (file, "%s%d", pref, i);
4201 pref = ", ";
4202 }
4203 fprintf (file, "\n");
4204}
4205
4206/* Try changing candidate in IVS to CAND for each use. Return cost of the
36f5ada1
ZD
4207 new set, and store differences in DELTA. Number of induction variables
4208 in the new set is stored to N_IVS. */
b1b02be2
ZD
4209
4210static unsigned
4211iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
36f5ada1
ZD
4212 struct iv_cand *cand, struct iv_ca_delta **delta,
4213 unsigned *n_ivs)
b1b02be2
ZD
4214{
4215 unsigned i, cost;
4216 struct iv_use *use;
4217 struct cost_pair *old_cp, *new_cp;
4218
4219 *delta = NULL;
4220 for (i = 0; i < ivs->upto; i++)
4221 {
4222 use = iv_use (data, i);
4223 old_cp = iv_ca_cand_for_use (ivs, use);
4224
4225 if (old_cp
4226 && old_cp->cand == cand)
4227 continue;
4228
4229 new_cp = get_use_iv_cost (data, use, cand);
4230 if (!new_cp)
4231 continue;
4232
4233 if (!iv_ca_has_deps (ivs, new_cp))
4234 continue;
4235
4236 if (!cheaper_cost_pair (new_cp, old_cp))
4237 continue;
4238
4239 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4240 }
4241
4242 iv_ca_delta_commit (data, ivs, *delta, true);
4243 cost = iv_ca_cost (ivs);
36f5ada1
ZD
4244 if (n_ivs)
4245 *n_ivs = iv_ca_n_cands (ivs);
b1b02be2 4246 iv_ca_delta_commit (data, ivs, *delta, false);
8b11a64c
ZD
4247
4248 return cost;
4249}
4250
a0eca485 4251/* Try narrowing set IVS by removing CAND. Return the cost of
b1b02be2 4252 the new set and store the differences in DELTA. */
8b11a64c
ZD
4253
4254static unsigned
b1b02be2
ZD
4255iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4256 struct iv_cand *cand, struct iv_ca_delta **delta)
8b11a64c 4257{
b1b02be2
ZD
4258 unsigned i, ci;
4259 struct iv_use *use;
4260 struct cost_pair *old_cp, *new_cp, *cp;
4261 bitmap_iterator bi;
4262 struct iv_cand *cnd;
4263 unsigned cost;
4264
4265 *delta = NULL;
4266 for (i = 0; i < n_iv_uses (data); i++)
4267 {
4268 use = iv_use (data, i);
4269
4270 old_cp = iv_ca_cand_for_use (ivs, use);
4271 if (old_cp->cand != cand)
4272 continue;
4273
4274 new_cp = NULL;
4275
4276 if (data->consider_all_candidates)
4277 {
4278 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4279 {
4280 if (ci == cand->id)
4281 continue;
4282
4283 cnd = iv_cand (data, ci);
4284
4285 cp = get_use_iv_cost (data, use, cnd);
4286 if (!cp)
4287 continue;
4288 if (!iv_ca_has_deps (ivs, cp))
4289 continue;
4290
4291 if (!cheaper_cost_pair (cp, new_cp))
4292 continue;
4293
4294 new_cp = cp;
4295 }
4296 }
4297 else
4298 {
4299 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
4300 {
4301 if (ci == cand->id)
4302 continue;
4303
4304 cnd = iv_cand (data, ci);
4305
4306 cp = get_use_iv_cost (data, use, cnd);
4307 if (!cp)
4308 continue;
4309 if (!iv_ca_has_deps (ivs, cp))
4310 continue;
4311
4312 if (!cheaper_cost_pair (cp, new_cp))
4313 continue;
4314
4315 new_cp = cp;
4316 }
4317 }
4318
4319 if (!new_cp)
4320 {
4321 iv_ca_delta_free (delta);
4322 return INFTY;
4323 }
4324
4325 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4326 }
4327
4328 iv_ca_delta_commit (data, ivs, *delta, true);
4329 cost = iv_ca_cost (ivs);
4330 iv_ca_delta_commit (data, ivs, *delta, false);
4331
4332 return cost;
8b11a64c
ZD
4333}
4334
36f5ada1
ZD
4335/* Try optimizing the set of candidates IVS by removing candidates different
4336 from to EXCEPT_CAND from it. Return cost of the new set, and store
4337 differences in DELTA. */
4338
4339static unsigned
4340iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
4341 struct iv_cand *except_cand, struct iv_ca_delta **delta)
4342{
4343 bitmap_iterator bi;
4344 struct iv_ca_delta *act_delta, *best_delta;
4345 unsigned i, best_cost, acost;
4346 struct iv_cand *cand;
4347
4348 best_delta = NULL;
4349 best_cost = iv_ca_cost (ivs);
4350
4351 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4352 {
4353 cand = iv_cand (data, i);
4354
4355 if (cand == except_cand)
4356 continue;
4357
4358 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
4359
4360 if (acost < best_cost)
4361 {
4362 best_cost = acost;
4363 iv_ca_delta_free (&best_delta);
4364 best_delta = act_delta;
4365 }
4366 else
4367 iv_ca_delta_free (&act_delta);
4368 }
4369
4370 if (!best_delta)
4371 {
4372 *delta = NULL;
4373 return best_cost;
4374 }
4375
4376 /* Recurse to possibly remove other unnecessary ivs. */
4377 iv_ca_delta_commit (data, ivs, best_delta, true);
4378 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
4379 iv_ca_delta_commit (data, ivs, best_delta, false);
4380 *delta = iv_ca_delta_join (best_delta, *delta);
4381 return best_cost;
4382}
4383
b1b02be2 4384/* Tries to extend the sets IVS in the best possible way in order
8b11a64c
ZD
4385 to express the USE. */
4386
4387static bool
b1b02be2 4388try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
8b11a64c
ZD
4389 struct iv_use *use)
4390{
b1b02be2 4391 unsigned best_cost, act_cost;
8b11a64c 4392 unsigned i;
38b0dcb8
ZD
4393 bitmap_iterator bi;
4394 struct iv_cand *cand;
b1b02be2
ZD
4395 struct iv_ca_delta *best_delta = NULL, *act_delta;
4396 struct cost_pair *cp;
4397
4398 iv_ca_add_use (data, ivs, use);
4399 best_cost = iv_ca_cost (ivs);
8b11a64c 4400
b1b02be2
ZD
4401 cp = iv_ca_cand_for_use (ivs, use);
4402 if (cp)
4403 {
4404 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
4405 iv_ca_set_no_cp (data, ivs, use);
4406 }
8b11a64c 4407
38b0dcb8
ZD
4408 /* First try important candidates. Only if it fails, try the specific ones.
4409 Rationale -- in loops with many variables the best choice often is to use
4410 just one generic biv. If we added here many ivs specific to the uses,
4411 the optimization algorithm later would be likely to get stuck in a local
4412 minimum, thus causing us to create too many ivs. The approach from
1c170b5e 4413 few ivs to more seems more likely to be successful -- starting from few
38b0dcb8
ZD
4414 ivs, replacing an expensive use by a specific iv should always be a
4415 win. */
4416 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
8b11a64c 4417 {
38b0dcb8
ZD
4418 cand = iv_cand (data, i);
4419
b1b02be2 4420 if (iv_ca_cand_used_p (ivs, cand))
8b11a64c
ZD
4421 continue;
4422
b1b02be2
ZD
4423 cp = get_use_iv_cost (data, use, cand);
4424 if (!cp)
4425 continue;
4426
4427 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 4428 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
4429 iv_ca_set_no_cp (data, ivs, use);
4430 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
8b11a64c
ZD
4431
4432 if (act_cost < best_cost)
4433 {
4434 best_cost = act_cost;
b1b02be2
ZD
4435
4436 iv_ca_delta_free (&best_delta);
4437 best_delta = act_delta;
8b11a64c 4438 }
b1b02be2
ZD
4439 else
4440 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
4441 }
4442
38b0dcb8
ZD
4443 if (best_cost == INFTY)
4444 {
4445 for (i = 0; i < use->n_map_members; i++)
4446 {
4447 cp = use->cost_map + i;
b1b02be2
ZD
4448 cand = cp->cand;
4449 if (!cand)
38b0dcb8
ZD
4450 continue;
4451
4452 /* Already tried this. */
b1b02be2
ZD
4453 if (cand->important)
4454 continue;
4455
4456 if (iv_ca_cand_used_p (ivs, cand))
38b0dcb8
ZD
4457 continue;
4458
b1b02be2
ZD
4459 act_delta = NULL;
4460 iv_ca_set_cp (data, ivs, use, cp);
36f5ada1 4461 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
b1b02be2
ZD
4462 iv_ca_set_no_cp (data, ivs, use);
4463 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
4464 cp, act_delta);
38b0dcb8
ZD
4465
4466 if (act_cost < best_cost)
4467 {
4468 best_cost = act_cost;
b1b02be2
ZD
4469
4470 if (best_delta)
4471 iv_ca_delta_free (&best_delta);
4472 best_delta = act_delta;
38b0dcb8 4473 }
b1b02be2
ZD
4474 else
4475 iv_ca_delta_free (&act_delta);
38b0dcb8
ZD
4476 }
4477 }
4478
b1b02be2
ZD
4479 iv_ca_delta_commit (data, ivs, best_delta, true);
4480 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
4481
4482 return (best_cost != INFTY);
4483}
4484
b1b02be2 4485/* Finds an initial assignment of candidates to uses. */
8b11a64c 4486
b1b02be2
ZD
4487static struct iv_ca *
4488get_initial_solution (struct ivopts_data *data)
8b11a64c 4489{
b1b02be2 4490 struct iv_ca *ivs = iv_ca_new (data);
8b11a64c
ZD
4491 unsigned i;
4492
4493 for (i = 0; i < n_iv_uses (data); i++)
b1b02be2
ZD
4494 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
4495 {
4496 iv_ca_free (&ivs);
4497 return NULL;
4498 }
8b11a64c 4499
b1b02be2 4500 return ivs;
8b11a64c
ZD
4501}
4502
b1b02be2 4503/* Tries to improve set of induction variables IVS. */
8b11a64c
ZD
4504
4505static bool
b1b02be2 4506try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
8b11a64c 4507{
36f5ada1
ZD
4508 unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
4509 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
b1b02be2 4510 struct iv_cand *cand;
8b11a64c 4511
36f5ada1 4512 /* Try extending the set of induction variables by one. */
8b11a64c
ZD
4513 for (i = 0; i < n_iv_cands (data); i++)
4514 {
b1b02be2
ZD
4515 cand = iv_cand (data, i);
4516
4517 if (iv_ca_cand_used_p (ivs, cand))
36f5ada1
ZD
4518 continue;
4519
4520 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
4521 if (!act_delta)
4522 continue;
4523
4524 /* If we successfully added the candidate and the set is small enough,
4525 try optimizing it by removing other candidates. */
4526 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
4527 {
4528 iv_ca_delta_commit (data, ivs, act_delta, true);
4529 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
4530 iv_ca_delta_commit (data, ivs, act_delta, false);
4531 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
4532 }
8b11a64c 4533
b1b02be2 4534 if (acost < best_cost)
8b11a64c 4535 {
b1b02be2 4536 best_cost = acost;
36f5ada1 4537 iv_ca_delta_free (&best_delta);
b1b02be2 4538 best_delta = act_delta;
8b11a64c 4539 }
8b11a64c 4540 else
b1b02be2 4541 iv_ca_delta_free (&act_delta);
8b11a64c
ZD
4542 }
4543
b1b02be2 4544 if (!best_delta)
36f5ada1
ZD
4545 {
4546 /* Try removing the candidates from the set instead. */
4547 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
4548
4549 /* Nothing more we can do. */
4550 if (!best_delta)
4551 return false;
4552 }
8b11a64c 4553
b1b02be2 4554 iv_ca_delta_commit (data, ivs, best_delta, true);
36f5ada1 4555 gcc_assert (best_cost == iv_ca_cost (ivs));
b1b02be2 4556 iv_ca_delta_free (&best_delta);
8b11a64c
ZD
4557 return true;
4558}
4559
4560/* Attempts to find the optimal set of induction variables. We do simple
4561 greedy heuristic -- we try to replace at most one candidate in the selected
4562 solution and remove the unused ivs while this improves the cost. */
4563
b1b02be2 4564static struct iv_ca *
8b11a64c
ZD
4565find_optimal_iv_set (struct ivopts_data *data)
4566{
b1b02be2
ZD
4567 unsigned i;
4568 struct iv_ca *set;
8b11a64c
ZD
4569 struct iv_use *use;
4570
b1b02be2
ZD
4571 /* Get the initial solution. */
4572 set = get_initial_solution (data);
4573 if (!set)
8b11a64c
ZD
4574 {
4575 if (dump_file && (dump_flags & TDF_DETAILS))
4576 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
8b11a64c
ZD
4577 return NULL;
4578 }
4579
4580 if (dump_file && (dump_flags & TDF_DETAILS))
4581 {
b1b02be2
ZD
4582 fprintf (dump_file, "Initial set of candidates:\n");
4583 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
4584 }
4585
b1b02be2 4586 while (try_improve_iv_set (data, set))
8b11a64c
ZD
4587 {
4588 if (dump_file && (dump_flags & TDF_DETAILS))
4589 {
b1b02be2
ZD
4590 fprintf (dump_file, "Improved to:\n");
4591 iv_ca_dump (data, dump_file, set);
8b11a64c
ZD
4592 }
4593 }
4594
4595 if (dump_file && (dump_flags & TDF_DETAILS))
b1b02be2 4596 fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));
8b11a64c
ZD
4597
4598 for (i = 0; i < n_iv_uses (data); i++)
4599 {
4600 use = iv_use (data, i);
b1b02be2 4601 use->selected = iv_ca_cand_for_use (set, use)->cand;
8b11a64c
ZD
4602 }
4603
8b11a64c
ZD
4604 return set;
4605}
4606
4607/* Creates a new induction variable corresponding to CAND. */
4608
4609static void
4610create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
4611{
4612 block_stmt_iterator incr_pos;
4613 tree base;
4614 bool after = false;
4615
4616 if (!cand->iv)
4617 return;
4618
4619 switch (cand->pos)
4620 {
4621 case IP_NORMAL:
4622 incr_pos = bsi_last (ip_normal_pos (data->current_loop));
4623 break;
4624
4625 case IP_END:
4626 incr_pos = bsi_last (ip_end_pos (data->current_loop));
4627 after = true;
4628 break;
4629
4630 case IP_ORIGINAL:
4631 /* Mark that the iv is preserved. */
4632 name_info (data, cand->var_before)->preserve_biv = true;
4633 name_info (data, cand->var_after)->preserve_biv = true;
4634
4635 /* Rewrite the increment so that it uses var_before directly. */
4636 find_interesting_uses_op (data, cand->var_after)->selected = cand;
4637
4638 return;
4639 }
4640
4641 gimple_add_tmp_var (cand->var_before);
4642 add_referenced_tmp_var (cand->var_before);
4643
4644 base = unshare_expr (cand->iv->base);
4645
4646 create_iv (base, cand->iv->step, cand->var_before, data->current_loop,
4647 &incr_pos, after, &cand->var_before, &cand->var_after);
4648}
4649
4650/* Creates new induction variables described in SET. */
4651
4652static void
b1b02be2 4653create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
8b11a64c
ZD
4654{
4655 unsigned i;
4656 struct iv_cand *cand;
87c476a2 4657 bitmap_iterator bi;
8b11a64c 4658
b1b02be2 4659 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
8b11a64c
ZD
4660 {
4661 cand = iv_cand (data, i);
4662 create_new_iv (data, cand);
87c476a2 4663 }
8b11a64c
ZD
4664}
4665
4666/* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
4667 is true, remove also the ssa name defined by the statement. */
4668
4669static void
4670remove_statement (tree stmt, bool including_defined_name)
4671{
4672 if (TREE_CODE (stmt) == PHI_NODE)
4673 {
4674 if (!including_defined_name)
4675 {
4676 /* Prevent the ssa name defined by the statement from being removed. */
4677 SET_PHI_RESULT (stmt, NULL);
4678 }
d19e3ef6 4679 remove_phi_node (stmt, NULL_TREE);
8b11a64c
ZD
4680 }
4681 else
4682 {
1a1804c2 4683 block_stmt_iterator bsi = bsi_for_stmt (stmt);
8b11a64c
ZD
4684
4685 bsi_remove (&bsi);
4686 }
4687}
4688
4689/* Rewrites USE (definition of iv used in a nonlinear expression)
4690 using candidate CAND. */
4691
4692static void
4693rewrite_use_nonlinear_expr (struct ivopts_data *data,
4694 struct iv_use *use, struct iv_cand *cand)
4695{
3520b745 4696 tree comp;
8b11a64c
ZD
4697 tree op, stmts, tgt, ass;
4698 block_stmt_iterator bsi, pbsi;
3520b745
ZD
4699
4700 /* An important special case -- if we are asked to express value of
4701 the original iv by itself, just exit; there is no need to
4702 introduce a new computation (that might also need casting the
4703 variable to unsigned and back). */
4704 if (cand->pos == IP_ORIGINAL
4705 && TREE_CODE (use->stmt) == MODIFY_EXPR
4706 && TREE_OPERAND (use->stmt, 0) == cand->var_after)
4707 {
4708 op = TREE_OPERAND (use->stmt, 1);
4709
4710 /* Be a bit careful. In case variable is expressed in some
4711 complicated way, rewrite it so that we may get rid of this
4712 complicated expression. */
4713 if ((TREE_CODE (op) == PLUS_EXPR
4714 || TREE_CODE (op) == MINUS_EXPR)
4715 && TREE_OPERAND (op, 0) == cand->var_before
4716 && TREE_CODE (TREE_OPERAND (op, 1)) == INTEGER_CST)
4717 return;
4718 }
4719
4720 comp = unshare_expr (get_computation (data->current_loop,
4721 use, cand));
1e128c5f 4722 switch (TREE_CODE (use->stmt))
8b11a64c 4723 {
1e128c5f 4724 case PHI_NODE:
8b11a64c
ZD
4725 tgt = PHI_RESULT (use->stmt);
4726
4727 /* If we should keep the biv, do not replace it. */
4728 if (name_info (data, tgt)->preserve_biv)
4729 return;
4730
4731 pbsi = bsi = bsi_start (bb_for_stmt (use->stmt));
4732 while (!bsi_end_p (pbsi)
4733 && TREE_CODE (bsi_stmt (pbsi)) == LABEL_EXPR)
4734 {
4735 bsi = pbsi;
4736 bsi_next (&pbsi);
4737 }
1e128c5f
GB
4738 break;
4739
4740 case MODIFY_EXPR:
8b11a64c 4741 tgt = TREE_OPERAND (use->stmt, 0);
1a1804c2 4742 bsi = bsi_for_stmt (use->stmt);
1e128c5f
GB
4743 break;
4744
4745 default:
4746 gcc_unreachable ();
8b11a64c 4747 }
8b11a64c
ZD
4748
4749 op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
4750
4751 if (TREE_CODE (use->stmt) == PHI_NODE)
4752 {
4753 if (stmts)
4754 bsi_insert_after (&bsi, stmts, BSI_CONTINUE_LINKING);
4755 ass = build2 (MODIFY_EXPR, TREE_TYPE (tgt), tgt, op);
4756 bsi_insert_after (&bsi, ass, BSI_NEW_STMT);
4757 remove_statement (use->stmt, false);
4758 SSA_NAME_DEF_STMT (tgt) = ass;
4759 }
4760 else
4761 {
4762 if (stmts)
4763 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4764 TREE_OPERAND (use->stmt, 1) = op;
4765 }
4766}
4767
4768/* Replaces ssa name in index IDX by its basic variable. Callback for
4769 for_each_index. */
4770
4771static bool
be35cf60 4772idx_remove_ssa_names (tree base, tree *idx,
8b11a64c
ZD
4773 void *data ATTRIBUTE_UNUSED)
4774{
be35cf60
ZD
4775 tree *op;
4776
8b11a64c
ZD
4777 if (TREE_CODE (*idx) == SSA_NAME)
4778 *idx = SSA_NAME_VAR (*idx);
be35cf60
ZD
4779
4780 if (TREE_CODE (base) == ARRAY_REF)
4781 {
4782 op = &TREE_OPERAND (base, 2);
4783 if (*op
4784 && TREE_CODE (*op) == SSA_NAME)
4785 *op = SSA_NAME_VAR (*op);
4786 op = &TREE_OPERAND (base, 3);
4787 if (*op
4788 && TREE_CODE (*op) == SSA_NAME)
4789 *op = SSA_NAME_VAR (*op);
4790 }
4791
8b11a64c
ZD
4792 return true;
4793}
4794
4795/* Unshares REF and replaces ssa names inside it by their basic variables. */
4796
4797static tree
4798unshare_and_remove_ssa_names (tree ref)
4799{
4800 ref = unshare_expr (ref);
4801 for_each_index (&ref, idx_remove_ssa_names, NULL);
4802
4803 return ref;
4804}
4805
4806/* Rewrites base of memory access OP with expression WITH in statement
4807 pointed to by BSI. */
4808
4809static void
4810rewrite_address_base (block_stmt_iterator *bsi, tree *op, tree with)
4811{
0bca51f0 4812 tree bvar, var, new_name, copy, name;
8b11a64c
ZD
4813 tree orig;
4814
7ccf35ed
DN
4815 var = bvar = get_base_address (*op);
4816
8b11a64c
ZD
4817 if (!var || TREE_CODE (with) != SSA_NAME)
4818 goto do_rewrite;
be35cf60
ZD
4819
4820 gcc_assert (TREE_CODE (var) != ALIGN_INDIRECT_REF);
4821 gcc_assert (TREE_CODE (var) != MISALIGNED_INDIRECT_REF);
4822 if (TREE_CODE (var) == INDIRECT_REF)
8b11a64c
ZD
4823 var = TREE_OPERAND (var, 0);
4824 if (TREE_CODE (var) == SSA_NAME)
4825 {
4826 name = var;
4827 var = SSA_NAME_VAR (var);
4828 }
4829 else if (DECL_P (var))
4830 name = NULL_TREE;
4831 else
4832 goto do_rewrite;
4833
8b11a64c
ZD
4834 /* We need to add a memory tag for the variable. But we do not want
4835 to add it to the temporary used for the computations, since this leads
4836 to problems in redundancy elimination when there are common parts
4837 in two computations referring to the different arrays. So we copy
4838 the variable to a new temporary. */
4839 copy = build2 (MODIFY_EXPR, void_type_node, NULL_TREE, with);
0bca51f0 4840
8b11a64c
ZD
4841 if (name)
4842 new_name = duplicate_ssa_name (name, copy);
4843 else
4844 {
0bca51f0
DN
4845 tree tag = var_ann (var)->type_mem_tag;
4846 tree new_ptr = create_tmp_var (TREE_TYPE (with), "ruatmp");
4847 add_referenced_tmp_var (new_ptr);
4848 if (tag)
4849 var_ann (new_ptr)->type_mem_tag = tag;
4850 else
4851 add_type_alias (new_ptr, var);
4852 new_name = make_ssa_name (new_ptr, copy);
8b11a64c 4853 }
0bca51f0 4854
8b11a64c 4855 TREE_OPERAND (copy, 0) = new_name;
f430bae8 4856 update_stmt (copy);
8b11a64c
ZD
4857 bsi_insert_before (bsi, copy, BSI_SAME_STMT);
4858 with = new_name;
4859
4860do_rewrite:
4861
4862 orig = NULL_TREE;
be35cf60
ZD
4863 gcc_assert (TREE_CODE (*op) != ALIGN_INDIRECT_REF);
4864 gcc_assert (TREE_CODE (*op) != MISALIGNED_INDIRECT_REF);
4865
4866 if (TREE_CODE (*op) == INDIRECT_REF)
8b11a64c
ZD
4867 orig = REF_ORIGINAL (*op);
4868 if (!orig)
4869 orig = unshare_and_remove_ssa_names (*op);
4870
be35cf60 4871 *op = build1 (INDIRECT_REF, TREE_TYPE (*op), with);
7ccf35ed 4872
8b11a64c
ZD
4873 /* Record the original reference, for purposes of alias analysis. */
4874 REF_ORIGINAL (*op) = orig;
0bca51f0
DN
4875
4876 /* Virtual operands in the original statement may have to be renamed
4877 because of the replacement. */
4878 mark_new_vars_to_rename (bsi_stmt (*bsi));
8b11a64c
ZD
4879}
4880
4881/* Rewrites USE (address that is an iv) using candidate CAND. */
4882
4883static void
4884rewrite_use_address (struct ivopts_data *data,
4885 struct iv_use *use, struct iv_cand *cand)
4886{
4887 tree comp = unshare_expr (get_computation (data->current_loop,
4888 use, cand));
1a1804c2 4889 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
8b11a64c
ZD
4890 tree stmts;
4891 tree op = force_gimple_operand (comp, &stmts, true, NULL_TREE);
4892
4893 if (stmts)
4894 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4895
4896 rewrite_address_base (&bsi, use->op_p, op);
4897}
4898
4899/* Rewrites USE (the condition such that one of the arguments is an iv) using
4900 candidate CAND. */
4901
4902static void
4903rewrite_use_compare (struct ivopts_data *data,
4904 struct iv_use *use, struct iv_cand *cand)
4905{
4906 tree comp;
4907 tree *op_p, cond, op, stmts, bound;
1a1804c2 4908 block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
8b11a64c
ZD
4909 enum tree_code compare;
4910
ca4c3169 4911 if (may_eliminate_iv (data, use, cand, &compare, &bound))
8b11a64c 4912 {
9e7376e5
ZD
4913 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
4914 tree var_type = TREE_TYPE (var);
4915
4916 bound = fold_convert (var_type, bound);
8b11a64c
ZD
4917 op = force_gimple_operand (unshare_expr (bound), &stmts,
4918 true, NULL_TREE);
4919
4920 if (stmts)
4921 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4922
9e7376e5 4923 *use->op_p = build2 (compare, boolean_type_node, var, op);
f430bae8 4924 update_stmt (use->stmt);
8b11a64c
ZD
4925 return;
4926 }
4927
4928 /* The induction variable elimination failed; just express the original
4929 giv. */
4930 comp = unshare_expr (get_computation (data->current_loop, use, cand));
4931
4932 cond = *use->op_p;
4933 op_p = &TREE_OPERAND (cond, 0);
4934 if (TREE_CODE (*op_p) != SSA_NAME
4935 || zero_p (get_iv (data, *op_p)->step))
4936 op_p = &TREE_OPERAND (cond, 1);
4937
4938 op = force_gimple_operand (comp, &stmts, true, SSA_NAME_VAR (*op_p));
4939 if (stmts)
4940 bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);
4941
4942 *op_p = op;
4943}
4944
4945/* Ensure that operand *OP_P may be used at the end of EXIT without
4946 violating loop closed ssa form. */
4947
4948static void
4949protect_loop_closed_ssa_form_use (edge exit, use_operand_p op_p)
4950{
4951 basic_block def_bb;
4952 struct loop *def_loop;
4953 tree phi, use;
4954
4955 use = USE_FROM_PTR (op_p);
4956 if (TREE_CODE (use) != SSA_NAME)
4957 return;
4958
4959 def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (use));
4960 if (!def_bb)
4961 return;
4962
4963 def_loop = def_bb->loop_father;
4964 if (flow_bb_inside_loop_p (def_loop, exit->dest))
4965 return;
4966
4967 /* Try finding a phi node that copies the value out of the loop. */
bb29d951 4968 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
4969 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == use)
4970 break;
4971
4972 if (!phi)
4973 {
4974 /* Create such a phi node. */
4975 tree new_name = duplicate_ssa_name (use, NULL);
4976
4977 phi = create_phi_node (new_name, exit->dest);
4978 SSA_NAME_DEF_STMT (new_name) = phi;
d2e398df 4979 add_phi_arg (phi, use, exit);
8b11a64c
ZD
4980 }
4981
4982 SET_USE (op_p, PHI_RESULT (phi));
4983}
4984
4985/* Ensure that operands of STMT may be used at the end of EXIT without
4986 violating loop closed ssa form. */
4987
4988static void
4989protect_loop_closed_ssa_form (edge exit, tree stmt)
4990{
4991 use_optype uses;
4992 vuse_optype vuses;
4993 v_may_def_optype v_may_defs;
4994 unsigned i;
4995
8b11a64c
ZD
4996 uses = STMT_USE_OPS (stmt);
4997 for (i = 0; i < NUM_USES (uses); i++)
4998 protect_loop_closed_ssa_form_use (exit, USE_OP_PTR (uses, i));
4999
5000 vuses = STMT_VUSE_OPS (stmt);
5001 for (i = 0; i < NUM_VUSES (vuses); i++)
5002 protect_loop_closed_ssa_form_use (exit, VUSE_OP_PTR (vuses, i));
5003
5004 v_may_defs = STMT_V_MAY_DEF_OPS (stmt);
5005 for (i = 0; i < NUM_V_MAY_DEFS (v_may_defs); i++)
5006 protect_loop_closed_ssa_form_use (exit, V_MAY_DEF_OP_PTR (v_may_defs, i));
5007}
5008
5009/* STMTS compute a value of a phi argument OP on EXIT of a loop. Arrange things
5010 so that they are emitted on the correct place, and so that the loop closed
5011 ssa form is preserved. */
5012
5013static void
5014compute_phi_arg_on_exit (edge exit, tree stmts, tree op)
5015{
5016 tree_stmt_iterator tsi;
5017 block_stmt_iterator bsi;
5018 tree phi, stmt, def, next;
5019
c5cbcccf 5020 if (!single_pred_p (exit->dest))
8b11a64c
ZD
5021 split_loop_exit_edge (exit);
5022
f430bae8
AM
5023 /* Ensure there is label in exit->dest, so that we can
5024 insert after it. */
5025 tree_block_label (exit->dest);
5026 bsi = bsi_after_labels (exit->dest);
5027
8b11a64c
ZD
5028 if (TREE_CODE (stmts) == STATEMENT_LIST)
5029 {
5030 for (tsi = tsi_start (stmts); !tsi_end_p (tsi); tsi_next (&tsi))
f430bae8
AM
5031 {
5032 bsi_insert_after (&bsi, tsi_stmt (tsi), BSI_NEW_STMT);
5033 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5034 }
8b11a64c
ZD
5035 }
5036 else
f430bae8
AM
5037 {
5038 bsi_insert_after (&bsi, stmts, BSI_NEW_STMT);
5039 protect_loop_closed_ssa_form (exit, bsi_stmt (bsi));
5040 }
8b11a64c
ZD
5041
5042 if (!op)
5043 return;
5044
5045 for (phi = phi_nodes (exit->dest); phi; phi = next)
5046 {
eaf0dc02 5047 next = PHI_CHAIN (phi);
8b11a64c
ZD
5048
5049 if (PHI_ARG_DEF_FROM_EDGE (phi, exit) == op)
5050 {
5051 def = PHI_RESULT (phi);
5052 remove_statement (phi, false);
5053 stmt = build2 (MODIFY_EXPR, TREE_TYPE (op),
5054 def, op);
5055 SSA_NAME_DEF_STMT (def) = stmt;
5056 bsi_insert_after (&bsi, stmt, BSI_CONTINUE_LINKING);
5057 }
5058 }
5059}
5060
5061/* Rewrites the final value of USE (that is only needed outside of the loop)
5062 using candidate CAND. */
5063
5064static void
5065rewrite_use_outer (struct ivopts_data *data,
5066 struct iv_use *use, struct iv_cand *cand)
5067{
5068 edge exit;
5069 tree value, op, stmts, tgt;
5070 tree phi;
5071
1e128c5f
GB
5072 switch (TREE_CODE (use->stmt))
5073 {
5074 case PHI_NODE:
5075 tgt = PHI_RESULT (use->stmt);
5076 break;
5077 case MODIFY_EXPR:
5078 tgt = TREE_OPERAND (use->stmt, 0);
5079 break;
5080 default:
5081 gcc_unreachable ();
5082 }
5083
8b11a64c
ZD
5084 exit = single_dom_exit (data->current_loop);
5085
5086 if (exit)
5087 {
5088 if (!cand->iv)
5089 {
ca4c3169 5090 bool ok = may_replace_final_value (data, use, &value);
1e128c5f 5091 gcc_assert (ok);
8b11a64c
ZD
5092 }
5093 else
5094 value = get_computation_at (data->current_loop,
5095 use, cand, last_stmt (exit->src));
5096
2f4675b4 5097 value = unshare_expr (value);
8b11a64c
ZD
5098 op = force_gimple_operand (value, &stmts, true, SSA_NAME_VAR (tgt));
5099
5100 /* If we will preserve the iv anyway and we would need to perform
5101 some computation to replace the final value, do nothing. */
5102 if (stmts && name_info (data, tgt)->preserve_biv)
5103 return;
5104
bb29d951 5105 for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
8b11a64c
ZD
5106 {
5107 use_operand_p use_p = PHI_ARG_DEF_PTR_FROM_EDGE (phi, exit);
5108
5109 if (USE_FROM_PTR (use_p) == tgt)
5110 SET_USE (use_p, op);
5111 }
5112
5113 if (stmts)
5114 compute_phi_arg_on_exit (exit, stmts, op);
5115
5116 /* Enable removal of the statement. We cannot remove it directly,
5117 since we may still need the aliasing information attached to the
5118 ssa name defined by it. */
5119 name_info (data, tgt)->iv->have_use_for = false;
5120 return;
5121 }
5122
5123 /* If the variable is going to be preserved anyway, there is nothing to
5124 do. */
5125 if (name_info (data, tgt)->preserve_biv)
5126 return;
5127
5128 /* Otherwise we just need to compute the iv. */
5129 rewrite_use_nonlinear_expr (data, use, cand);
5130}
5131
5132/* Rewrites USE using candidate CAND. */
5133
5134static void
5135rewrite_use (struct ivopts_data *data,
5136 struct iv_use *use, struct iv_cand *cand)
5137{
5138 switch (use->type)
5139 {
5140 case USE_NONLINEAR_EXPR:
5141 rewrite_use_nonlinear_expr (data, use, cand);
5142 break;
5143
5144 case USE_OUTER:
5145 rewrite_use_outer (data, use, cand);
5146 break;
5147
5148 case USE_ADDRESS:
5149 rewrite_use_address (data, use, cand);
5150 break;
5151
5152 case USE_COMPARE:
5153 rewrite_use_compare (data, use, cand);
5154 break;
5155
5156 default:
1e128c5f 5157 gcc_unreachable ();
8b11a64c 5158 }
f430bae8 5159 update_stmt (use->stmt);
8b11a64c
ZD
5160}
5161
5162/* Rewrite the uses using the selected induction variables. */
5163
5164static void
5165rewrite_uses (struct ivopts_data *data)
5166{
5167 unsigned i;
5168 struct iv_cand *cand;
5169 struct iv_use *use;
5170
5171 for (i = 0; i < n_iv_uses (data); i++)
5172 {
5173 use = iv_use (data, i);
5174 cand = use->selected;
1e128c5f 5175 gcc_assert (cand);
8b11a64c
ZD
5176
5177 rewrite_use (data, use, cand);
5178 }
5179}
5180
5181/* Removes the ivs that are not used after rewriting. */
5182
5183static void
5184remove_unused_ivs (struct ivopts_data *data)
5185{
5186 unsigned j;
87c476a2 5187 bitmap_iterator bi;
8b11a64c 5188
87c476a2 5189 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
8b11a64c
ZD
5190 {
5191 struct version_info *info;
5192
5193 info = ver_info (data, j);
5194 if (info->iv
5195 && !zero_p (info->iv->step)
5196 && !info->inv_id
5197 && !info->iv->have_use_for
5198 && !info->preserve_biv)
5199 remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
87c476a2 5200 }
8b11a64c
ZD
5201}
5202
5203/* Frees data allocated by the optimization of a single loop. */
5204
5205static void
5206free_loop_data (struct ivopts_data *data)
5207{
5208 unsigned i, j;
87c476a2 5209 bitmap_iterator bi;
8b11a64c 5210
ca4c3169
ZD
5211 htab_empty (data->niters);
5212
87c476a2 5213 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
8b11a64c
ZD
5214 {
5215 struct version_info *info;
5216
5217 info = ver_info (data, i);
5218 if (info->iv)
5219 free (info->iv);
5220 info->iv = NULL;
5221 info->has_nonlin_use = false;
5222 info->preserve_biv = false;
5223 info->inv_id = 0;
87c476a2 5224 }
8b11a64c 5225 bitmap_clear (data->relevant);
b1b02be2 5226 bitmap_clear (data->important_candidates);
8b11a64c
ZD
5227
5228 for (i = 0; i < n_iv_uses (data); i++)
5229 {
5230 struct iv_use *use = iv_use (data, i);
5231
5232 free (use->iv);
8bdbfff5 5233 BITMAP_FREE (use->related_cands);
8b11a64c
ZD
5234 for (j = 0; j < use->n_map_members; j++)
5235 if (use->cost_map[j].depends_on)
8bdbfff5 5236 BITMAP_FREE (use->cost_map[j].depends_on);
8b11a64c
ZD
5237 free (use->cost_map);
5238 free (use);
5239 }
5240 VARRAY_POP_ALL (data->iv_uses);
5241
5242 for (i = 0; i < n_iv_cands (data); i++)
5243 {
5244 struct iv_cand *cand = iv_cand (data, i);
5245
5246 if (cand->iv)
5247 free (cand->iv);
5248 free (cand);
5249 }
5250 VARRAY_POP_ALL (data->iv_candidates);
5251
5252 if (data->version_info_size < num_ssa_names)
5253 {
5254 data->version_info_size = 2 * num_ssa_names;
5255 free (data->version_info);
5256 data->version_info = xcalloc (data->version_info_size,
5257 sizeof (struct version_info));
5258 }
5259
5260 data->max_inv_id = 0;
5261
5262 for (i = 0; i < VARRAY_ACTIVE_SIZE (decl_rtl_to_reset); i++)
5263 {
5264 tree obj = VARRAY_GENERIC_PTR_NOGC (decl_rtl_to_reset, i);
5265
5266 SET_DECL_RTL (obj, NULL_RTX);
5267 }
5268 VARRAY_POP_ALL (decl_rtl_to_reset);
5269}
5270
5271/* Finalizes data structures used by the iv optimization pass. LOOPS is the
5272 loop tree. */
5273
5274static void
5275tree_ssa_iv_optimize_finalize (struct loops *loops, struct ivopts_data *data)
5276{
5277 unsigned i;
5278
5279 for (i = 1; i < loops->num; i++)
5280 if (loops->parray[i])
5281 {
5282 free (loops->parray[i]->aux);
5283 loops->parray[i]->aux = NULL;
5284 }
5285
5286 free_loop_data (data);
5287 free (data->version_info);
8bdbfff5
NS
5288 BITMAP_FREE (data->relevant);
5289 BITMAP_FREE (data->important_candidates);
ca4c3169 5290 htab_delete (data->niters);
8b11a64c
ZD
5291
5292 VARRAY_FREE (decl_rtl_to_reset);
5293 VARRAY_FREE (data->iv_uses);
5294 VARRAY_FREE (data->iv_candidates);
5295}
5296
5297/* Optimizes the LOOP. Returns true if anything changed. */
5298
5299static bool
5300tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5301{
5302 bool changed = false;
b1b02be2 5303 struct iv_ca *iv_ca;
8b11a64c
ZD
5304 edge exit;
5305
5306 data->current_loop = loop;
5307
5308 if (dump_file && (dump_flags & TDF_DETAILS))
5309 {
5310 fprintf (dump_file, "Processing loop %d\n", loop->num);
5311
5312 exit = single_dom_exit (loop);
5313 if (exit)
5314 {
5315 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5316 exit->src->index, exit->dest->index);
5317 print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
5318 fprintf (dump_file, "\n");
5319 }
5320
5321 fprintf (dump_file, "\n");
5322 }
5323
5324 /* For each ssa name determines whether it behaves as an induction variable
5325 in some loop. */
5326 if (!find_induction_variables (data))
5327 goto finish;
5328
5329 /* Finds interesting uses (item 1). */
5330 find_interesting_uses (data);
5331 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5332 goto finish;
5333
5334 /* Finds candidates for the induction variables (item 2). */
5335 find_iv_candidates (data);
5336
5337 /* Calculates the costs (item 3, part 1). */
5338 determine_use_iv_costs (data);
5339 determine_iv_costs (data);
5340 determine_set_costs (data);
5341
5342 /* Find the optimal set of induction variables (item 3, part 2). */
b1b02be2
ZD
5343 iv_ca = find_optimal_iv_set (data);
5344 if (!iv_ca)
8b11a64c
ZD
5345 goto finish;
5346 changed = true;
5347
5348 /* Create the new induction variables (item 4, part 1). */
b1b02be2
ZD
5349 create_new_ivs (data, iv_ca);
5350 iv_ca_free (&iv_ca);
8b11a64c
ZD
5351
5352 /* Rewrite the uses (item 4, part 2). */
5353 rewrite_uses (data);
5354
5355 /* Remove the ivs that are unused after rewriting. */
5356 remove_unused_ivs (data);
5357
8b11a64c
ZD
5358 /* We have changed the structure of induction variables; it might happen
5359 that definitions in the scev database refer to some of them that were
5360 eliminated. */
5361 scev_reset ();
5362
5363finish:
5364 free_loop_data (data);
5365
5366 return changed;
5367}
5368
5369/* Main entry point. Optimizes induction variables in LOOPS. */
5370
5371void
5372tree_ssa_iv_optimize (struct loops *loops)
5373{
5374 struct loop *loop;
5375 struct ivopts_data data;
5376
5377 tree_ssa_iv_optimize_init (loops, &data);
5378
5379 /* Optimize the loops starting with the innermost ones. */
5380 loop = loops->tree_root;
5381 while (loop->inner)
5382 loop = loop->inner;
5383
8b11a64c
ZD
5384 /* Scan the loops, inner ones first. */
5385 while (loop != loops->tree_root)
5386 {
8679c649
JH
5387 if (dump_file && (dump_flags & TDF_DETAILS))
5388 flow_loop_dump (loop, dump_file, NULL, 1);
e9472263
ZD
5389
5390 tree_ssa_iv_optimize_loop (&data, loop);
8b11a64c
ZD
5391
5392 if (loop->next)
5393 {
5394 loop = loop->next;
5395 while (loop->inner)
5396 loop = loop->inner;
5397 }
5398 else
5399 loop = loop->outer;
5400 }
5401
0bca51f0
DN
5402 /* FIXME. IV opts introduces new aliases and call-clobbered
5403 variables, which need to be renamed. However, when we call the
5404 renamer, not all statements will be scanned for operands. In
5405 particular, the newly introduced aliases may appear in statements
5406 that are considered "unmodified", so the renamer will not get a
5407 chance to rename those operands.
5408
5409 Work around this problem by forcing an operand re-scan on every
5410 statement. This will not be necessary once the new operand
5411 scanner is implemented. */
5412 if (need_ssa_update_p ())
5413 {
5414 basic_block bb;
5415 block_stmt_iterator si;
5416 FOR_EACH_BB (bb)
5417 for (si = bsi_start (bb); !bsi_end_p (si); bsi_next (&si))
5418 update_stmt (bsi_stmt (si));
5419
5420 update_ssa (TODO_update_ssa);
5421 }
e9472263 5422
0bca51f0 5423 rewrite_into_loop_closed_ssa (NULL);
8b11a64c
ZD
5424 tree_ssa_iv_optimize_finalize (loops, &data);
5425}