/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
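
/* As an illustrative sketch (the actual decisions depend on the target
   addressing modes and cost model), given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may strength-reduce the multiplication hidden in the array
   address computation and eliminate the counter, leaving a single
   pointer induction variable:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;  */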

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-ssa-propagate.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5


/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs wrt the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at; /* For original biv, the statement where it is
                            incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is a ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains a ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains a ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Clear the ssa name, in case it was not already reset by the caller, so
     that it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX For a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  E.g., for TOP = 4 * i and BOT = i,
   MUL is set to 4.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == MINUS_EXPR)
            expr = fold_build1 (NEGATE_EXPR, type, op1);
          else
            expr = op1;
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case MULT_EXPR:
      op1 = TREE_OPERAND (expr, 1);
      if (!cst_and_fits_in_hwi (op1))
        return orig_expr;

      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, false, false, &off0);
      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      *offset = off0 * int_cst_value (op1);
      if (integer_zerop (op0))
        expr = op0;
      else
        expr = fold_build2 (MULT_EXPR, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case MEM_REF:
      /* ??? Offset operand?  */
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;
  /* Inside an address, we might strip the top level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  E.g., for
   POINTER_PLUS_EXPR <p, 4>, 4 is stored to OFFSET and p is returned.  */
2055
2056 static tree
2057 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2058 {
2059 return strip_offset_1 (expr, false, false, offset);
2060 }
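
/* For instance, assuming 4-byte array elements: given the address &a[i + 3],
   strip_offset returns an expression equivalent to &a[i] and stores 12 in
   *OFFSET, so the constant part can later be folded into the addressing
   mode.  */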
2061
2062 /* Returns a variant of TYPE that can be used as a base for different uses.
2063 We return an unsigned type with the same precision, which avoids problems
2064 with overflows. */
2065
2066 static tree
2067 generic_type_for (tree type)
2068 {
2069 if (POINTER_TYPE_P (type))
2070 return unsigned_type_for (type);
2071
2072 if (TYPE_UNSIGNED (type))
2073 return type;
2074
2075 return unsigned_type_for (type);
2076 }
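
/* E.g., assuming 32-bit int and pointers: both int and int * are mapped to a
   32-bit unsigned integer type, so the candidate arithmetic happens in a
   wrapping type and cannot run into undefined signed overflow.  */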
2077
2078 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2079 the bitmap in which we should store them. */
2080
2081 static struct ivopts_data *fd_ivopts_data;
2082 static tree
2083 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2084 {
2085 bitmap *depends_on = (bitmap *) data;
2086 struct version_info *info;
2087
2088 if (TREE_CODE (*expr_p) != SSA_NAME)
2089 return NULL_TREE;
2090 info = name_info (fd_ivopts_data, *expr_p);
2091
2092 if (!info->inv_id || info->has_nonlin_use)
2093 return NULL_TREE;
2094
2095 if (!*depends_on)
2096 *depends_on = BITMAP_ALLOC (NULL);
2097 bitmap_set_bit (*depends_on, info->inv_id);
2098
2099 return NULL_TREE;
2100 }
2101
2102 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2103 position to POS. If USE is not NULL, the candidate is set as related to
2104 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2105 replacement of the final value of the iv by a direct computation. */
2106
2107 static struct iv_cand *
2108 add_candidate_1 (struct ivopts_data *data,
2109 tree base, tree step, bool important, enum iv_position pos,
2110 struct iv_use *use, gimple incremented_at)
2111 {
2112 unsigned i;
2113 struct iv_cand *cand = NULL;
2114 tree type, orig_type;
2115
2116 if (base)
2117 {
2118 orig_type = TREE_TYPE (base);
2119 type = generic_type_for (orig_type);
2120 if (type != orig_type)
2121 {
2122 base = fold_convert (type, base);
2123 step = fold_convert (type, step);
2124 }
2125 }
2126
2127 for (i = 0; i < n_iv_cands (data); i++)
2128 {
2129 cand = iv_cand (data, i);
2130
2131 if (cand->pos != pos)
2132 continue;
2133
2134 if (cand->incremented_at != incremented_at
2135 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2136 && cand->ainc_use != use))
2137 continue;
2138
2139 if (!cand->iv)
2140 {
2141 if (!base && !step)
2142 break;
2143
2144 continue;
2145 }
2146
2147 if (!base && !step)
2148 continue;
2149
2150 if (operand_equal_p (base, cand->iv->base, 0)
2151 && operand_equal_p (step, cand->iv->step, 0))
2152 break;
2153 }
2154
2155 if (i == n_iv_cands (data))
2156 {
2157 cand = XCNEW (struct iv_cand);
2158 cand->id = i;
2159
2160 if (!base && !step)
2161 cand->iv = NULL;
2162 else
2163 cand->iv = alloc_iv (base, step);
2164
2165 cand->pos = pos;
2166 if (pos != IP_ORIGINAL && cand->iv)
2167 {
2168 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2169 cand->var_after = cand->var_before;
2170 }
2171 cand->important = important;
2172 cand->incremented_at = incremented_at;
2173 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2174
2175 if (step
2176 && TREE_CODE (step) != INTEGER_CST)
2177 {
2178 fd_ivopts_data = data;
2179 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2180 }
2181
2182 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2183 cand->ainc_use = use;
2184 else
2185 cand->ainc_use = NULL;
2186
2187 if (dump_file && (dump_flags & TDF_DETAILS))
2188 dump_cand (dump_file, cand);
2189 }
2190
2191 if (important && !cand->important)
2192 {
2193 cand->important = true;
2194 if (dump_file && (dump_flags & TDF_DETAILS))
2195 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2196 }
2197
2198 if (use)
2199 {
2200 bitmap_set_bit (use->related_cands, i);
2201 if (dump_file && (dump_flags & TDF_DETAILS))
2202 fprintf (dump_file, "Candidate %d is related to use %d\n",
2203 cand->id, use->id);
2204 }
2205
2206 return cand;
2207 }
2208
2209 /* Returns true if incrementing the induction variable at the end of the LOOP
2210 is allowed.
2211
2212 The purpose is to avoid splitting the latch edge with a biv increment, thus
2213 creating a jump, possibly confusing other optimization passes and leaving
2214 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2215 is not available (so we do not have a better alternative), or if the latch
2216 edge is already nonempty. */
2217
2218 static bool
2219 allow_ip_end_pos_p (struct loop *loop)
2220 {
2221 if (!ip_normal_pos (loop))
2222 return true;
2223
2224 if (!empty_block_p (ip_end_pos (loop)))
2225 return true;
2226
2227 return false;
2228 }
2229
2230 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2231 Important field is set to IMPORTANT. */
2232
2233 static void
2234 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2235 bool important, struct iv_use *use)
2236 {
2237 basic_block use_bb = gimple_bb (use->stmt);
2238 enum machine_mode mem_mode;
2239 unsigned HOST_WIDE_INT cstepi;
2240
2241 /* If we insert the increment in any position other than the standard
2242 ones, we must ensure that it is incremented once per iteration.
2243 It must not be in an inner nested loop, or one side of an if
2244 statement. */
2245 if (use_bb->loop_father != data->current_loop
2246 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2247 || stmt_could_throw_p (use->stmt)
2248 || !cst_and_fits_in_hwi (step))
2249 return;
2250
2251 cstepi = int_cst_value (step);
2252
2253 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2254 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2255 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2256 {
2257 enum tree_code code = MINUS_EXPR;
2258 tree new_base;
2259 tree new_step = step;
2260
2261 if (POINTER_TYPE_P (TREE_TYPE (base)))
2262 {
2263 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2264 code = POINTER_PLUS_EXPR;
2265 }
2266 else
2267 new_step = fold_convert (TREE_TYPE (base), new_step);
2268 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2269 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2270 use->stmt);
2271 }
2272 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2273 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2274 {
2275 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2276 use->stmt);
2277 }
2278 }
2279
2280 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2281 position to POS. If USE is not NULL, the candidate is set as related to
2282 it. The candidate computation is scheduled on all available positions. */
2283
2284 static void
2285 add_candidate (struct ivopts_data *data,
2286 tree base, tree step, bool important, struct iv_use *use)
2287 {
2288 if (ip_normal_pos (data->current_loop))
2289 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2290 if (ip_end_pos (data->current_loop)
2291 && allow_ip_end_pos_p (data->current_loop))
2292 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2293
2294 if (use != NULL && use->type == USE_ADDRESS)
2295 add_autoinc_candidates (data, base, step, important, use);
2296 }
2297
2298 /* Add a standard "0 + 1 * iteration" iv candidate for a
2299 type with SIZE bits. */
2300
2301 static void
2302 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2303 unsigned int size)
2304 {
2305 tree type = lang_hooks.types.type_for_size (size, true);
2306 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2307 true, NULL);
2308 }
2309
2310 /* Adds standard iv candidates. */
2311
2312 static void
2313 add_standard_iv_candidates (struct ivopts_data *data)
2314 {
2315 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2316
2317 /* The same for a double-integer type if it is still fast enough. */
2318 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2319 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2320 }
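
/* For example, on a target where BITS_PER_WORD is 64 and INT_TYPE_SIZE is 32
   (an assumption, not true everywhere), this adds two important candidates of
   the shape 0 + 1 * i, one in a 32-bit and one in a 64-bit unsigned type.  */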
2321
2322
2323 /* Adds candidates based on the old induction variable IV. */
2324
2325 static void
2326 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2327 {
2328 gimple phi;
2329 tree def;
2330 struct iv_cand *cand;
2331
2332 add_candidate (data, iv->base, iv->step, true, NULL);
2333
2334 /* The same, but with initial value zero. */
2335 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2336 add_candidate (data, size_int (0), iv->step, true, NULL);
2337 else
2338 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2339 iv->step, true, NULL);
2340
2341 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2342 if (gimple_code (phi) == GIMPLE_PHI)
2343 {
2344 /* Additionally record the possibility of leaving the original iv
2345 untouched. */
2346 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2347 cand = add_candidate_1 (data,
2348 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2349 SSA_NAME_DEF_STMT (def));
2350 cand->var_before = iv->ssa_name;
2351 cand->var_after = def;
2352 }
2353 }
2354
2355 /* Adds candidates based on the old induction variables. */
2356
2357 static void
2358 add_old_ivs_candidates (struct ivopts_data *data)
2359 {
2360 unsigned i;
2361 struct iv *iv;
2362 bitmap_iterator bi;
2363
2364 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2365 {
2366 iv = ver_info (data, i)->iv;
2367 if (iv && iv->biv_p && !integer_zerop (iv->step))
2368 add_old_iv_candidates (data, iv);
2369 }
2370 }
2371
2372 /* Adds candidates based on the value of the induction variable IV and USE. */
2373
2374 static void
2375 add_iv_value_candidates (struct ivopts_data *data,
2376 struct iv *iv, struct iv_use *use)
2377 {
2378 unsigned HOST_WIDE_INT offset;
2379 tree base;
2380 tree basetype;
2381
2382 add_candidate (data, iv->base, iv->step, false, use);
2383
2384 /* The same, but with initial value zero. Make such a variable important,
2385 since it is generic enough that many uses may possibly be based
2386 on it. */
2387 basetype = TREE_TYPE (iv->base);
2388 if (POINTER_TYPE_P (basetype))
2389 basetype = sizetype;
2390 add_candidate (data, build_int_cst (basetype, 0),
2391 iv->step, true, use);
2392
2393 /* Third, try removing the constant offset. Make sure to even
2394 add a candidate for &a[0] vs. (T *)&a. */
2395 base = strip_offset (iv->base, &offset);
2396 if (offset
2397 || base != iv->base)
2398 add_candidate (data, base, iv->step, false, use);
2399 }
2400
2401 /* Adds candidates based on the uses. */
2402
2403 static void
2404 add_derived_ivs_candidates (struct ivopts_data *data)
2405 {
2406 unsigned i;
2407
2408 for (i = 0; i < n_iv_uses (data); i++)
2409 {
2410 struct iv_use *use = iv_use (data, i);
2411
2412 if (!use)
2413 continue;
2414
2415 switch (use->type)
2416 {
2417 case USE_NONLINEAR_EXPR:
2418 case USE_COMPARE:
2419 case USE_ADDRESS:
2420 /* Just add the ivs based on the value of the iv used here. */
2421 add_iv_value_candidates (data, use->iv, use);
2422 break;
2423
2424 default:
2425 gcc_unreachable ();
2426 }
2427 }
2428 }
2429
2430 /* Record important candidates and add them to related_cands bitmaps
2431 if needed. */
2432
2433 static void
2434 record_important_candidates (struct ivopts_data *data)
2435 {
2436 unsigned i;
2437 struct iv_use *use;
2438
2439 for (i = 0; i < n_iv_cands (data); i++)
2440 {
2441 struct iv_cand *cand = iv_cand (data, i);
2442
2443 if (cand->important)
2444 bitmap_set_bit (data->important_candidates, i);
2445 }
2446
2447 data->consider_all_candidates = (n_iv_cands (data)
2448 <= CONSIDER_ALL_CANDIDATES_BOUND);
2449
2450 if (data->consider_all_candidates)
2451 {
2452 /* We will not need "related_cands" bitmaps in this case,
2453 so release them to decrease peak memory consumption. */
2454 for (i = 0; i < n_iv_uses (data); i++)
2455 {
2456 use = iv_use (data, i);
2457 BITMAP_FREE (use->related_cands);
2458 }
2459 }
2460 else
2461 {
2462 /* Add important candidates to the related_cands bitmaps. */
2463 for (i = 0; i < n_iv_uses (data); i++)
2464 bitmap_ior_into (iv_use (data, i)->related_cands,
2465 data->important_candidates);
2466 }
2467 }
2468
2469 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2470 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2471 we allocate a simple hash table for every use. */
2472
2473 static void
2474 alloc_use_cost_map (struct ivopts_data *data)
2475 {
2476 unsigned i, size, s, j;
2477
2478 for (i = 0; i < n_iv_uses (data); i++)
2479 {
2480 struct iv_use *use = iv_use (data, i);
2481 bitmap_iterator bi;
2482
2483 if (data->consider_all_candidates)
2484 size = n_iv_cands (data);
2485 else
2486 {
2487 s = 0;
2488 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2489 {
2490 s++;
2491 }
2492
2493 /* Round up to a power of two, so that computing the modulo by it is fast. */
2494 for (size = 1; size < s; size <<= 1)
2495 continue;
2496 }
2497
2498 use->n_map_members = size;
2499 use->cost_map = XCNEWVEC (struct cost_pair, size);
2500 }
2501 }
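
/* A small worked example: a use with five related candidates gets
   n_map_members rounded up to eight, so set_use_iv_cost below can index the
   table with cand->id & 7 and probe linearly on collisions.  */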
2502
2503 /* Returns a description of the computation cost of an expression whose
2504 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2505
2506 static comp_cost
2507 new_cost (unsigned runtime, unsigned complexity)
2508 {
2509 comp_cost cost;
2510
2511 cost.cost = runtime;
2512 cost.complexity = complexity;
2513
2514 return cost;
2515 }
2516
2517 /* Adds costs COST1 and COST2. */
2518
2519 static comp_cost
2520 add_costs (comp_cost cost1, comp_cost cost2)
2521 {
2522 cost1.cost += cost2.cost;
2523 cost1.complexity += cost2.complexity;
2524
2525 return cost1;
2526 }
2527 /* Subtracts cost COST2 from COST1. */
2528
2529 static comp_cost
2530 sub_costs (comp_cost cost1, comp_cost cost2)
2531 {
2532 cost1.cost -= cost2.cost;
2533 cost1.complexity -= cost2.complexity;
2534
2535 return cost1;
2536 }
2537
2538 /* Returns a negative number if COST1 < COST2, a positive number if
2539 COST1 > COST2, and 0 if COST1 = COST2. */
2540
2541 static int
2542 compare_costs (comp_cost cost1, comp_cost cost2)
2543 {
2544 if (cost1.cost == cost2.cost)
2545 return cost1.complexity - cost2.complexity;
2546
2547 return cost1.cost - cost2.cost;
2548 }
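
/* For instance, {cost 4, complexity 1} compares less than
   {cost 4, complexity 2}, which in turn compares less than
   {cost 5, complexity 0}: complexity only breaks ties in the runtime
   cost.  */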
2549
2550 /* Returns true if COST is infinite. */
2551
2552 static bool
2553 infinite_cost_p (comp_cost cost)
2554 {
2555 return cost.cost == INFTY;
2556 }
2557
2558 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2559 depends on invariants DEPENDS_ON and that the value used in expressing it
2560 is VALUE. */
2561
2562 static void
2563 set_use_iv_cost (struct ivopts_data *data,
2564 struct iv_use *use, struct iv_cand *cand,
2565 comp_cost cost, bitmap depends_on, tree value)
2566 {
2567 unsigned i, s;
2568
2569 if (infinite_cost_p (cost))
2570 {
2571 BITMAP_FREE (depends_on);
2572 return;
2573 }
2574
2575 if (data->consider_all_candidates)
2576 {
2577 use->cost_map[cand->id].cand = cand;
2578 use->cost_map[cand->id].cost = cost;
2579 use->cost_map[cand->id].depends_on = depends_on;
2580 use->cost_map[cand->id].value = value;
2581 return;
2582 }
2583
2584 /* n_map_members is a power of two, so this computes modulo. */
2585 s = cand->id & (use->n_map_members - 1);
2586 for (i = s; i < use->n_map_members; i++)
2587 if (!use->cost_map[i].cand)
2588 goto found;
2589 for (i = 0; i < s; i++)
2590 if (!use->cost_map[i].cand)
2591 goto found;
2592
2593 gcc_unreachable ();
2594
2595 found:
2596 use->cost_map[i].cand = cand;
2597 use->cost_map[i].cost = cost;
2598 use->cost_map[i].depends_on = depends_on;
2599 use->cost_map[i].value = value;
2600 }
2601
2602 /* Gets cost of (USE, CANDIDATE) pair. */
2603
2604 static struct cost_pair *
2605 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2606 struct iv_cand *cand)
2607 {
2608 unsigned i, s;
2609 struct cost_pair *ret;
2610
2611 if (!cand)
2612 return NULL;
2613
2614 if (data->consider_all_candidates)
2615 {
2616 ret = use->cost_map + cand->id;
2617 if (!ret->cand)
2618 return NULL;
2619
2620 return ret;
2621 }
2622
2623 /* n_map_members is a power of two, so this computes modulo. */
2624 s = cand->id & (use->n_map_members - 1);
2625 for (i = s; i < use->n_map_members; i++)
2626 if (use->cost_map[i].cand == cand)
2627 return use->cost_map + i;
2628
2629 for (i = 0; i < s; i++)
2630 if (use->cost_map[i].cand == cand)
2631 return use->cost_map + i;
2632
2633 return NULL;
2634 }
2635
2636 /* Returns an estimate of the cost of computing SEQ. */
2637
2638 static unsigned
2639 seq_cost (rtx seq, bool speed)
2640 {
2641 unsigned cost = 0;
2642 rtx set;
2643
2644 for (; seq; seq = NEXT_INSN (seq))
2645 {
2646 set = single_set (seq);
2647 if (set)
2648 cost += rtx_cost (set, SET, speed);
2649 else
2650 cost++;
2651 }
2652
2653 return cost;
2654 }
2655
2656 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2657 static rtx
2658 produce_memory_decl_rtl (tree obj, int *regno)
2659 {
2660 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2661 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2662 rtx x;
2663
2664 gcc_assert (obj);
2665 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2666 {
2667 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2668 x = gen_rtx_SYMBOL_REF (address_mode, name);
2669 SET_SYMBOL_REF_DECL (x, obj);
2670 x = gen_rtx_MEM (DECL_MODE (obj), x);
2671 set_mem_addr_space (x, as);
2672 targetm.encode_section_info (obj, x, true);
2673 }
2674 else
2675 {
2676 x = gen_raw_REG (address_mode, (*regno)++);
2677 x = gen_rtx_MEM (DECL_MODE (obj), x);
2678 set_mem_addr_space (x, as);
2679 }
2680
2681 return x;
2682 }
2683
2684 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2685 walk_tree. DATA contains the actual fake register number. */
2686
2687 static tree
2688 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2689 {
2690 tree obj = NULL_TREE;
2691 rtx x = NULL_RTX;
2692 int *regno = (int *) data;
2693
2694 switch (TREE_CODE (*expr_p))
2695 {
2696 case ADDR_EXPR:
2697 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2698 handled_component_p (*expr_p);
2699 expr_p = &TREE_OPERAND (*expr_p, 0))
2700 continue;
2701 obj = *expr_p;
2702 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2703 x = produce_memory_decl_rtl (obj, regno);
2704 break;
2705
2706 case SSA_NAME:
2707 *ws = 0;
2708 obj = SSA_NAME_VAR (*expr_p);
2709 if (!DECL_RTL_SET_P (obj))
2710 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2711 break;
2712
2713 case VAR_DECL:
2714 case PARM_DECL:
2715 case RESULT_DECL:
2716 *ws = 0;
2717 obj = *expr_p;
2718
2719 if (DECL_RTL_SET_P (obj))
2720 break;
2721
2722 if (DECL_MODE (obj) == BLKmode)
2723 x = produce_memory_decl_rtl (obj, regno);
2724 else
2725 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2726
2727 break;
2728
2729 default:
2730 break;
2731 }
2732
2733 if (x)
2734 {
2735 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2736 SET_DECL_RTL (obj, x);
2737 }
2738
2739 return NULL_TREE;
2740 }
2741
2742 /* Determines cost of the computation of EXPR. */
2743
2744 static unsigned
2745 computation_cost (tree expr, bool speed)
2746 {
2747 rtx seq, rslt;
2748 tree type = TREE_TYPE (expr);
2749 unsigned cost;
2750 /* Avoid using hard regs in ways which may be unsupported. */
2751 int regno = LAST_VIRTUAL_REGISTER + 1;
2752 struct cgraph_node *node = cgraph_node (current_function_decl);
2753 enum node_frequency real_frequency = node->frequency;
2754
2755 node->frequency = NODE_FREQUENCY_NORMAL;
2756 crtl->maybe_hot_insn_p = speed;
2757 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2758 start_sequence ();
2759 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2760 seq = get_insns ();
2761 end_sequence ();
2762 default_rtl_profile ();
2763 node->frequency = real_frequency;
2764
2765 cost = seq_cost (seq, speed);
2766 if (MEM_P (rslt))
2767 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2768 TYPE_ADDR_SPACE (type), speed);
2769
2770 return cost;
2771 }
2772
2773 /* Returns the variable containing the value of candidate CAND at statement STMT. */
2774
2775 static tree
2776 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2777 {
2778 if (stmt_after_increment (loop, cand, stmt))
2779 return cand->var_after;
2780 else
2781 return cand->var_before;
2782 }
2783
2784 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2785 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2786
2787 int
2788 tree_int_cst_sign_bit (const_tree t)
2789 {
2790 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2791 unsigned HOST_WIDE_INT w;
2792
2793 if (bitno < HOST_BITS_PER_WIDE_INT)
2794 w = TREE_INT_CST_LOW (t);
2795 else
2796 {
2797 w = TREE_INT_CST_HIGH (t);
2798 bitno -= HOST_BITS_PER_WIDE_INT;
2799 }
2800
2801 return (w >> bitno) & 1;
2802 }
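
/* For example, for a 16-bit type the constant 0x8000 yields 1 and 0x7fff
   yields 0, regardless of how many bits a HOST_WIDE_INT has.  */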
2803
2804 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2805 same precision that is at least as wide as the precision of TYPE, stores
2806 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2807 type of A and B. */
2808
2809 static tree
2810 determine_common_wider_type (tree *a, tree *b)
2811 {
2812 tree wider_type = NULL;
2813 tree suba, subb;
2814 tree atype = TREE_TYPE (*a);
2815
2816 if (CONVERT_EXPR_P (*a))
2817 {
2818 suba = TREE_OPERAND (*a, 0);
2819 wider_type = TREE_TYPE (suba);
2820 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2821 return atype;
2822 }
2823 else
2824 return atype;
2825
2826 if (CONVERT_EXPR_P (*b))
2827 {
2828 subb = TREE_OPERAND (*b, 0);
2829 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2830 return atype;
2831 }
2832 else
2833 return atype;
2834
2835 *a = suba;
2836 *b = subb;
2837 return wider_type;
2838 }
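
/* For instance (assuming 32-bit unsigned int), if *A is (unsigned int) x and
   *B is (unsigned int) y with x and y of a common 64-bit type, *A and *B are
   replaced by x and y and the 64-bit type is returned, so the difference can
   be folded before being truncated.  */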
2839
2840 /* Determines the expression by which USE is expressed from induction variable
2841 CAND at statement AT in LOOP. The expression is stored in a decomposed
2842 form into AFF. Returns false if USE cannot be expressed using CAND. */
2843
2844 static bool
2845 get_computation_aff (struct loop *loop,
2846 struct iv_use *use, struct iv_cand *cand, gimple at,
2847 struct affine_tree_combination *aff)
2848 {
2849 tree ubase = use->iv->base;
2850 tree ustep = use->iv->step;
2851 tree cbase = cand->iv->base;
2852 tree cstep = cand->iv->step, cstep_common;
2853 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2854 tree common_type, var;
2855 tree uutype;
2856 aff_tree cbase_aff, var_aff;
2857 double_int rat;
2858
2859 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2860 {
2861 /* We do not have a precision to express the values of use. */
2862 return false;
2863 }
2864
2865 var = var_at_stmt (loop, cand, at);
2866 uutype = unsigned_type_for (utype);
2867
2868 /* If the conversion is not a noop, perform it. */
2869 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2870 {
2871 cstep = fold_convert (uutype, cstep);
2872 cbase = fold_convert (uutype, cbase);
2873 var = fold_convert (uutype, var);
2874 }
2875
2876 if (!constant_multiple_of (ustep, cstep, &rat))
2877 return false;
2878
2879 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2880 type, we achieve better folding by computing their difference in this
2881 wider type, and casting the result to UUTYPE. We do not need to worry about
2882 overflows, as all the arithmetic will in the end be performed in UUTYPE
2883 anyway. */
2884 common_type = determine_common_wider_type (&ubase, &cbase);
2885
2886 /* use = ubase - ratio * cbase + ratio * var. */
2887 tree_to_aff_combination (ubase, common_type, aff);
2888 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2889 tree_to_aff_combination (var, uutype, &var_aff);
2890
2891 /* We need to shift the value if we are after the increment. */
2892 if (stmt_after_increment (loop, cand, at))
2893 {
2894 aff_tree cstep_aff;
2895
2896 if (common_type != uutype)
2897 cstep_common = fold_convert (common_type, cstep);
2898 else
2899 cstep_common = cstep;
2900
2901 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2902 aff_combination_add (&cbase_aff, &cstep_aff);
2903 }
2904
2905 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2906 aff_combination_add (aff, &cbase_aff);
2907 if (common_type != uutype)
2908 aff_combination_convert (aff, uutype);
2909
2910 aff_combination_scale (&var_aff, rat);
2911 aff_combination_add (aff, &var_aff);
2912
2913 return true;
2914 }
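
/* A worked instance of use = ubase - ratio * cbase + ratio * var: for a use
   {base &a[4], step 4} and a candidate {base &a[0], step 4}, the ratio is 1
   and the combination folds to var + 16, assuming 4-byte elements.  */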
2915
2916 /* Determines the expression by which USE is expressed from induction variable
2917 CAND at statement AT in LOOP. The computation is unshared. */
2918
2919 static tree
2920 get_computation_at (struct loop *loop,
2921 struct iv_use *use, struct iv_cand *cand, gimple at)
2922 {
2923 aff_tree aff;
2924 tree type = TREE_TYPE (use->iv->base);
2925
2926 if (!get_computation_aff (loop, use, cand, at, &aff))
2927 return NULL_TREE;
2928 unshare_aff_combination (&aff);
2929 return fold_convert (type, aff_combination_to_tree (&aff));
2930 }
2931
2932 /* Determines the expression by which USE is expressed from induction variable
2933 CAND in LOOP. The computation is unshared. */
2934
2935 static tree
2936 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2937 {
2938 return get_computation_at (loop, use, cand, use->stmt);
2939 }
2940
2941 /* Adjust the cost COST for being in loop setup rather than loop body.
2942 If we're optimizing for space, the loop setup overhead is constant;
2943 if we're optimizing for speed, amortize it over the per-iteration cost. */
2944 static unsigned
2945 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2946 {
2947 if (cost == INFTY)
2948 return cost;
2949 else if (optimize_loop_for_speed_p (data->current_loop))
2950 return cost / AVG_LOOP_NITER (data->current_loop);
2951 else
2952 return cost;
2953 }
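
/* For illustration, assuming AVG_LOOP_NITER evaluates to 5: a setup cost of
   10 is charged as 2 per iteration when the loop is optimized for speed, and
   as the full 10 when optimizing for size.  */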
2954
2955 /* Returns cost of addition in MODE. */
2956
2957 static unsigned
2958 add_cost (enum machine_mode mode, bool speed)
2959 {
2960 static unsigned costs[NUM_MACHINE_MODES];
2961 rtx seq;
2962 unsigned cost;
2963
2964 if (costs[mode])
2965 return costs[mode];
2966
2967 start_sequence ();
2968 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2969 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2970 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2971 NULL_RTX);
2972 seq = get_insns ();
2973 end_sequence ();
2974
2975 cost = seq_cost (seq, speed);
2976 if (!cost)
2977 cost = 1;
2978
2979 costs[mode] = cost;
2980
2981 if (dump_file && (dump_flags & TDF_DETAILS))
2982 fprintf (dump_file, "Addition in %s costs %d\n",
2983 GET_MODE_NAME (mode), cost);
2984 return cost;
2985 }
2986
2987 /* Entry in a hashtable of already known costs for multiplication. */
2988 struct mbc_entry
2989 {
2990 HOST_WIDE_INT cst; /* The constant to multiply by. */
2991 enum machine_mode mode; /* The mode in which to multiply. */
2992 unsigned cost; /* The cost. */
2993 };
2994
2995 /* Computes the hash value for ENTRY. */
2996
2997 static hashval_t
2998 mbc_entry_hash (const void *entry)
2999 {
3000 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3001
3002 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3003 }
3004
3005 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3006
3007 static int
3008 mbc_entry_eq (const void *entry1, const void *entry2)
3009 {
3010 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3011 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3012
3013 return (e1->mode == e2->mode
3014 && e1->cst == e2->cst);
3015 }
3016
3017 /* Returns cost of multiplication by constant CST in MODE. */
3018
3019 unsigned
3020 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3021 {
3022 static htab_t costs;
3023 struct mbc_entry **cached, act;
3024 rtx seq;
3025 unsigned cost;
3026
3027 if (!costs)
3028 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3029
3030 act.mode = mode;
3031 act.cst = cst;
3032 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3033 if (*cached)
3034 return (*cached)->cost;
3035
3036 *cached = XNEW (struct mbc_entry);
3037 (*cached)->mode = mode;
3038 (*cached)->cst = cst;
3039
3040 start_sequence ();
3041 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3042 gen_int_mode (cst, mode), NULL_RTX, 0);
3043 seq = get_insns ();
3044 end_sequence ();
3045
3046 cost = seq_cost (seq, speed);
3047
3048 if (dump_file && (dump_flags & TDF_DETAILS))
3049 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3050 (int) cst, GET_MODE_NAME (mode), cost);
3051
3052 (*cached)->cost = cost;
3053
3054 return cost;
3055 }
3056
3057 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3058 validity for a memory reference accessing memory of mode MODE in
3059 address space AS. */
3060
3061 DEF_VEC_P (sbitmap);
3062 DEF_VEC_ALLOC_P (sbitmap, heap);
3063
3064 bool
3065 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3066 addr_space_t as)
3067 {
3068 #define MAX_RATIO 128
3069 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3070 static VEC (sbitmap, heap) *valid_mult_list;
3071 sbitmap valid_mult;
3072
3073 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3074 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3075
3076 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3077 if (!valid_mult)
3078 {
3079 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3080 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3081 rtx addr;
3082 HOST_WIDE_INT i;
3083
3084 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3085 sbitmap_zero (valid_mult);
3086 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3087 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3088 {
3089 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3090 if (memory_address_addr_space_p (mode, addr, as))
3091 SET_BIT (valid_mult, i + MAX_RATIO);
3092 }
3093
3094 if (dump_file && (dump_flags & TDF_DETAILS))
3095 {
3096 fprintf (dump_file, " allowed multipliers:");
3097 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3098 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3099 fprintf (dump_file, " %d", (int) i);
3100 fprintf (dump_file, "\n");
3101 fprintf (dump_file, "\n");
3102 }
3103
3104 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3105 }
3106
3107 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3108 return false;
3109
3110 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3111 }
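
/* For instance, on a target whose addressing modes scale the index by 1, 2,
   4 or 8 (an assumption typical of x86-like machines), RATIO 4 returns true
   while RATIO 3 returns false, forcing an explicit multiplication.  */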
3112
3113 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3114 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3115 variable is omitted. Compute the cost for a memory reference that accesses
3116 a memory location of mode MEM_MODE in address space AS.
3117
3118 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3119 size of MEM_MODE / RATIO) is available. To make this determination, we
3120 look at the size of the increment to be made, which is given in CSTEP.
3121 CSTEP may be zero if the step is unknown.
3122 STMT_AFTER_INC is true iff the statement we're looking at is after the
3123 increment of the original biv.
3124
3125 TODO -- there must be some better way. This all is quite crude. */
3126
3127 typedef struct
3128 {
3129 HOST_WIDE_INT min_offset, max_offset;
3130 unsigned costs[2][2][2][2];
3131 } *address_cost_data;
3132
3133 DEF_VEC_P (address_cost_data);
3134 DEF_VEC_ALLOC_P (address_cost_data, heap);
3135
3136 static comp_cost
3137 get_address_cost (bool symbol_present, bool var_present,
3138 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3139 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3140 addr_space_t as, bool speed,
3141 bool stmt_after_inc, bool *may_autoinc)
3142 {
3143 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3144 static VEC(address_cost_data, heap) *address_cost_data_list;
3145 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3146 address_cost_data data;
3147 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3148 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3149 unsigned cost, acost, complexity;
3150 bool offset_p, ratio_p, autoinc;
3151 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3152 unsigned HOST_WIDE_INT mask;
3153 unsigned bits;
3154
3155 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3156 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3157 data_index + 1);
3158
3159 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3160 if (!data)
3161 {
3162 HOST_WIDE_INT i;
3163 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3164 HOST_WIDE_INT rat, off;
3165 int old_cse_not_expected;
3166 unsigned sym_p, var_p, off_p, rat_p, add_c;
3167 rtx seq, addr, base;
3168 rtx reg0, reg1;
3169
3170 data = (address_cost_data) xcalloc (1, sizeof (*data));
3171
3172 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3173
3174 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3175 for (i = start; i <= 1 << 20; i <<= 1)
3176 {
3177 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3178 if (!memory_address_addr_space_p (mem_mode, addr, as))
3179 break;
3180 }
3181 data->max_offset = i == start ? 0 : i >> 1;
3182 off = data->max_offset;
3183
3184 for (i = start; i <= 1 << 20; i <<= 1)
3185 {
3186 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3187 if (!memory_address_addr_space_p (mem_mode, addr, as))
3188 break;
3189 }
3190 data->min_offset = i == start ? 0 : -(i >> 1);
3191
3192 if (dump_file && (dump_flags & TDF_DETAILS))
3193 {
3194 fprintf (dump_file, "get_address_cost:\n");
3195 fprintf (dump_file, " min offset %s %d\n",
3196 GET_MODE_NAME (mem_mode),
3197 (int) data->min_offset);
3198 fprintf (dump_file, " max offset %s %d\n",
3199 GET_MODE_NAME (mem_mode),
3200 (int) data->max_offset);
3201 }
3202
3203 rat = 1;
3204 for (i = 2; i <= MAX_RATIO; i++)
3205 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3206 {
3207 rat = i;
3208 break;
3209 }
3210
3211 /* Compute the cost of various addressing modes. */
3212 acost = 0;
3213 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3214 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3215
3216 if (HAVE_PRE_DECREMENT)
3217 {
3218 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3219 has_predec[mem_mode]
3220 = memory_address_addr_space_p (mem_mode, addr, as);
3221 }
3222 if (HAVE_POST_DECREMENT)
3223 {
3224 addr = gen_rtx_POST_DEC (address_mode, reg0);
3225 has_postdec[mem_mode]
3226 = memory_address_addr_space_p (mem_mode, addr, as);
3227 }
3228 if (HAVE_PRE_INCREMENT)
3229 {
3230 addr = gen_rtx_PRE_INC (address_mode, reg0);
3231 has_preinc[mem_mode]
3232 = memory_address_addr_space_p (mem_mode, addr, as);
3233 }
3234 if (HAVE_POST_INCREMENT)
3235 {
3236 addr = gen_rtx_POST_INC (address_mode, reg0);
3237 has_postinc[mem_mode]
3238 = memory_address_addr_space_p (mem_mode, addr, as);
3239 }
3240 for (i = 0; i < 16; i++)
3241 {
3242 sym_p = i & 1;
3243 var_p = (i >> 1) & 1;
3244 off_p = (i >> 2) & 1;
3245 rat_p = (i >> 3) & 1;
3246
3247 addr = reg0;
3248 if (rat_p)
3249 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3250 gen_int_mode (rat, address_mode));
3251
3252 if (var_p)
3253 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3254
3255 if (sym_p)
3256 {
3257 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3258 /* ??? We can run into trouble with some backends by presenting
3259 it with symbols which haven't been properly passed through
3260 targetm.encode_section_info. By setting the local bit, we
3261 enhance the probability of things working. */
3262 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3263
3264 if (off_p)
3265 base = gen_rtx_fmt_e (CONST, address_mode,
3266 gen_rtx_fmt_ee
3267 (PLUS, address_mode, base,
3268 gen_int_mode (off, address_mode)));
3269 }
3270 else if (off_p)
3271 base = gen_int_mode (off, address_mode);
3272 else
3273 base = NULL_RTX;
3274
3275 if (base)
3276 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3277
3278 start_sequence ();
3279 /* To avoid splitting addressing modes, pretend that no cse will
3280 follow. */
3281 old_cse_not_expected = cse_not_expected;
3282 cse_not_expected = true;
3283 addr = memory_address_addr_space (mem_mode, addr, as);
3284 cse_not_expected = old_cse_not_expected;
3285 seq = get_insns ();
3286 end_sequence ();
3287
3288 acost = seq_cost (seq, speed);
3289 acost += address_cost (addr, mem_mode, as, speed);
3290
3291 if (!acost)
3292 acost = 1;
3293 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3294 }
3295
3296 /* On some targets, it is quite expensive to load a symbol into a register,
3297 which makes addresses that contain symbols look much more expensive.
3298 However, the symbol will have to be loaded in any case before the
3299 loop (and quite likely we have it in a register already), so it does not
3300 make much sense to penalize them too heavily. So make some final
3301 tweaks for the SYMBOL_PRESENT modes:
3302
3303 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3304 var is cheaper, use this mode with small penalty.
3305 If VAR_PRESENT is true, try whether the mode with
3306 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3307 if this is the case, use it. */
3308 add_c = add_cost (address_mode, speed);
3309 for (i = 0; i < 8; i++)
3310 {
3311 var_p = i & 1;
3312 off_p = (i >> 1) & 1;
3313 rat_p = (i >> 2) & 1;
3314
3315 acost = data->costs[0][1][off_p][rat_p] + 1;
3316 if (var_p)
3317 acost += add_c;
3318
3319 if (acost < data->costs[1][var_p][off_p][rat_p])
3320 data->costs[1][var_p][off_p][rat_p] = acost;
3321 }
3322
3323 if (dump_file && (dump_flags & TDF_DETAILS))
3324 {
3325 fprintf (dump_file, "Address costs:\n");
3326
3327 for (i = 0; i < 16; i++)
3328 {
3329 sym_p = i & 1;
3330 var_p = (i >> 1) & 1;
3331 off_p = (i >> 2) & 1;
3332 rat_p = (i >> 3) & 1;
3333
3334 fprintf (dump_file, " ");
3335 if (sym_p)
3336 fprintf (dump_file, "sym + ");
3337 if (var_p)
3338 fprintf (dump_file, "var + ");
3339 if (off_p)
3340 fprintf (dump_file, "cst + ");
3341 if (rat_p)
3342 fprintf (dump_file, "rat * ");
3343
3344 acost = data->costs[sym_p][var_p][off_p][rat_p];
3345 fprintf (dump_file, "index costs %d\n", acost);
3346 }
3347 if (has_predec[mem_mode] || has_postdec[mem_mode]
3348 || has_preinc[mem_mode] || has_postinc[mem_mode])
3349 fprintf (dump_file, " May include autoinc/dec\n");
3350 fprintf (dump_file, "\n");
3351 }
3352
3353 VEC_replace (address_cost_data, address_cost_data_list,
3354 data_index, data);
3355 }
3356
3357 bits = GET_MODE_BITSIZE (address_mode);
3358 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3359 offset &= mask;
3360 if ((offset >> (bits - 1) & 1))
3361 offset |= ~mask;
3362 s_offset = offset;
3363
3364 autoinc = false;
3365 msize = GET_MODE_SIZE (mem_mode);
3366 autoinc_offset = offset;
3367 if (stmt_after_inc)
3368 autoinc_offset += ratio * cstep;
3369 if (symbol_present || var_present || ratio != 1)
3370 autoinc = false;
3371 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3372 && msize == cstep)
3373 || (has_postdec[mem_mode] && autoinc_offset == 0
3374 && msize == -cstep)
3375 || (has_preinc[mem_mode] && autoinc_offset == msize
3376 && msize == cstep)
3377 || (has_predec[mem_mode] && autoinc_offset == -msize
3378 && msize == -cstep))
3379 autoinc = true;
3380
3381 cost = 0;
3382 offset_p = (s_offset != 0
3383 && data->min_offset <= s_offset
3384 && s_offset <= data->max_offset);
3385 ratio_p = (ratio != 1
3386 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3387
3388 if (ratio != 1 && !ratio_p)
3389 cost += multiply_by_cost (ratio, address_mode, speed);
3390
3391 if (s_offset && !offset_p && !symbol_present)
3392 cost += add_cost (address_mode, speed);
3393
3394 if (may_autoinc)
3395 *may_autoinc = autoinc;
3396 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3397 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3398 return new_cost (cost + acost, complexity);
3399 }
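
/* To illustrate the final table lookup: an address of shape
   symbol + offset + 4 * index, with the offset in [min_offset, max_offset]
   and the multiplier valid, consults data->costs[1][0][1][1] and reports
   complexity 3 (symbol, offset and ratio present).  */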
3400
3401 /* Estimates cost of forcing expression EXPR into a variable. */
3402
3403 static comp_cost
3404 force_expr_to_var_cost (tree expr, bool speed)
3405 {
3406 static bool costs_initialized = false;
3407 static unsigned integer_cost [2];
3408 static unsigned symbol_cost [2];
3409 static unsigned address_cost [2];
3410 tree op0, op1;
3411 comp_cost cost0, cost1, cost;
3412 enum machine_mode mode;
3413
3414 if (!costs_initialized)
3415 {
3416 tree type = build_pointer_type (integer_type_node);
3417 tree var, addr;
3418 rtx x;
3419 int i;
3420
3421 var = create_tmp_var_raw (integer_type_node, "test_var");
3422 TREE_STATIC (var) = 1;
3423 x = produce_memory_decl_rtl (var, NULL);
3424 SET_DECL_RTL (var, x);
3425
3426 addr = build1 (ADDR_EXPR, type, var);
3427
3428
3429 for (i = 0; i < 2; i++)
3430 {
3431 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3432 2000), i);
3433
3434 symbol_cost[i] = computation_cost (addr, i) + 1;
3435
3436 address_cost[i]
3437 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3438 addr,
3439 build_int_cst (sizetype, 2000)), i) + 1;
3440 if (dump_file && (dump_flags & TDF_DETAILS))
3441 {
3442 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3443 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3444 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3445 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3446 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3447 fprintf (dump_file, "\n");
3448 }
3449 }
3450
3451 costs_initialized = true;
3452 }
3453
3454 STRIP_NOPS (expr);
3455
3456 if (SSA_VAR_P (expr))
3457 return zero_cost;
3458
3459 if (is_gimple_min_invariant (expr))
3460 {
3461 if (TREE_CODE (expr) == INTEGER_CST)
3462 return new_cost (integer_cost [speed], 0);
3463
3464 if (TREE_CODE (expr) == ADDR_EXPR)
3465 {
3466 tree obj = TREE_OPERAND (expr, 0);
3467
3468 if (TREE_CODE (obj) == VAR_DECL
3469 || TREE_CODE (obj) == PARM_DECL
3470 || TREE_CODE (obj) == RESULT_DECL)
3471 return new_cost (symbol_cost [speed], 0);
3472 }
3473
3474 return new_cost (address_cost [speed], 0);
3475 }
3476
3477 switch (TREE_CODE (expr))
3478 {
3479 case POINTER_PLUS_EXPR:
3480 case PLUS_EXPR:
3481 case MINUS_EXPR:
3482 case MULT_EXPR:
3483 op0 = TREE_OPERAND (expr, 0);
3484 op1 = TREE_OPERAND (expr, 1);
3485 STRIP_NOPS (op0);
3486 STRIP_NOPS (op1);
3487
3488 if (is_gimple_val (op0))
3489 cost0 = zero_cost;
3490 else
3491 cost0 = force_expr_to_var_cost (op0, speed);
3492
3493 if (is_gimple_val (op1))
3494 cost1 = zero_cost;
3495 else
3496 cost1 = force_expr_to_var_cost (op1, speed);
3497
3498 break;
3499
3500 case NEGATE_EXPR:
3501 op0 = TREE_OPERAND (expr, 0);
3502 STRIP_NOPS (op0);
3503 op1 = NULL_TREE;
3504
3505 if (is_gimple_val (op0))
3506 cost0 = zero_cost;
3507 else
3508 cost0 = force_expr_to_var_cost (op0, speed);
3509
3510 cost1 = zero_cost;
3511 break;
3512
3513 default:
3514 /* Just an arbitrary value, FIXME. */
3515 return new_cost (target_spill_cost[speed], 0);
3516 }
3517
3518 mode = TYPE_MODE (TREE_TYPE (expr));
3519 switch (TREE_CODE (expr))
3520 {
3521 case POINTER_PLUS_EXPR:
3522 case PLUS_EXPR:
3523 case MINUS_EXPR:
3524 case NEGATE_EXPR:
3525 cost = new_cost (add_cost (mode, speed), 0);
3526 break;
3527
3528 case MULT_EXPR:
3529 if (cst_and_fits_in_hwi (op0))
3530 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3531 else if (cst_and_fits_in_hwi (op1))
3532 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3533 else
3534 return new_cost (target_spill_cost [speed], 0);
3535 break;
3536
3537 default:
3538 gcc_unreachable ();
3539 }
3540
3541 cost = add_costs (cost, cost0);
3542 cost = add_costs (cost, cost1);
3543
3544 /* Bound the cost by target_spill_cost. The parts of complicated
3545 computations often are either loop invariant or at least can
3546 be shared between several iv uses, so letting this grow without
3547 limits would not give reasonable results. */
3548 if (cost.cost > (int) target_spill_cost [speed])
3549 cost.cost = target_spill_cost [speed];
3550
3551 return cost;
3552 }
3553
3554 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3555 invariants the computation depends on. */
3556
3557 static comp_cost
3558 force_var_cost (struct ivopts_data *data,
3559 tree expr, bitmap *depends_on)
3560 {
3561 if (depends_on)
3562 {
3563 fd_ivopts_data = data;
3564 walk_tree (&expr, find_depends, depends_on, NULL);
3565 }
3566
3567 return force_expr_to_var_cost (expr, data->speed);
3568 }
3569
3570 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3571 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3572 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3573 invariants the computation depends on. */
3574
3575 static comp_cost
3576 split_address_cost (struct ivopts_data *data,
3577 tree addr, bool *symbol_present, bool *var_present,
3578 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3579 {
3580 tree core;
3581 HOST_WIDE_INT bitsize;
3582 HOST_WIDE_INT bitpos;
3583 tree toffset;
3584 enum machine_mode mode;
3585 int unsignedp, volatilep;
3586
3587 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3588 &unsignedp, &volatilep, false);
3589
3590 if (toffset != 0
3591 || bitpos % BITS_PER_UNIT != 0
3592 || TREE_CODE (core) != VAR_DECL)
3593 {
3594 *symbol_present = false;
3595 *var_present = true;
3596 fd_ivopts_data = data;
3597 walk_tree (&addr, find_depends, depends_on, NULL);
3598 return new_cost (target_spill_cost[data->speed], 0);
3599 }
3600
3601 *offset += bitpos / BITS_PER_UNIT;
3602 if (TREE_STATIC (core)
3603 || DECL_EXTERNAL (core))
3604 {
3605 *symbol_present = true;
3606 *var_present = false;
3607 return zero_cost;
3608 }
3609
3610 *symbol_present = false;
3611 *var_present = true;
3612 return zero_cost;
3613 }
3614
3615 /* Estimates cost of expressing difference of addresses E1 - E2 as
3616 var + symbol + offset. The value of offset is added to OFFSET,
3617 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3618 part is missing. DEPENDS_ON is a set of the invariants the computation
3619 depends on. */
3620
3621 static comp_cost
3622 ptr_difference_cost (struct ivopts_data *data,
3623 tree e1, tree e2, bool *symbol_present, bool *var_present,
3624 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3625 {
3626 HOST_WIDE_INT diff = 0;
3627 aff_tree aff_e1, aff_e2;
3628 tree type;
3629
3630 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3631
3632 if (ptr_difference_const (e1, e2, &diff))
3633 {
3634 *offset += diff;
3635 *symbol_present = false;
3636 *var_present = false;
3637 return zero_cost;
3638 }
3639
3640 if (integer_zerop (e2))
3641 return split_address_cost (data, TREE_OPERAND (e1, 0),
3642 symbol_present, var_present, offset, depends_on);
3643
3644 *symbol_present = false;
3645 *var_present = true;
3646
3647 type = signed_type_for (TREE_TYPE (e1));
3648 tree_to_aff_combination (e1, type, &aff_e1);
3649 tree_to_aff_combination (e2, type, &aff_e2);
3650 aff_combination_scale (&aff_e2, double_int_minus_one);
3651 aff_combination_add (&aff_e1, &aff_e2);
3652
3653 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3654 }
3655
3656 /* Estimates cost of expressing difference E1 - E2 as
3657 var + symbol + offset. The value of offset is added to OFFSET,
3658 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3659 part is missing. DEPENDS_ON is a set of the invariants the computation
3660 depends on. */
3661
3662 static comp_cost
3663 difference_cost (struct ivopts_data *data,
3664 tree e1, tree e2, bool *symbol_present, bool *var_present,
3665 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3666 {
3667 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3668 unsigned HOST_WIDE_INT off1, off2;
3669 aff_tree aff_e1, aff_e2;
3670 tree type;
3671
3672 e1 = strip_offset (e1, &off1);
3673 e2 = strip_offset (e2, &off2);
3674 *offset += off1 - off2;
3675
3676 STRIP_NOPS (e1);
3677 STRIP_NOPS (e2);
3678
3679 if (TREE_CODE (e1) == ADDR_EXPR)
3680 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3681 offset, depends_on);
3682 *symbol_present = false;
3683
3684 if (operand_equal_p (e1, e2, 0))
3685 {
3686 *var_present = false;
3687 return zero_cost;
3688 }
3689
3690 *var_present = true;
3691
3692 if (integer_zerop (e2))
3693 return force_var_cost (data, e1, depends_on);
3694
3695 if (integer_zerop (e1))
3696 {
3697 comp_cost cost = force_var_cost (data, e2, depends_on);
3698 cost.cost += multiply_by_cost (-1, mode, data->speed);
3699 return cost;
3700 }
3701
3702 type = signed_type_for (TREE_TYPE (e1));
3703 tree_to_aff_combination (e1, type, &aff_e1);
3704 tree_to_aff_combination (e2, type, &aff_e2);
3705 aff_combination_scale (&aff_e2, double_int_minus_one);
3706 aff_combination_add (&aff_e1, &aff_e2);
3707
3708 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3709 }
3710
3711 /* Determines the cost of the computation by which USE is expressed
3712 from induction variable CAND. If ADDRESS_P is true, we just need
3713 to create an address from it, otherwise we want to get it into a
3714 register. A set of invariants we depend on is stored in
3715 DEPENDS_ON. AT is the statement at which the value is computed.
3716 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3717 addressing is likely. */
3718
3719 static comp_cost
3720 get_computation_cost_at (struct ivopts_data *data,
3721 struct iv_use *use, struct iv_cand *cand,
3722 bool address_p, bitmap *depends_on, gimple at,
3723 bool *can_autoinc)
3724 {
3725 tree ubase = use->iv->base, ustep = use->iv->step;
3726 tree cbase, cstep;
3727 tree utype = TREE_TYPE (ubase), ctype;
3728 unsigned HOST_WIDE_INT cstepi, offset = 0;
3729 HOST_WIDE_INT ratio, aratio;
3730 bool var_present, symbol_present, stmt_is_after_inc;
3731 comp_cost cost;
3732 double_int rat;
3733 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3734
3735 *depends_on = NULL;
3736
3737 /* Only consider real candidates. */
3738 if (!cand->iv)
3739 return infinite_cost;
3740
3741 cbase = cand->iv->base;
3742 cstep = cand->iv->step;
3743 ctype = TREE_TYPE (cbase);
3744
3745 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3746 {
3747 /* We do not have a precision to express the values of use. */
3748 return infinite_cost;
3749 }
3750
3751 if (address_p)
3752 {
3753 /* Do not try to express the address of an object with a computation based
3754 on the address of a different object. This may cause problems in RTL
3755 level alias analysis (which does not expect this to happen,
3756 as it is illegal in C), and would be unlikely to be useful
3757 anyway. */
3758 if (use->iv->base_object
3759 && cand->iv->base_object
3760 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3761 return infinite_cost;
3762 }
3763
3764 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3765 {
3766 /* TODO -- add direct handling of this case. */
3767 goto fallback;
3768 }
3769
3770 /* CSTEPI is removed from the offset in case the statement is after the
3771 increment. If the step is not constant, we use zero instead.
3772 This is a bit imprecise (there is the extra addition), but
3773 redundancy elimination is likely to transform the code so that
3774 it uses the value of the variable before the increment anyway,
3775 so this is not all that unrealistic. */
3776 if (cst_and_fits_in_hwi (cstep))
3777 cstepi = int_cst_value (cstep);
3778 else
3779 cstepi = 0;
3780
3781 if (!constant_multiple_of (ustep, cstep, &rat))
3782 return infinite_cost;
3783
3784 if (double_int_fits_in_shwi_p (rat))
3785 ratio = double_int_to_shwi (rat);
3786 else
3787 return infinite_cost;
3788
3789 STRIP_NOPS (cbase);
3790 ctype = TREE_TYPE (cbase);
3791
3792 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3793 or ratio == 1, it is better to handle this like
3794
3795 ubase - ratio * cbase + ratio * var
3796
3797 (this also holds in the case ratio == -1, TODO). */
3798
3799 if (cst_and_fits_in_hwi (cbase))
3800 {
3801 offset = - ratio * int_cst_value (cbase);
3802 cost = difference_cost (data,
3803 ubase, build_int_cst (utype, 0),
3804 &symbol_present, &var_present, &offset,
3805 depends_on);
3806 }
3807 else if (ratio == 1)
3808 {
3809 cost = difference_cost (data,
3810 ubase, cbase,
3811 &symbol_present, &var_present, &offset,
3812 depends_on);
3813 }
3814 else if (address_p
3815 && !POINTER_TYPE_P (ctype)
3816 && multiplier_allowed_in_address_p
3817 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3818 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3819 {
3820 cbase
3821 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3822 cost = difference_cost (data,
3823 ubase, cbase,
3824 &symbol_present, &var_present, &offset,
3825 depends_on);
3826 }
3827 else
3828 {
3829 cost = force_var_cost (data, cbase, depends_on);
3830 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3831 cost = add_costs (cost,
3832 difference_cost (data,
3833 ubase, build_int_cst (utype, 0),
3834 &symbol_present, &var_present,
3835 &offset, depends_on));
3836 }
3837
3838 /* If we are after the increment, the value of the candidate is higher by
3839 one iteration. */
3840 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3841 if (stmt_is_after_inc)
3842 offset -= ratio * cstepi;
3843
3844 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3845 (symbol/var1/const parts may be omitted). If we are looking for an
3846 address, find the cost of addressing this. */
3847 if (address_p)
3848 return add_costs (cost,
3849 get_address_cost (symbol_present, var_present,
3850 offset, ratio, cstepi,
3851 TYPE_MODE (TREE_TYPE (utype)),
3852 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3853 speed, stmt_is_after_inc,
3854 can_autoinc));
3855
3856 /* Otherwise estimate the costs for computing the expression. */
3857 if (!symbol_present && !var_present && !offset)
3858 {
3859 if (ratio != 1)
3860 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3861 return cost;
3862 }
3863
3864 /* Symbol + offset should be compile-time computable, so consider that they
3865 are added once to the variable, if present. */
3866 if (var_present && (symbol_present || offset))
3867 cost.cost += adjust_setup_cost (data,
3868 add_cost (TYPE_MODE (ctype), speed));
3869
3870 /* Having an offset does not affect the runtime cost in case it is added to
3871 a symbol, but it increases complexity. */
3872 if (offset)
3873 cost.complexity++;
3874
3875 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3876
3877 aratio = ratio > 0 ? ratio : -ratio;
3878 if (aratio != 1)
3879 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3880 return cost;
3881
3882 fallback:
3883 if (can_autoinc)
3884 *can_autoinc = false;
3885
3886 {
3887 /* Just get the expression, expand it and measure the cost. */
3888 tree comp = get_computation_at (data->current_loop, use, cand, at);
3889
3890 if (!comp)
3891 return infinite_cost;
3892
3893 if (address_p)
3894 comp = build_simple_mem_ref (comp);
3895
3896 return new_cost (computation_cost (comp, speed), 0);
3897 }
3898 }
3899
3900 /* Determines the cost of the computation by which USE is expressed
3901 from induction variable CAND. If ADDRESS_P is true, we just need
3902 to create an address from it, otherwise we want to get it into a
3903 register. A set of invariants we depend on is stored in
3904 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3905 autoinc addressing is likely. */
3906
3907 static comp_cost
3908 get_computation_cost (struct ivopts_data *data,
3909 struct iv_use *use, struct iv_cand *cand,
3910 bool address_p, bitmap *depends_on, bool *can_autoinc)
3911 {
3912 return get_computation_cost_at (data,
3913 use, cand, address_p, depends_on, use->stmt,
3914 can_autoinc);
3915 }
3916
3917 /* Determines cost of basing replacement of USE on CAND in a generic
3918 expression. */
3919
3920 static bool
3921 determine_use_iv_cost_generic (struct ivopts_data *data,
3922 struct iv_use *use, struct iv_cand *cand)
3923 {
3924 bitmap depends_on;
3925 comp_cost cost;
3926
3927 /* The simple case first -- if we need to express the value of the preserved
3928 original biv, the cost is 0. This also prevents us from counting the
3929 cost of increment twice -- once at this use and once in the cost of
3930 the candidate. */
3931 if (cand->pos == IP_ORIGINAL
3932 && cand->incremented_at == use->stmt)
3933 {
3934 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3935 return true;
3936 }
3937
3938 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3939 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3940
3941 return !infinite_cost_p (cost);
3942 }
3943
3944 /* Determines cost of basing replacement of USE on CAND in an address. */
3945
3946 static bool
3947 determine_use_iv_cost_address (struct ivopts_data *data,
3948 struct iv_use *use, struct iv_cand *cand)
3949 {
3950 bitmap depends_on;
3951 bool can_autoinc;
3952 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3953 &can_autoinc);
3954
3955 if (cand->ainc_use == use)
3956 {
3957 if (can_autoinc)
3958 cost.cost -= cand->cost_step;
3959 /* If we generated the candidate solely for exploiting autoincrement
3960 opportunities, and it turns out it can't be used, set the cost to
3961 infinity to make sure we ignore it. */
3962 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3963 cost = infinite_cost;
3964 }
3965 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3966
3967 return !infinite_cost_p (cost);
3968 }
3969
3970 /* Computes value of candidate CAND at position AT in iteration NITER, and
3971 stores it in VAL. */
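/* In other words, VAL = BASE + NITER * STEP, with one extra STEP added
   when AT is already past the increment of CAND.  */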
3972
3973 static void
3974 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3975 aff_tree *val)
3976 {
3977 aff_tree step, delta, nit;
3978 struct iv *iv = cand->iv;
3979 tree type = TREE_TYPE (iv->base);
3980 tree steptype = type;
3981 if (POINTER_TYPE_P (type))
3982 steptype = sizetype;
3983
3984 tree_to_aff_combination (iv->step, steptype, &step);
3985 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3986 aff_combination_convert (&nit, steptype);
3987 aff_combination_mult (&nit, &step, &delta);
3988 if (stmt_after_increment (loop, cand, at))
3989 aff_combination_add (&delta, &step);
3990
3991 tree_to_aff_combination (iv->base, type, val);
3992 aff_combination_add (val, &delta);
3993 }
3994
3995 /* Returns period of induction variable iv. */
3996
3997 static tree
3998 iv_period (struct iv *iv)
3999 {
4000 tree step = iv->step, period, type;
4001 tree pow2div;
4002
4003 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4004
4005 /* The period of the iv is gcd (step, type range). Since the type range
4006 is a power of two, it suffices to determine the maximum power of two
4007 that divides step. */
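/* For example, a 32-bit iv whose step is 12 has two trailing zero bits
   in the step, so pow2div is 2 and the mask built below is 2^30 - 1;
   the iv takes 2^30 distinct values before it starts repeating.  */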
4008 pow2div = num_ending_zeros (step);
4009 type = unsigned_type_for (TREE_TYPE (step));
4010
4011 period = build_low_bits_mask (type,
4012 (TYPE_PRECISION (type)
4013 - tree_low_cst (pow2div, 1)));
4014
4015 return period;
4016 }
4017
4018 /* Returns the comparison operator used when eliminating the iv USE. */
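/* If the exit edge is taken when the condition is true, the eliminated
   test becomes VAR == BOUND, so that it fires exactly when the final value
   is reached; otherwise it becomes VAR != BOUND.  */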
4019
4020 static enum tree_code
4021 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4022 {
4023 struct loop *loop = data->current_loop;
4024 basic_block ex_bb;
4025 edge exit;
4026
4027 ex_bb = gimple_bb (use->stmt);
4028 exit = EDGE_SUCC (ex_bb, 0);
4029 if (flow_bb_inside_loop_p (loop, exit->dest))
4030 exit = EDGE_SUCC (ex_bb, 1);
4031
4032 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4033 }
4034
4035 /* Check whether it is possible to express the condition in USE by comparison
4036 of candidate CAND. If so, store the value it is compared against in BOUND. */
4037
4038 static bool
4039 may_eliminate_iv (struct ivopts_data *data,
4040 struct iv_use *use, struct iv_cand *cand, tree *bound)
4041 {
4042 basic_block ex_bb;
4043 edge exit;
4044 tree nit, period;
4045 struct loop *loop = data->current_loop;
4046 aff_tree bnd;
4047
4048 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4049 return false;
4050
4051 /* For now this works only for exits that dominate the loop latch.
4052 TODO: extend to other conditions inside loop body. */
4053 ex_bb = gimple_bb (use->stmt);
4054 if (use->stmt != last_stmt (ex_bb)
4055 || gimple_code (use->stmt) != GIMPLE_COND
4056 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4057 return false;
4058
4059 exit = EDGE_SUCC (ex_bb, 0);
4060 if (flow_bb_inside_loop_p (loop, exit->dest))
4061 exit = EDGE_SUCC (ex_bb, 1);
4062 if (flow_bb_inside_loop_p (loop, exit->dest))
4063 return false;
4064
4065 nit = niter_for_exit (data, exit);
4066 if (!nit)
4067 return false;
4068
4069 /* Determine whether we can use the variable to test the exit condition.
4070 This is the case iff the period of the induction variable is greater
4071 than the number of iterations for which the exit condition is true. */
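/* (If the period were smaller, the candidate could wrap around before the
   exit is reached, and the replacement test might fire too early.)  */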
4072 period = iv_period (cand->iv);
4073
4074 /* If the number of iterations is constant, compare against it directly. */
4075 if (TREE_CODE (nit) == INTEGER_CST)
4076 {
4077 if (!tree_int_cst_lt (nit, period))
4078 return false;
4079 }
4080
4081 /* If not, and if this is the only possible exit of the loop, see whether
4082 we can get a conservative estimate of the number of iterations of the
4083 entire loop and compare against that instead. */
4084 else if (loop_only_exit_p (loop, exit))
4085 {
4086 double_int period_value, max_niter;
4087 if (!estimated_loop_iterations (loop, true, &max_niter))
4088 return false;
4089 period_value = tree_to_double_int (period);
4090 if (double_int_ucmp (max_niter, period_value) >= 0)
4091 return false;
4092 }
4093
4094 /* Otherwise, punt. */
4095 else
4096 return false;
4097
4098 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4099
4100 *bound = aff_combination_to_tree (&bnd);
4101 /* It is unlikely that computing the number of iterations using division
4102 would be more profitable than keeping the original induction variable. */
4103 if (expression_expensive_p (*bound))
4104 return false;
4105 return true;
4106 }
4107
4108 /* Determines cost of basing replacement of USE on CAND in a condition. */
4109
4110 static bool
4111 determine_use_iv_cost_condition (struct ivopts_data *data,
4112 struct iv_use *use, struct iv_cand *cand)
4113 {
4114 tree bound = NULL_TREE;
4115 struct iv *cmp_iv;
4116 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4117 comp_cost elim_cost, express_cost, cost;
4118 bool ok;
4119 tree *control_var, *bound_cst;
4120
4121 /* Only consider real candidates. */
4122 if (!cand->iv)
4123 {
4124 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4125 return false;
4126 }
4127
4128 /* Try iv elimination. */
4129 if (may_eliminate_iv (data, use, cand, &bound))
4130 {
4131 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4132 /* The bound is a loop invariant, so it will be computed only
4133 once. */
4134 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4135 }
4136 else
4137 elim_cost = infinite_cost;
4138
4139 /* Try expressing the original giv. If it is compared with an invariant,
4140 note that we cannot get rid of it. */
4141 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4142 NULL, &cmp_iv);
4143 gcc_assert (ok);
4144
4145 /* When the condition is a comparison of the candidate IV against
4146 zero, prefer this IV.
4147
4148 TODO: The constant that we're subtracting from the cost should
4149 be target-dependent. This information should be added to the
4150 target costs for each backend. */
4151 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4152 && integer_zerop (*bound_cst)
4153 && (operand_equal_p (*control_var, cand->var_after, 0)
4154 || operand_equal_p (*control_var, cand->var_before, 0)))
4155 elim_cost.cost -= 1;
4156
4157 express_cost = get_computation_cost (data, use, cand, false,
4158 &depends_on_express, NULL);
4159 fd_ivopts_data = data;
4160 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4161
4162 /* Choose the better approach, preferring the eliminated IV. */
4163 if (compare_costs (elim_cost, express_cost) <= 0)
4164 {
4165 cost = elim_cost;
4166 depends_on = depends_on_elim;
4167 depends_on_elim = NULL;
4168 }
4169 else
4170 {
4171 cost = express_cost;
4172 depends_on = depends_on_express;
4173 depends_on_express = NULL;
4174 bound = NULL_TREE;
4175 }
4176
4177 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4178
4179 if (depends_on_elim)
4180 BITMAP_FREE (depends_on_elim);
4181 if (depends_on_express)
4182 BITMAP_FREE (depends_on_express);
4183
4184 return !infinite_cost_p (cost);
4185 }
4186
4187 /* Determines cost of basing replacement of USE on CAND. Returns false
4188 if USE cannot be based on CAND. */
4189
4190 static bool
4191 determine_use_iv_cost (struct ivopts_data *data,
4192 struct iv_use *use, struct iv_cand *cand)
4193 {
4194 switch (use->type)
4195 {
4196 case USE_NONLINEAR_EXPR:
4197 return determine_use_iv_cost_generic (data, use, cand);
4198
4199 case USE_ADDRESS:
4200 return determine_use_iv_cost_address (data, use, cand);
4201
4202 case USE_COMPARE:
4203 return determine_use_iv_cost_condition (data, use, cand);
4204
4205 default:
4206 gcc_unreachable ();
4207 }
4208 }
4209
4210 /* Return true if get_computation_cost indicates that autoincrement is
4211 a possibility for the pair of USE and CAND, false otherwise. */
4212
4213 static bool
4214 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4215 struct iv_cand *cand)
4216 {
4217 bitmap depends_on;
4218 bool can_autoinc;
4219 comp_cost cost;
4220
4221 if (use->type != USE_ADDRESS)
4222 return false;
4223
4224 cost = get_computation_cost (data, use, cand, true, &depends_on,
4225 &can_autoinc);
4226
4227 BITMAP_FREE (depends_on);
4228
4229 return !infinite_cost_p (cost) && can_autoinc;
4230 }
4231
4232 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4233 use that allows autoincrement, and set their AINC_USE if possible. */
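/* The use chosen below is the closest one that precedes the increment
   within the same basic block, since that is the position with which an
   auto-increment address could be combined.  */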
4234
4235 static void
4236 set_autoinc_for_original_candidates (struct ivopts_data *data)
4237 {
4238 unsigned i, j;
4239
4240 for (i = 0; i < n_iv_cands (data); i++)
4241 {
4242 struct iv_cand *cand = iv_cand (data, i);
4243 struct iv_use *closest = NULL;
4244 if (cand->pos != IP_ORIGINAL)
4245 continue;
4246 for (j = 0; j < n_iv_uses (data); j++)
4247 {
4248 struct iv_use *use = iv_use (data, j);
4249 unsigned uid = gimple_uid (use->stmt);
4250 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4251 || uid > gimple_uid (cand->incremented_at))
4252 continue;
4253 if (closest == NULL || uid > gimple_uid (closest->stmt))
4254 closest = use;
4255 }
4256 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4257 continue;
4258 cand->ainc_use = closest;
4259 }
4260 }
4261
4262 /* Finds the candidates for the induction variables. */
4263
4264 static void
4265 find_iv_candidates (struct ivopts_data *data)
4266 {
4267 /* Add commonly used ivs. */
4268 add_standard_iv_candidates (data);
4269
4270 /* Add old induction variables. */
4271 add_old_ivs_candidates (data);
4272
4273 /* Add induction variables derived from uses. */
4274 add_derived_ivs_candidates (data);
4275
4276 set_autoinc_for_original_candidates (data);
4277
4278 /* Record the important candidates. */
4279 record_important_candidates (data);
4280 }
4281
4282 /* Determines costs of basing the use of the iv on an iv candidate. */
4283
4284 static void
4285 determine_use_iv_costs (struct ivopts_data *data)
4286 {
4287 unsigned i, j;
4288 struct iv_use *use;
4289 struct iv_cand *cand;
4290 bitmap to_clear = BITMAP_ALLOC (NULL);
4291
4292 alloc_use_cost_map (data);
4293
4294 for (i = 0; i < n_iv_uses (data); i++)
4295 {
4296 use = iv_use (data, i);
4297
4298 if (data->consider_all_candidates)
4299 {
4300 for (j = 0; j < n_iv_cands (data); j++)
4301 {
4302 cand = iv_cand (data, j);
4303 determine_use_iv_cost (data, use, cand);
4304 }
4305 }
4306 else
4307 {
4308 bitmap_iterator bi;
4309
4310 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4311 {
4312 cand = iv_cand (data, j);
4313 if (!determine_use_iv_cost (data, use, cand))
4314 bitmap_set_bit (to_clear, j);
4315 }
4316
4317 /* Remove the candidates for which the cost is infinite from
4318 the list of related candidates. */
4319 bitmap_and_compl_into (use->related_cands, to_clear);
4320 bitmap_clear (to_clear);
4321 }
4322 }
4323
4324 BITMAP_FREE (to_clear);
4325
4326 if (dump_file && (dump_flags & TDF_DETAILS))
4327 {
4328 fprintf (dump_file, "Use-candidate costs:\n");
4329
4330 for (i = 0; i < n_iv_uses (data); i++)
4331 {
4332 use = iv_use (data, i);
4333
4334 fprintf (dump_file, "Use %d:\n", i);
4335 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4336 for (j = 0; j < use->n_map_members; j++)
4337 {
4338 if (!use->cost_map[j].cand
4339 || infinite_cost_p (use->cost_map[j].cost))
4340 continue;
4341
4342 fprintf (dump_file, " %d\t%d\t%d\t",
4343 use->cost_map[j].cand->id,
4344 use->cost_map[j].cost.cost,
4345 use->cost_map[j].cost.complexity);
4346 if (use->cost_map[j].depends_on)
4347 bitmap_print (dump_file,
4348 use->cost_map[j].depends_on, "","");
4349 fprintf (dump_file, "\n");
4350 }
4351
4352 fprintf (dump_file, "\n");
4353 }
4354 fprintf (dump_file, "\n");
4355 }
4356 }
4357
4358 /* Determines cost of the candidate CAND. */
4359
4360 static void
4361 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4362 {
4363 comp_cost cost_base;
4364 unsigned cost, cost_step;
4365 tree base;
4366
4367 if (!cand->iv)
4368 {
4369 cand->cost = 0;
4370 return;
4371 }
4372
4373 /* There are two costs associated with the candidate -- its increment
4374 and its initialization. The second is almost negligible for any loop
4375 that rolls enough, so we give it only very little weight. */
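/* The increment cost below is paid on every iteration, whereas the
   initialization cost is paid just once, hence the adjust_setup_cost
   scaling of the latter.  */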
4376
4377 base = cand->iv->base;
4378 cost_base = force_var_cost (data, base, NULL);
4379 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4380
4381 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4382
4383 /* Prefer the original ivs unless we may gain something by replacing them.
4384 The reason is to make debugging simpler, and this is not relevant for
4385 artificial ivs created by other optimization passes. */
4386 if (cand->pos != IP_ORIGINAL
4387 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4388 cost++;
4389
4390 /* Prefer not to insert statements into latch unless there are some
4391 already (so that we do not create unnecessary jumps). */
4392 if (cand->pos == IP_END
4393 && empty_block_p (ip_end_pos (data->current_loop)))
4394 cost++;
4395
4396 cand->cost = cost;
4397 cand->cost_step = cost_step;
4398 }
4399
4400 /* Determines costs of computation of the candidates. */
4401
4402 static void
4403 determine_iv_costs (struct ivopts_data *data)
4404 {
4405 unsigned i;
4406
4407 if (dump_file && (dump_flags & TDF_DETAILS))
4408 {
4409 fprintf (dump_file, "Candidate costs:\n");
4410 fprintf (dump_file, " cand\tcost\n");
4411 }
4412
4413 for (i = 0; i < n_iv_cands (data); i++)
4414 {
4415 struct iv_cand *cand = iv_cand (data, i);
4416
4417 determine_iv_cost (data, cand);
4418
4419 if (dump_file && (dump_flags & TDF_DETAILS))
4420 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4421 }
4422
4423 if (dump_file && (dump_flags & TDF_DETAILS))
4424 fprintf (dump_file, "\n");
4425 }
4426
4427 /* Calculates cost for having SIZE induction variables. */
4428
4429 static unsigned
4430 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4431 {
4432 /* We add size to the cost, so that we prefer eliminating ivs
4433 if possible. */
4434 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4435 }
4436
4437 /* For each size of the induction variable set determine the penalty. */
4438
4439 static void
4440 determine_set_costs (struct ivopts_data *data)
4441 {
4442 unsigned j, n;
4443 gimple phi;
4444 gimple_stmt_iterator psi;
4445 tree op;
4446 struct loop *loop = data->current_loop;
4447 bitmap_iterator bi;
4448
4449 /* We use the following model (definitely improvable, especially the
4450 cost function -- TODO):
4451
4452 We estimate the number of registers available (using MD data), name it A.
4453
4454 We estimate the number of registers used by the loop, name it U. This
4455 number is obtained as the number of loop phi nodes (not counting virtual
4456 registers and bivs) + the number of variables from outside of the loop.
4457
4458 We set a reserve R (free regs that are used for temporary computations,
4459 etc.). For now the reserve is a constant 3.
4460
4461 Let I be the number of induction variables.
4462
4463 -- if U + I + R <= A, the cost is I * SMALL_COST (just so that we do not
4464 encourage creating a lot of ivs without a reason).
4465 -- if A - R < U + I <= A, the cost is I * PRES_COST
4466 -- if U + I > A, the cost is I * PRES_COST and
4467 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
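/* For instance, with A = 16 and R = 3, a loop with U = 10 can host up to
   I = 3 ivs at SMALL_COST each; with I = 5 we have A - R < U + I <= A,
   so each iv costs PRES_COST; and with I = 8 we get U + I = 18 > A, so
   the spill term is weighted by (18 - 16) / 18.  */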
4468
4469 if (dump_file && (dump_flags & TDF_DETAILS))
4470 {
4471 fprintf (dump_file, "Global costs:\n");
4472 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4473 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4474 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4475 }
4476
4477 n = 0;
4478 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4479 {
4480 phi = gsi_stmt (psi);
4481 op = PHI_RESULT (phi);
4482
4483 if (!is_gimple_reg (op))
4484 continue;
4485
4486 if (get_iv (data, op))
4487 continue;
4488
4489 n++;
4490 }
4491
4492 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4493 {
4494 struct version_info *info = ver_info (data, j);
4495
4496 if (info->inv_id && info->has_nonlin_use)
4497 n++;
4498 }
4499
4500 data->regs_used = n;
4501 if (dump_file && (dump_flags & TDF_DETAILS))
4502 fprintf (dump_file, " regs_used %d\n", n);
4503
4504 if (dump_file && (dump_flags & TDF_DETAILS))
4505 {
4506 fprintf (dump_file, " cost for size:\n");
4507 fprintf (dump_file, " ivs\tcost\n");
4508 for (j = 0; j <= 2 * target_avail_regs; j++)
4509 fprintf (dump_file, " %d\t%d\n", j,
4510 ivopts_global_cost_for_size (data, j));
4511 fprintf (dump_file, "\n");
4512 }
4513 }
4514
4515 /* Returns true if A is a cheaper cost pair than B. */
4516
4517 static bool
4518 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4519 {
4520 int cmp;
4521
4522 if (!a)
4523 return false;
4524
4525 if (!b)
4526 return true;
4527
4528 cmp = compare_costs (a->cost, b->cost);
4529 if (cmp < 0)
4530 return true;
4531
4532 if (cmp > 0)
4533 return false;
4534
4535 /* In case the costs are the same, prefer the cheaper candidate. */
4536 if (a->cand->cost < b->cand->cost)
4537 return true;
4538
4539 return false;
4540 }
4541
4542 /* Computes the cost field of IVS structure. */
4543
4544 static void
4545 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4546 {
4547 comp_cost cost = ivs->cand_use_cost;
4548 cost.cost += ivs->cand_cost;
4549 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4550
4551 ivs->cost = cost;
4552 }
4553
4554 /* Remove invariants in set INVS from set IVS. */
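/* n_invariant_uses works as a reference count; an invariant stops
   occupying a register, and n_regs drops, only when the last use of the
   invariant in the set goes away.  */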
4555
4556 static void
4557 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4558 {
4559 bitmap_iterator bi;
4560 unsigned iid;
4561
4562 if (!invs)
4563 return;
4564
4565 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4566 {
4567 ivs->n_invariant_uses[iid]--;
4568 if (ivs->n_invariant_uses[iid] == 0)
4569 ivs->n_regs--;
4570 }
4571 }
4572
4573 /* Set USE not to be expressed by any candidate in IVS. */
4574
4575 static void
4576 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4577 struct iv_use *use)
4578 {
4579 unsigned uid = use->id, cid;
4580 struct cost_pair *cp;
4581
4582 cp = ivs->cand_for_use[uid];
4583 if (!cp)
4584 return;
4585 cid = cp->cand->id;
4586
4587 ivs->bad_uses++;
4588 ivs->cand_for_use[uid] = NULL;
4589 ivs->n_cand_uses[cid]--;
4590
4591 if (ivs->n_cand_uses[cid] == 0)
4592 {
4593 bitmap_clear_bit (ivs->cands, cid);
4594 /* Do not count the pseudocandidates. */
4595 if (cp->cand->iv)
4596 ivs->n_regs--;
4597 ivs->n_cands--;
4598 ivs->cand_cost -= cp->cand->cost;
4599
4600 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4601 }
4602
4603 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4604
4605 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4606 iv_ca_recount_cost (data, ivs);
4607 }
4608
4609 /* Add invariants in set INVS to set IVS. */
4610
4611 static void
4612 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4613 {
4614 bitmap_iterator bi;
4615 unsigned iid;
4616
4617 if (!invs)
4618 return;
4619
4620 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4621 {
4622 ivs->n_invariant_uses[iid]++;
4623 if (ivs->n_invariant_uses[iid] == 1)
4624 ivs->n_regs++;
4625 }
4626 }
4627
4628 /* Set cost pair for USE in set IVS to CP. */
4629
4630 static void
4631 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4632 struct iv_use *use, struct cost_pair *cp)
4633 {
4634 unsigned uid = use->id, cid;
4635
4636 if (ivs->cand_for_use[uid] == cp)
4637 return;
4638
4639 if (ivs->cand_for_use[uid])
4640 iv_ca_set_no_cp (data, ivs, use);
4641
4642 if (cp)
4643 {
4644 cid = cp->cand->id;
4645
4646 ivs->bad_uses--;
4647 ivs->cand_for_use[uid] = cp;
4648 ivs->n_cand_uses[cid]++;
4649 if (ivs->n_cand_uses[cid] == 1)
4650 {
4651 bitmap_set_bit (ivs->cands, cid);
4652 /* Do not count the pseudocandidates. */
4653 if (cp->cand->iv)
4654 ivs->n_regs++;
4655 ivs->n_cands++;
4656 ivs->cand_cost += cp->cand->cost;
4657
4658 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4659 }
4660
4661 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4662 iv_ca_set_add_invariants (ivs, cp->depends_on);
4663 iv_ca_recount_cost (data, ivs);
4664 }
4665 }
4666
4667 /* Extend set IVS by expressing USE by some of the candidates in it
4668 if possible. */
4669
4670 static void
4671 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4672 struct iv_use *use)
4673 {
4674 struct cost_pair *best_cp = NULL, *cp;
4675 bitmap_iterator bi;
4676 unsigned i;
4677
4678 gcc_assert (ivs->upto >= use->id);
4679
4680 if (ivs->upto == use->id)
4681 {
4682 ivs->upto++;
4683 ivs->bad_uses++;
4684 }
4685
4686 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4687 {
4688 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4689
4690 if (cheaper_cost_pair (cp, best_cp))
4691 best_cp = cp;
4692 }
4693
4694 iv_ca_set_cp (data, ivs, use, best_cp);
4695 }
4696
4697 /* Get cost for assignment IVS. */
4698
4699 static comp_cost
4700 iv_ca_cost (struct iv_ca *ivs)
4701 {
4702 /* This was a conditional expression but it triggered a bug in
4703 Sun C 5.5. */
4704 if (ivs->bad_uses)
4705 return infinite_cost;
4706 else
4707 return ivs->cost;
4708 }
4709
4710 /* Returns true if all dependences of CP are among invariants in IVS. */
4711
4712 static bool
4713 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4714 {
4715 unsigned i;
4716 bitmap_iterator bi;
4717
4718 if (!cp->depends_on)
4719 return true;
4720
4721 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4722 {
4723 if (ivs->n_invariant_uses[i] == 0)
4724 return false;
4725 }
4726
4727 return true;
4728 }
4729
4730 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and chains
4731 it before NEXT_CHANGE. */
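/* Delta lists record tentative changes: a list is applied with
   iv_ca_delta_commit to measure the cost of the modified set, and the same
   call with FORWARD false undoes it again.  */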
4732
4733 static struct iv_ca_delta *
4734 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4735 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4736 {
4737 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4738
4739 change->use = use;
4740 change->old_cp = old_cp;
4741 change->new_cp = new_cp;
4742 change->next_change = next_change;
4743
4744 return change;
4745 }
4746
4747 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4748 are rewritten. */
4749
4750 static struct iv_ca_delta *
4751 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4752 {
4753 struct iv_ca_delta *last;
4754
4755 if (!l2)
4756 return l1;
4757
4758 if (!l1)
4759 return l2;
4760
4761 for (last = l1; last->next_change; last = last->next_change)
4762 continue;
4763 last->next_change = l2;
4764
4765 return l1;
4766 }
4767
4768 /* Returns the candidate by which USE is expressed in IVS. */
4769
4770 static struct cost_pair *
4771 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4772 {
4773 return ivs->cand_for_use[use->id];
4774 }
4775
4776 /* Reverse the list of changes DELTA, forming the inverse to it. */
4777
4778 static struct iv_ca_delta *
4779 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4780 {
4781 struct iv_ca_delta *act, *next, *prev = NULL;
4782 struct cost_pair *tmp;
4783
4784 for (act = delta; act; act = next)
4785 {
4786 next = act->next_change;
4787 act->next_change = prev;
4788 prev = act;
4789
4790 tmp = act->old_cp;
4791 act->old_cp = act->new_cp;
4792 act->new_cp = tmp;
4793 }
4794
4795 return prev;
4796 }
4797
4798 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4799 reverted instead. */
4800
4801 static void
4802 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4803 struct iv_ca_delta *delta, bool forward)
4804 {
4805 struct cost_pair *from, *to;
4806 struct iv_ca_delta *act;
4807
4808 if (!forward)
4809 delta = iv_ca_delta_reverse (delta);
4810
4811 for (act = delta; act; act = act->next_change)
4812 {
4813 from = act->old_cp;
4814 to = act->new_cp;
4815 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4816 iv_ca_set_cp (data, ivs, act->use, to);
4817 }
4818
4819 if (!forward)
4820 iv_ca_delta_reverse (delta);
4821 }
4822
4823 /* Returns true if CAND is used in IVS. */
4824
4825 static bool
4826 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4827 {
4828 return ivs->n_cand_uses[cand->id] > 0;
4829 }
4830
4831 /* Returns number of induction variable candidates in the set IVS. */
4832
4833 static unsigned
4834 iv_ca_n_cands (struct iv_ca *ivs)
4835 {
4836 return ivs->n_cands;
4837 }
4838
4839 /* Free the list of changes DELTA. */
4840
4841 static void
4842 iv_ca_delta_free (struct iv_ca_delta **delta)
4843 {
4844 struct iv_ca_delta *act, *next;
4845
4846 for (act = *delta; act; act = next)
4847 {
4848 next = act->next_change;
4849 free (act);
4850 }
4851
4852 *delta = NULL;
4853 }
4854
4855 /* Allocates new iv candidates assignment. */
4856
4857 static struct iv_ca *
4858 iv_ca_new (struct ivopts_data *data)
4859 {
4860 struct iv_ca *nw = XNEW (struct iv_ca);
4861
4862 nw->upto = 0;
4863 nw->bad_uses = 0;
4864 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4865 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4866 nw->cands = BITMAP_ALLOC (NULL);
4867 nw->n_cands = 0;
4868 nw->n_regs = 0;
4869 nw->cand_use_cost = zero_cost;
4870 nw->cand_cost = 0;
4871 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4872 nw->cost = zero_cost;
4873
4874 return nw;
4875 }
4876
4877 /* Free memory occupied by the set IVS. */
4878
4879 static void
4880 iv_ca_free (struct iv_ca **ivs)
4881 {
4882 free ((*ivs)->cand_for_use);
4883 free ((*ivs)->n_cand_uses);
4884 BITMAP_FREE ((*ivs)->cands);
4885 free ((*ivs)->n_invariant_uses);
4886 free (*ivs);
4887 *ivs = NULL;
4888 }
4889
4890 /* Dumps IVS to FILE. */
4891
4892 static void
4893 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4894 {
4895 const char *pref = " invariants ";
4896 unsigned i;
4897 comp_cost cost = iv_ca_cost (ivs);
4898
4899 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4900 bitmap_print (file, ivs->cands, " candidates ","\n");
4901
4902 for (i = 1; i <= data->max_inv_id; i++)
4903 if (ivs->n_invariant_uses[i])
4904 {
4905 fprintf (file, "%s%d", pref, i);
4906 pref = ", ";
4907 }
4908 fprintf (file, "\n");
4909 }
4910
4911 /* Try changing the candidate for each use in IVS to CAND. Return the cost
4912 of the new set, and store the differences in DELTA. The number of
4913 induction variables in the new set is stored in N_IVS. */
4914
4915 static comp_cost
4916 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4917 struct iv_cand *cand, struct iv_ca_delta **delta,
4918 unsigned *n_ivs)
4919 {
4920 unsigned i;
4921 comp_cost cost;
4922 struct iv_use *use;
4923 struct cost_pair *old_cp, *new_cp;
4924
4925 *delta = NULL;
4926 for (i = 0; i < ivs->upto; i++)
4927 {
4928 use = iv_use (data, i);
4929 old_cp = iv_ca_cand_for_use (ivs, use);
4930
4931 if (old_cp
4932 && old_cp->cand == cand)
4933 continue;
4934
4935 new_cp = get_use_iv_cost (data, use, cand);
4936 if (!new_cp)
4937 continue;
4938
4939 if (!iv_ca_has_deps (ivs, new_cp))
4940 continue;
4941
4942 if (!cheaper_cost_pair (new_cp, old_cp))
4943 continue;
4944
4945 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4946 }
4947
4948 iv_ca_delta_commit (data, ivs, *delta, true);
4949 cost = iv_ca_cost (ivs);
4950 if (n_ivs)
4951 *n_ivs = iv_ca_n_cands (ivs);
4952 iv_ca_delta_commit (data, ivs, *delta, false);
4953
4954 return cost;
4955 }
4956
4957 /* Try narrowing set IVS by removing CAND. Return the cost of
4958 the new set and store the differences in DELTA. */
4959
4960 static comp_cost
4961 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4962 struct iv_cand *cand, struct iv_ca_delta **delta)
4963 {
4964 unsigned i, ci;
4965 struct iv_use *use;
4966 struct cost_pair *old_cp, *new_cp, *cp;
4967 bitmap_iterator bi;
4968 struct iv_cand *cnd;
4969 comp_cost cost;
4970
4971 *delta = NULL;
4972 for (i = 0; i < n_iv_uses (data); i++)
4973 {
4974 use = iv_use (data, i);
4975
4976 old_cp = iv_ca_cand_for_use (ivs, use);
4977 if (old_cp->cand != cand)
4978 continue;
4979
4980 new_cp = NULL;
4981
4982 if (data->consider_all_candidates)
4983 {
4984 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4985 {
4986 if (ci == cand->id)
4987 continue;
4988
4989 cnd = iv_cand (data, ci);
4990
4991 cp = get_use_iv_cost (data, use, cnd);
4992 if (!cp)
4993 continue;
4994 if (!iv_ca_has_deps (ivs, cp))
4995 continue;
4996
4997 if (!cheaper_cost_pair (cp, new_cp))
4998 continue;
4999
5000 new_cp = cp;
5001 }
5002 }
5003 else
5004 {
5005 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5006 {
5007 if (ci == cand->id)
5008 continue;
5009
5010 cnd = iv_cand (data, ci);
5011
5012 cp = get_use_iv_cost (data, use, cnd);
5013 if (!cp)
5014 continue;
5015 if (!iv_ca_has_deps (ivs, cp))
5016 continue;
5017
5018 if (!cheaper_cost_pair (cp, new_cp))
5019 continue;
5020
5021 new_cp = cp;
5022 }
5023 }
5024
5025 if (!new_cp)
5026 {
5027 iv_ca_delta_free (delta);
5028 return infinite_cost;
5029 }
5030
5031 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5032 }
5033
5034 iv_ca_delta_commit (data, ivs, *delta, true);
5035 cost = iv_ca_cost (ivs);
5036 iv_ca_delta_commit (data, ivs, *delta, false);
5037
5038 return cost;
5039 }
5040
5041 /* Try optimizing the set of candidates IVS by removing candidates other
5042 than EXCEPT_CAND from it. Return the cost of the new set, and store
5043 the differences in DELTA. */
5044
5045 static comp_cost
5046 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5047 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5048 {
5049 bitmap_iterator bi;
5050 struct iv_ca_delta *act_delta, *best_delta;
5051 unsigned i;
5052 comp_cost best_cost, acost;
5053 struct iv_cand *cand;
5054
5055 best_delta = NULL;
5056 best_cost = iv_ca_cost (ivs);
5057
5058 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5059 {
5060 cand = iv_cand (data, i);
5061
5062 if (cand == except_cand)
5063 continue;
5064
5065 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5066
5067 if (compare_costs (acost, best_cost) < 0)
5068 {
5069 best_cost = acost;
5070 iv_ca_delta_free (&best_delta);
5071 best_delta = act_delta;
5072 }
5073 else
5074 iv_ca_delta_free (&act_delta);
5075 }
5076
5077 if (!best_delta)
5078 {
5079 *delta = NULL;
5080 return best_cost;
5081 }
5082
5083 /* Recurse to possibly remove other unnecessary ivs. */
5084 iv_ca_delta_commit (data, ivs, best_delta, true);
5085 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5086 iv_ca_delta_commit (data, ivs, best_delta, false);
5087 *delta = iv_ca_delta_join (best_delta, *delta);
5088 return best_cost;
5089 }
5090
5091 /* Tries to extend the set IVS in the best possible way in order
5092 to express USE. */
5093
5094 static bool
5095 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5096 struct iv_use *use)
5097 {
5098 comp_cost best_cost, act_cost;
5099 unsigned i;
5100 bitmap_iterator bi;
5101 struct iv_cand *cand;
5102 struct iv_ca_delta *best_delta = NULL, *act_delta;
5103 struct cost_pair *cp;
5104
5105 iv_ca_add_use (data, ivs, use);
5106 best_cost = iv_ca_cost (ivs);
5107
5108 cp = iv_ca_cand_for_use (ivs, use);
5109 if (cp)
5110 {
5111 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5112 iv_ca_set_no_cp (data, ivs, use);
5113 }
5114
5115 /* First try important candidates not based on any memory object. Only if
5116 this fails, try the specific ones. Rationale -- in loops with many
5117 variables the best choice often is to use just one generic biv. If we
5118 added here many ivs specific to the uses, the optimization algorithm later
5119 would be likely to get stuck in a local minimum, thus causing us to create
5120 too many ivs. The approach from few ivs to more seems more likely to be
5121 successful -- starting from few ivs, replacing an expensive use by a
5122 specific iv should always be a win. */
5123 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5124 {
5125 cand = iv_cand (data, i);
5126
5127 if (cand->iv->base_object != NULL_TREE)
5128 continue;
5129
5130 if (iv_ca_cand_used_p (ivs, cand))
5131 continue;
5132
5133 cp = get_use_iv_cost (data, use, cand);
5134 if (!cp)
5135 continue;
5136
5137 iv_ca_set_cp (data, ivs, use, cp);
5138 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5139 iv_ca_set_no_cp (data, ivs, use);
5140 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5141
5142 if (compare_costs (act_cost, best_cost) < 0)
5143 {
5144 best_cost = act_cost;
5145
5146 iv_ca_delta_free (&best_delta);
5147 best_delta = act_delta;
5148 }
5149 else
5150 iv_ca_delta_free (&act_delta);
5151 }
5152
5153 if (infinite_cost_p (best_cost))
5154 {
5155 for (i = 0; i < use->n_map_members; i++)
5156 {
5157 cp = use->cost_map + i;
5158 cand = cp->cand;
5159 if (!cand)
5160 continue;
5161
5162 /* Already tried this. */
5163 if (cand->important && cand->iv->base_object == NULL_TREE)
5164 continue;
5165
5166 if (iv_ca_cand_used_p (ivs, cand))
5167 continue;
5168
5169 act_delta = NULL;
5170 iv_ca_set_cp (data, ivs, use, cp);
5171 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5172 iv_ca_set_no_cp (data, ivs, use);
5173 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5174 cp, act_delta);
5175
5176 if (compare_costs (act_cost, best_cost) < 0)
5177 {
5178 best_cost = act_cost;
5179
5180 if (best_delta)
5181 iv_ca_delta_free (&best_delta);
5182 best_delta = act_delta;
5183 }
5184 else
5185 iv_ca_delta_free (&act_delta);
5186 }
5187 }
5188
5189 iv_ca_delta_commit (data, ivs, best_delta, true);
5190 iv_ca_delta_free (&best_delta);
5191
5192 return !infinite_cost_p (best_cost);
5193 }
5194
5195 /* Finds an initial assignment of candidates to uses. */
5196
5197 static struct iv_ca *
5198 get_initial_solution (struct ivopts_data *data)
5199 {
5200 struct iv_ca *ivs = iv_ca_new (data);
5201 unsigned i;
5202
5203 for (i = 0; i < n_iv_uses (data); i++)
5204 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5205 {
5206 iv_ca_free (&ivs);
5207 return NULL;
5208 }
5209
5210 return ivs;
5211 }
5212
5213 /* Tries to improve the set of induction variables IVS. */
5214
5215 static bool
5216 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5217 {
5218 unsigned i, n_ivs;
5219 comp_cost acost, best_cost = iv_ca_cost (ivs);
5220 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5221 struct iv_cand *cand;
5222
5223 /* Try extending the set of induction variables by one. */
5224 for (i = 0; i < n_iv_cands (data); i++)
5225 {
5226 cand = iv_cand (data, i);
5227
5228 if (iv_ca_cand_used_p (ivs, cand))
5229 continue;
5230
5231 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5232 if (!act_delta)
5233 continue;
5234
5235 /* If we successfully added the candidate and the set is small enough,
5236 try optimizing it by removing other candidates. */
5237 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5238 {
5239 iv_ca_delta_commit (data, ivs, act_delta, true);
5240 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5241 iv_ca_delta_commit (data, ivs, act_delta, false);
5242 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5243 }
5244
5245 if (compare_costs (acost, best_cost) < 0)
5246 {
5247 best_cost = acost;
5248 iv_ca_delta_free (&best_delta);
5249 best_delta = act_delta;
5250 }
5251 else
5252 iv_ca_delta_free (&act_delta);
5253 }
5254
5255 if (!best_delta)
5256 {
5257 /* Try removing the candidates from the set instead. */
5258 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5259
5260 /* Nothing more we can do. */
5261 if (!best_delta)
5262 return false;
5263 }
5264
5265 iv_ca_delta_commit (data, ivs, best_delta, true);
5266 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5267 iv_ca_delta_free (&best_delta);
5268 return true;
5269 }
5270
5271 /* Attempts to find the optimal set of induction variables. We use a simple
5272 greedy heuristic -- we try to replace at most one candidate in the selected
5273 solution and remove the unused ivs while this improves the cost. */
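/* A sketch of the search:

     set = initial solution (a candidate chosen for every use);
     while (try_improve_iv_set (data, set))
       ;

   where each improvement step either extends the set by a single candidate
   (pruning redundant ones afterwards) or merely prunes it.  */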
5274
5275 static struct iv_ca *
5276 find_optimal_iv_set (struct ivopts_data *data)
5277 {
5278 unsigned i;
5279 struct iv_ca *set;
5280 struct iv_use *use;
5281
5282 /* Get the initial solution. */
5283 set = get_initial_solution (data);
5284 if (!set)
5285 {
5286 if (dump_file && (dump_flags & TDF_DETAILS))
5287 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5288 return NULL;
5289 }
5290
5291 if (dump_file && (dump_flags & TDF_DETAILS))
5292 {
5293 fprintf (dump_file, "Initial set of candidates:\n");
5294 iv_ca_dump (data, dump_file, set);
5295 }
5296
5297 while (try_improve_iv_set (data, set))
5298 {
5299 if (dump_file && (dump_flags & TDF_DETAILS))
5300 {
5301 fprintf (dump_file, "Improved to:\n");
5302 iv_ca_dump (data, dump_file, set);
5303 }
5304 }
5305
5306 if (dump_file && (dump_flags & TDF_DETAILS))
5307 {
5308 comp_cost cost = iv_ca_cost (set);
5309 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5310 }
5311
5312 for (i = 0; i < n_iv_uses (data); i++)
5313 {
5314 use = iv_use (data, i);
5315 use->selected = iv_ca_cand_for_use (set, use)->cand;
5316 }
5317
5318 return set;
5319 }
5320
5321 /* Creates a new induction variable corresponding to CAND. */
5322
5323 static void
5324 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5325 {
5326 gimple_stmt_iterator incr_pos;
5327 tree base;
5328 bool after = false;
5329
5330 if (!cand->iv)
5331 return;
5332
5333 switch (cand->pos)
5334 {
5335 case IP_NORMAL:
5336 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5337 break;
5338
5339 case IP_END:
5340 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5341 after = true;
5342 break;
5343
5344 case IP_AFTER_USE:
5345 after = true;
5346 /* fall through */
5347 case IP_BEFORE_USE:
5348 incr_pos = gsi_for_stmt (cand->incremented_at);
5349 break;
5350
5351 case IP_ORIGINAL:
5352 /* Mark that the iv is preserved. */
5353 name_info (data, cand->var_before)->preserve_biv = true;
5354 name_info (data, cand->var_after)->preserve_biv = true;
5355
5356 /* Rewrite the increment so that it uses var_before directly. */
5357 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5358
5359 return;
5360 }
5361
5362 gimple_add_tmp_var (cand->var_before);
5363 add_referenced_var (cand->var_before);
5364
5365 base = unshare_expr (cand->iv->base);
5366
5367 create_iv (base, unshare_expr (cand->iv->step),
5368 cand->var_before, data->current_loop,
5369 &incr_pos, after, &cand->var_before, &cand->var_after);
5370 }
5371
5372 /* Creates new induction variables described in SET. */
5373
5374 static void
5375 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5376 {
5377 unsigned i;
5378 struct iv_cand *cand;
5379 bitmap_iterator bi;
5380
5381 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5382 {
5383 cand = iv_cand (data, i);
5384 create_new_iv (data, cand);
5385 }
5386 }
5387
5388
5389 /* Rewrites USE (definition of iv used in a nonlinear expression)
5390 using candidate CAND. */
5391
5392 static void
5393 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5394 struct iv_use *use, struct iv_cand *cand)
5395 {
5396 tree comp;
5397 tree op, tgt;
5398 gimple ass;
5399 gimple_stmt_iterator bsi;
5400
5401 /* An important special case -- if we are asked to express the value of
5402 the original iv by itself, just exit; there is no need to
5403 introduce a new computation (that might also need casting the
5404 variable to unsigned and back). */
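/* E.g. the preserved increment i_3 = i_1 + 1 of an original biv may stay
   as it is, provided that the added operand is a constant or the step
   itself -- the checks below.  */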
5405 if (cand->pos == IP_ORIGINAL
5406 && cand->incremented_at == use->stmt)
5407 {
5408 tree step, ctype, utype;
5409 enum tree_code incr_code = PLUS_EXPR, old_code;
5410
5411 gcc_assert (is_gimple_assign (use->stmt));
5412 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5413
5414 step = cand->iv->step;
5415 ctype = TREE_TYPE (step);
5416 utype = TREE_TYPE (cand->var_after);
5417 if (TREE_CODE (step) == NEGATE_EXPR)
5418 {
5419 incr_code = MINUS_EXPR;
5420 step = TREE_OPERAND (step, 0);
5421 }
5422
5423 /* Check whether we may leave the computation unchanged.
5424 This is the case only if it does not rely on other
5425 computations in the loop -- otherwise, the computation
5426 we rely upon may be removed in remove_unused_ivs,
5427 thus leading to an ICE. */
5428 old_code = gimple_assign_rhs_code (use->stmt);
5429 if (old_code == PLUS_EXPR
5430 || old_code == MINUS_EXPR
5431 || old_code == POINTER_PLUS_EXPR)
5432 {
5433 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5434 op = gimple_assign_rhs2 (use->stmt);
5435 else if (old_code != MINUS_EXPR
5436 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5437 op = gimple_assign_rhs1 (use->stmt);
5438 else
5439 op = NULL_TREE;
5440 }
5441 else
5442 op = NULL_TREE;
5443
5444 if (op
5445 && (TREE_CODE (op) == INTEGER_CST
5446 || operand_equal_p (op, step, 0)))
5447 return;
5448
5449 /* Otherwise, add the necessary computations to express
5450 the iv. */
5451 op = fold_convert (ctype, cand->var_before);
5452 comp = fold_convert (utype,
5453 build2 (incr_code, ctype, op,
5454 unshare_expr (step)));
5455 }
5456 else
5457 {
5458 comp = get_computation (data->current_loop, use, cand);
5459 gcc_assert (comp != NULL_TREE);
5460 }
5461
5462 switch (gimple_code (use->stmt))
5463 {
5464 case GIMPLE_PHI:
5465 tgt = PHI_RESULT (use->stmt);
5466
5467 /* If we should keep the biv, do not replace it. */
5468 if (name_info (data, tgt)->preserve_biv)
5469 return;
5470
5471 bsi = gsi_after_labels (gimple_bb (use->stmt));
5472 break;
5473
5474 case GIMPLE_ASSIGN:
5475 tgt = gimple_assign_lhs (use->stmt);
5476 bsi = gsi_for_stmt (use->stmt);
5477 break;
5478
5479 default:
5480 gcc_unreachable ();
5481 }
5482
5483 if (!valid_gimple_rhs_p (comp)
5484 || (gimple_code (use->stmt) != GIMPLE_PHI
5485 /* We can't allow re-allocating the stmt as it might be pointed
5486 to still. */
5487 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
5488 >= gimple_num_ops (gsi_stmt (bsi)))))
5489 comp = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5490 true, GSI_SAME_STMT);
5491
5492 if (gimple_code (use->stmt) == GIMPLE_PHI)
5493 {
5494 ass = gimple_build_assign (tgt, comp);
5495 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5496
5497 bsi = gsi_for_stmt (use->stmt);
5498 remove_phi_node (&bsi, false);
5499 }
5500 else
5501 {
5502 gimple_assign_set_rhs_from_tree (&bsi, comp);
5503 use->stmt = gsi_stmt (bsi);
5504 }
5505 }
5506
5507 /* Replaces ssa name in index IDX by its basic variable. Callback for
5508 for_each_index. */
5509
5510 static bool
5511 idx_remove_ssa_names (tree base, tree *idx,
5512 void *data ATTRIBUTE_UNUSED)
5513 {
5514 tree *op;
5515
5516 if (TREE_CODE (*idx) == SSA_NAME)
5517 *idx = SSA_NAME_VAR (*idx);
5518
5519 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5520 {
5521 op = &TREE_OPERAND (base, 2);
5522 if (*op
5523 && TREE_CODE (*op) == SSA_NAME)
5524 *op = SSA_NAME_VAR (*op);
5525 op = &TREE_OPERAND (base, 3);
5526 if (*op
5527 && TREE_CODE (*op) == SSA_NAME)
5528 *op = SSA_NAME_VAR (*op);
5529 }
5530
5531 return true;
5532 }
5533
5534 /* Unshares REF and replaces ssa names inside it by their basic variables. */
5535
5536 static tree
5537 unshare_and_remove_ssa_names (tree ref)
5538 {
5539 ref = unshare_expr (ref);
5540 for_each_index (&ref, idx_remove_ssa_names, NULL);
5541
5542 return ref;
5543 }
5544
5545 /* Copies the reference information from OLD_REF to NEW_REF. */
5546
5547 static void
5548 copy_ref_info (tree new_ref, tree old_ref)
5549 {
5550 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5551 copy_mem_ref_info (new_ref, old_ref);
5552 else
5553 {
5554 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5555 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5556 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5557 /* We can transfer points-to information from an old pointer
5558 or decl base to the new one. */
5559 if (TMR_BASE (new_ref)
5560 && TREE_CODE (TMR_BASE (new_ref)) == SSA_NAME
5561 && POINTER_TYPE_P (TREE_TYPE (TMR_BASE (new_ref)))
5562 && !SSA_NAME_PTR_INFO (TMR_BASE (new_ref)))
5563 {
5564 tree base = get_base_address (old_ref);
5565 if ((INDIRECT_REF_P (base)
5566 || TREE_CODE (base) == MEM_REF)
5567 && TREE_CODE (TREE_OPERAND (base, 0)) == SSA_NAME)
5568 duplicate_ssa_name_ptr_info
5569 (TMR_BASE (new_ref), SSA_NAME_PTR_INFO (TREE_OPERAND (base, 0)));
5570 else if (TREE_CODE (base) == VAR_DECL
5571 || TREE_CODE (base) == PARM_DECL
5572 || TREE_CODE (base) == RESULT_DECL)
5573 {
5574 struct ptr_info_def *pi = get_ptr_info (TMR_BASE (new_ref));
5575 pt_solution_set_var (&pi->pt, base);
5576 }
5577 }
5578 }
5579 }
5580
5581 /* Rewrites USE (address that is an iv) using candidate CAND. */
5582
5583 static void
5584 rewrite_use_address (struct ivopts_data *data,
5585 struct iv_use *use, struct iv_cand *cand)
5586 {
5587 aff_tree aff;
5588 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5589 tree base_hint = NULL_TREE;
5590 tree ref;
5591 bool ok;
5592
5593 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5594 gcc_assert (ok);
5595 unshare_aff_combination (&aff);
5596
5597 /* To avoid undefined overflow problems, all IV candidates use unsigned
5598 integer types. The drawback is that this makes it impossible for
5599 create_mem_ref to distinguish an IV that is based on a memory object
5600 from one that represents simply an offset.
5601
5602 To work around this problem, we pass a hint to create_mem_ref that
5603 indicates which variable (if any) in aff is an IV based on a memory
5604 object. Note that we only consider the candidate. If this is not
5605 based on an object, the base of the reference is in some subexpression
5606 of the use -- but these will use pointer types, so they are recognized
5607 by the create_mem_ref heuristics anyway. */
5608 if (cand->iv->base_object)
5609 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5610
5611 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5612 data->speed);
5613 copy_ref_info (ref, *use->op_p);
5614 *use->op_p = ref;
5615 }
5616
5617 /* Rewrites USE (a condition in which one of the arguments is an iv) using
5618 candidate CAND. */
5619
5620 static void
5621 rewrite_use_compare (struct ivopts_data *data,
5622 struct iv_use *use, struct iv_cand *cand)
5623 {
5624 tree comp, *var_p, op, bound;
5625 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5626 enum tree_code compare;
5627 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5628 bool ok;
5629
5630 bound = cp->value;
5631 if (bound)
5632 {
5633 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5634 tree var_type = TREE_TYPE (var);
5635 gimple_seq stmts;
5636
5637 compare = iv_elimination_compare (data, use);
5638 bound = unshare_expr (fold_convert (var_type, bound));
5639 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5640 if (stmts)
5641 gsi_insert_seq_on_edge_immediate (
5642 loop_preheader_edge (data->current_loop),
5643 stmts);
5644
5645 gimple_cond_set_lhs (use->stmt, var);
5646 gimple_cond_set_code (use->stmt, compare);
5647 gimple_cond_set_rhs (use->stmt, op);
5648 return;
5649 }
5650
5651 /* The induction variable elimination failed; just express the original
5652 giv. */
5653 comp = get_computation (data->current_loop, use, cand);
5654 gcc_assert (comp != NULL_TREE);
5655
5656 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5657 gcc_assert (ok);
5658
5659 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5660 true, GSI_SAME_STMT);
5661 }
5662
5663 /* Rewrites USE using candidate CAND. */
5664
5665 static void
5666 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5667 {
5668 switch (use->type)
5669 {
5670 case USE_NONLINEAR_EXPR:
5671 rewrite_use_nonlinear_expr (data, use, cand);
5672 break;
5673
5674 case USE_ADDRESS:
5675 rewrite_use_address (data, use, cand);
5676 break;
5677
5678 case USE_COMPARE:
5679 rewrite_use_compare (data, use, cand);
5680 break;
5681
5682 default:
5683 gcc_unreachable ();
5684 }
5685
5686 update_stmt (use->stmt);
5687 }
5688
5689 /* Rewrite the uses using the selected induction variables. */
5690
5691 static void
5692 rewrite_uses (struct ivopts_data *data)
5693 {
5694 unsigned i;
5695 struct iv_cand *cand;
5696 struct iv_use *use;
5697
5698 for (i = 0; i < n_iv_uses (data); i++)
5699 {
5700 use = iv_use (data, i);
5701 cand = use->selected;
5702 gcc_assert (cand);
5703
5704 rewrite_use (data, use, cand);
5705 }
5706 }
5707
5708 /* Removes the ivs that are not used after rewriting. */
5709
5710 static void
5711 remove_unused_ivs (struct ivopts_data *data)
5712 {
5713 unsigned j;
5714 bitmap_iterator bi;
5715 bitmap toremove = BITMAP_ALLOC (NULL);
5716
5717 /* Figure out an order in which to release SSA DEFs so that we don't
5718 release something that we'd have to propagate into a debug stmt
5719 afterwards. */
5720 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5721 {
5722 struct version_info *info;
5723
5724 info = ver_info (data, j);
5725 if (info->iv
5726 && !integer_zerop (info->iv->step)
5727 && !info->inv_id
5728 && !info->iv->have_use_for
5729 && !info->preserve_biv)
5730 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5731 }
5732
5733 release_defs_bitset (toremove);
5734
5735 BITMAP_FREE (toremove);
5736 }
5737
5738 /* Frees data allocated by the optimization of a single loop. */
5739
5740 static void
5741 free_loop_data (struct ivopts_data *data)
5742 {
5743 unsigned i, j;
5744 bitmap_iterator bi;
5745 tree obj;
5746
5747 if (data->niters)
5748 {
5749 pointer_map_destroy (data->niters);
5750 data->niters = NULL;
5751 }
5752
5753 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5754 {
5755 struct version_info *info;
5756
5757 info = ver_info (data, i);
5758 if (info->iv)
5759 free (info->iv);
5760 info->iv = NULL;
5761 info->has_nonlin_use = false;
5762 info->preserve_biv = false;
5763 info->inv_id = 0;
5764 }
5765 bitmap_clear (data->relevant);
5766 bitmap_clear (data->important_candidates);
5767
5768 for (i = 0; i < n_iv_uses (data); i++)
5769 {
5770 struct iv_use *use = iv_use (data, i);
5771
5772 free (use->iv);
5773 BITMAP_FREE (use->related_cands);
5774 for (j = 0; j < use->n_map_members; j++)
5775 if (use->cost_map[j].depends_on)
5776 BITMAP_FREE (use->cost_map[j].depends_on);
5777 free (use->cost_map);
5778 free (use);
5779 }
5780 VEC_truncate (iv_use_p, data->iv_uses, 0);
5781
5782 for (i = 0; i < n_iv_cands (data); i++)
5783 {
5784 struct iv_cand *cand = iv_cand (data, i);
5785
5786 if (cand->iv)
5787 free (cand->iv);
5788 if (cand->depends_on)
5789 BITMAP_FREE (cand->depends_on);
5790 free (cand);
5791 }
5792 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5793
5794 if (data->version_info_size < num_ssa_names)
5795 {
5796 data->version_info_size = 2 * num_ssa_names;
5797 free (data->version_info);
5798 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5799 }
5800
5801 data->max_inv_id = 0;
5802
5803 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5804 SET_DECL_RTL (obj, NULL_RTX);
5805
5806 VEC_truncate (tree, decl_rtl_to_reset, 0);
5807 }
5808
5809 /* Finalizes the data structures used by the iv optimization pass. */
5811
5812 static void
5813 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5814 {
5815 free_loop_data (data);
5816 free (data->version_info);
5817 BITMAP_FREE (data->relevant);
5818 BITMAP_FREE (data->important_candidates);
5819
5820 VEC_free (tree, heap, decl_rtl_to_reset);
5821 VEC_free (iv_use_p, heap, data->iv_uses);
5822 VEC_free (iv_cand_p, heap, data->iv_candidates);
5823 }
5824
5825 /* Optimizes the LOOP. Returns true if anything changed. */
5826
5827 static bool
5828 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5829 {
5830 bool changed = false;
5831 struct iv_ca *iv_ca;
5832 edge exit;
5833 basic_block *body;
5834
5835 gcc_assert (!data->niters);
5836 data->current_loop = loop;
5837 data->speed = optimize_loop_for_speed_p (loop);
5838
5839 if (dump_file && (dump_flags & TDF_DETAILS))
5840 {
5841 fprintf (dump_file, "Processing loop %d\n", loop->num);
5842
5843 exit = single_dom_exit (loop);
5844 if (exit)
5845 {
5846 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5847 exit->src->index, exit->dest->index);
5848 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5849 fprintf (dump_file, "\n");
5850 }
5851
5852 fprintf (dump_file, "\n");
5853 }
5854
5855 body = get_loop_body (loop);
5856 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5857 free (body);
5858
5859 /* For each ssa name, determine whether it behaves as an induction variable
5860 in some loop. */
5861 if (!find_induction_variables (data))
5862 goto finish;
5863
5864 /* Finds interesting uses (item 1). */
5865 find_interesting_uses (data);
5866 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5867 goto finish;
5868
5869 /* Finds candidates for the induction variables (item 2). */
5870 find_iv_candidates (data);
5871
5872 /* Calculates the costs (item 3, part 1). */
5873 determine_iv_costs (data);
5874 determine_use_iv_costs (data);
5875 determine_set_costs (data);
5876
5877 /* Find the optimal set of induction variables (item 3, part 2). */
5878 iv_ca = find_optimal_iv_set (data);
5879 if (!iv_ca)
5880 goto finish;
5881 changed = true;
5882
5883 /* Create the new induction variables (item 4, part 1). */
5884 create_new_ivs (data, iv_ca);
5885 iv_ca_free (&iv_ca);
5886
5887 /* Rewrite the uses (item 4, part 2). */
5888 rewrite_uses (data);
5889
5890 /* Remove the ivs that are unused after rewriting. */
5891 remove_unused_ivs (data);
5892
5893 /* We have changed the structure of induction variables; it might happen
5894 that definitions in the scev database refer to some of them that were
5895 eliminated. */
5896 scev_reset ();
5897
5898 finish:
5899 free_loop_data (data);
5900
5901 return changed;
5902 }
5903
5904 /* Main entry point. Optimizes induction variables in loops. */
5905
5906 void
5907 tree_ssa_iv_optimize (void)
5908 {
5909 struct loop *loop;
5910 struct ivopts_data data;
5911 loop_iterator li;
5912
5913 tree_ssa_iv_optimize_init (&data);
5914
5915 /* Optimize the loops starting with the innermost ones. */
5916 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5917 {
5918 if (dump_file && (dump_flags & TDF_DETAILS))
5919 flow_loop_dump (loop, dump_file, NULL, 1);
5920
5921 tree_ssa_iv_optimize_loop (&data, loop);
5922 }
5923
5924 tree_ssa_iv_optimize_finalize (&data);
5925 }