/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
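
/* As a quick illustration (a hand-written sketch, not output of this pass):
   given a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the use of i in a[i] is an address use, and the pass may decide that a
   pointer induction variable serves it more cheaply, effectively producing

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   where the original counter i is eliminated together with its exit test
   (the classical strength reduction + iv elimination combination).  */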

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if so.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  htab_t inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must not use ssa names
         that appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Hash table equality function for expressions.  */

static int
htab_inv_expr_eq (const void *ent1, const void *ent2)
{
  const struct iv_inv_expr_ent *expr1 =
      (const struct iv_inv_expr_ent *)ent1;
  const struct iv_inv_expr_ent *expr2 =
      (const struct iv_inv_expr_ent *)ent2;

  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

/* Hash function for loop invariant expressions.  */

static hashval_t
htab_inv_expr_hash (const void *ent)
{
  const struct iv_inv_expr_ent *expr =
      (const struct iv_inv_expr_ent *)ent;
  return expr->hash;
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = htab_create (10, htab_inv_expr_hash,
                                    htab_inv_expr_eq, free);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
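
/* For illustration (informal, under the usual GIMPLE invariants): for
   EXPR = &a[i], the base address of the ADDR_EXPR operand is the
   declaration `a', so the result is the address of `a' converted to
   ptr_type_node; for EXPR = p_1 + 4 (a POINTER_PLUS_EXPR) the function
   recurses into p_1; and for a non-pointer EXPR the result is NULL_TREE.  */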

/* Allocates an induction variable with given initial value BASE and
   step STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
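
/* For illustration: given a header phi like

     i_1 = PHI <0(preheader), i_4(latch)>
     ...
     i_4 = i_1 + 1;

   determine_biv_step returns 1, whereas a phi whose latch value is not a
   simple affine function of the result (or whose step is zero) yields
   NULL_TREE.  */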

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
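
/* For illustration: in a loop where i_1 is a biv with step 1, an assignment
   like p_2 = p_0 + i_1 * 4 defines a giv whose step is 4 per iteration;
   scalar evolution analysis (simple_iv) is what actually derives the base
   and step parameters here.  */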

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, if it was
     not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
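
/* For illustration: for the exit test

     if (i_3 < n_5)

   where i_3 is an iv with nonzero step and n_5 is loop invariant, the
   function returns true with *CONTROL_VAR pointing at the i_3 operand and
   *BOUND at the n_5 operand; for a compare of two invariants (or of two
   ivs) it returns false.  */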

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = (res * tree_to_double_int (mby)).sext (precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = (p0 + p1).sext (precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = tree_to_double_int (top).sext (precision);
      p1 = tree_to_double_int (bot).sext (precision);
      if (p1.is_zero ())
        return false;
      *mul = p0.sdivmod (p1, FLOOR_DIV_EXPR, &res).sext (precision);
      return res.is_zero ();

    default:
      return false;
    }
}
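
/* A few worked cases (informal): constant_multiple_of (n_1 * 8, n_1, &m)
   sets m to 8 via the MULT_EXPR arm; constant_multiple_of (n_1 * 8 + n_1 * 4,
   n_1, &m) sets m to 12 via the PLUS_EXPR arm; constant_multiple_of (12, 4,
   &m) sets m to 3 via the INTEGER_CST arm, while (13, 4) fails because the
   division leaves a remainder.  */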
1599
1600 /* Returns true if memory reference REF with step STEP may be unaligned. */
1601
1602 static bool
1603 may_be_unaligned_p (tree ref, tree step)
1604 {
1605 tree base;
1606 tree base_type;
1607 HOST_WIDE_INT bitsize;
1608 HOST_WIDE_INT bitpos;
1609 tree toffset;
1610 enum machine_mode mode;
1611 int unsignedp, volatilep;
1612 unsigned base_align;
1613
1614 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1615 thus they are not misaligned. */
1616 if (TREE_CODE (ref) == TARGET_MEM_REF)
1617 return false;
1618
1619 /* The test below is basically copy of what expr.c:normal_inner_ref
1620 does to check whether the object must be loaded by parts when
1621 STRICT_ALIGNMENT is true. */
1622 base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
1623 &unsignedp, &volatilep, true);
1624 base_type = TREE_TYPE (base);
1625 base_align = get_object_alignment (base);
1626 base_align = MAX (base_align, TYPE_ALIGN (base_type));
1627
1628 if (mode != BLKmode)
1629 {
1630 unsigned mode_align = GET_MODE_ALIGNMENT (mode);
1631
1632 if (base_align < mode_align
1633 || (bitpos % mode_align) != 0
1634 || (bitpos % BITS_PER_UNIT) != 0)
1635 return true;
1636
1637 if (toffset
1638 && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
1639 return true;
1640
1641 if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
1642 return true;
1643 }
1644
1645 return false;
1646 }
1647
1648 /* Return true if EXPR may be non-addressable. */
1649
1650 bool
1651 may_be_nonaddressable_p (tree expr)
1652 {
1653 switch (TREE_CODE (expr))
1654 {
1655 case TARGET_MEM_REF:
1656 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1657 target, thus they are always addressable. */
1658 return false;
1659
1660 case COMPONENT_REF:
1661 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1662 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1663
1664 case VIEW_CONVERT_EXPR:
1665 /* This kind of view-conversions may wrap non-addressable objects
1666 and make them look addressable. After some processing the
1667 non-addressability may be uncovered again, causing ADDR_EXPRs
1668 of inappropriate objects to be built. */
1669 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1670 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1671 return true;
1672
1673 /* ... fall through ... */
1674
1675 case ARRAY_REF:
1676 case ARRAY_RANGE_REF:
1677 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1678
1679 CASE_CONVERT:
1680 return true;
1681
1682 default:
1683 break;
1684 }
1685
1686 return false;
1687 }
1688
1689 /* Finds addresses in *OP_P inside STMT. */
1690
1691 static void
1692 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1693 {
1694 tree base = *op_p, step = size_zero_node;
1695 struct iv *civ;
1696 struct ifs_ivopts_data ifs_ivopts_data;
1697
1698 /* Do not play with volatile memory references. A bit too conservative,
1699 perhaps, but safe. */
1700 if (gimple_has_volatile_ops (stmt))
1701 goto fail;
1702
1703 /* Ignore bitfields for now. Not really something terribly complicated
1704 to handle. TODO. */
1705 if (TREE_CODE (base) == BIT_FIELD_REF)
1706 goto fail;
1707
1708 base = unshare_expr (base);
1709
1710 if (TREE_CODE (base) == TARGET_MEM_REF)
1711 {
1712 tree type = build_pointer_type (TREE_TYPE (base));
1713 tree astep;
1714
1715 if (TMR_BASE (base)
1716 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1717 {
1718 civ = get_iv (data, TMR_BASE (base));
1719 if (!civ)
1720 goto fail;
1721
1722 TMR_BASE (base) = civ->base;
1723 step = civ->step;
1724 }
1725 if (TMR_INDEX2 (base)
1726 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1727 {
1728 civ = get_iv (data, TMR_INDEX2 (base));
1729 if (!civ)
1730 goto fail;
1731
1732 TMR_INDEX2 (base) = civ->base;
1733 step = civ->step;
1734 }
1735 if (TMR_INDEX (base)
1736 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1737 {
1738 civ = get_iv (data, TMR_INDEX (base));
1739 if (!civ)
1740 goto fail;
1741
1742 TMR_INDEX (base) = civ->base;
1743 astep = civ->step;
1744
1745 if (astep)
1746 {
1747 if (TMR_STEP (base))
1748 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1749
1750 step = fold_build2 (PLUS_EXPR, type, step, astep);
1751 }
1752 }
1753
1754 if (integer_zerop (step))
1755 goto fail;
1756 base = tree_mem_ref_addr (type, base);
1757 }
1758 else
1759 {
1760 ifs_ivopts_data.ivopts_data = data;
1761 ifs_ivopts_data.stmt = stmt;
1762 ifs_ivopts_data.step = size_zero_node;
1763 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1764 || integer_zerop (ifs_ivopts_data.step))
1765 goto fail;
1766 step = ifs_ivopts_data.step;
1767
1768 /* Check that the base expression is addressable. This needs
1769 to be done after substituting bases of IVs into it. */
1770 if (may_be_nonaddressable_p (base))
1771 goto fail;
1772
1773 /* Moreover, on strict alignment platforms, check that it is
1774 sufficiently aligned. */
1775 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1776 goto fail;
1777
1778 base = build_fold_addr_expr (base);
1779
1780 /* Substituting bases of IVs into the base expression might
1781 have caused folding opportunities. */
1782 if (TREE_CODE (base) == ADDR_EXPR)
1783 {
1784 tree *ref = &TREE_OPERAND (base, 0);
1785 while (handled_component_p (*ref))
1786 ref = &TREE_OPERAND (*ref, 0);
1787 if (TREE_CODE (*ref) == MEM_REF)
1788 {
1789 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
1790 TREE_OPERAND (*ref, 0),
1791 TREE_OPERAND (*ref, 1));
1792 if (tem)
1793 *ref = tem;
1794 }
1795 }
1796 }
1797
1798 civ = alloc_iv (base, step);
1799 record_use (data, op_p, civ, stmt, USE_ADDRESS);
1800 return;
1801
1802 fail:
1803 for_each_index (op_p, idx_record_use, data);
1804 }
1805
1806 /* Finds and records invariants used in STMT. */
1807
1808 static void
1809 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
1810 {
1811 ssa_op_iter iter;
1812 use_operand_p use_p;
1813 tree op;
1814
1815 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1816 {
1817 op = USE_FROM_PTR (use_p);
1818 record_invariant (data, op, false);
1819 }
1820 }
1821
1822 /* Finds interesting uses of induction variables in the statement STMT. */
1823
1824 static void
1825 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
1826 {
1827 struct iv *iv;
1828 tree op, *lhs, *rhs;
1829 ssa_op_iter iter;
1830 use_operand_p use_p;
1831 enum tree_code code;
1832
1833 find_invariants_stmt (data, stmt);
1834
1835 if (gimple_code (stmt) == GIMPLE_COND)
1836 {
1837 find_interesting_uses_cond (data, stmt);
1838 return;
1839 }
1840
1841 if (is_gimple_assign (stmt))
1842 {
1843 lhs = gimple_assign_lhs_ptr (stmt);
1844 rhs = gimple_assign_rhs1_ptr (stmt);
1845
1846 if (TREE_CODE (*lhs) == SSA_NAME)
1847 {
1848 /* If the statement defines an induction variable, the uses are not
1849 interesting by themselves. */
1850
1851 iv = get_iv (data, *lhs);
1852
1853 if (iv && !integer_zerop (iv->step))
1854 return;
1855 }
1856
1857 code = gimple_assign_rhs_code (stmt);
1858 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
1859 && (REFERENCE_CLASS_P (*rhs)
1860 || is_gimple_val (*rhs)))
1861 {
1862 if (REFERENCE_CLASS_P (*rhs))
1863 find_interesting_uses_address (data, stmt, rhs);
1864 else
1865 find_interesting_uses_op (data, *rhs);
1866
1867 if (REFERENCE_CLASS_P (*lhs))
1868 find_interesting_uses_address (data, stmt, lhs);
1869 return;
1870 }
1871 else if (TREE_CODE_CLASS (code) == tcc_comparison)
1872 {
1873 find_interesting_uses_cond (data, stmt);
1874 return;
1875 }
1876
1877 /* TODO -- we should also handle address uses of type
1878
1879 memory = call (whatever);
1880
1881 and
1882
1883 call (memory). */
1884 }
1885
1886 if (gimple_code (stmt) == GIMPLE_PHI
1887 && gimple_bb (stmt) == data->current_loop->header)
1888 {
1889 iv = get_iv (data, PHI_RESULT (stmt));
1890
1891 if (iv && !integer_zerop (iv->step))
1892 return;
1893 }
1894
1895 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
1896 {
1897 op = USE_FROM_PTR (use_p);
1898
1899 if (TREE_CODE (op) != SSA_NAME)
1900 continue;
1901
1902 iv = get_iv (data, op);
1903 if (!iv)
1904 continue;
1905
1906 find_interesting_uses_op (data, op);
1907 }
1908 }
1909
1910 /* Finds interesting uses of induction variables outside of loops
1911 on loop exit edge EXIT. */
1912
1913 static void
1914 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
1915 {
1916 gimple phi;
1917 gimple_stmt_iterator psi;
1918 tree def;
1919
1920 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
1921 {
1922 phi = gsi_stmt (psi);
1923 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
1924 if (!virtual_operand_p (def))
1925 find_interesting_uses_op (data, def);
1926 }
1927 }
1928
1929 /* Finds uses of the induction variables that are interesting. */
1930
1931 static void
1932 find_interesting_uses (struct ivopts_data *data)
1933 {
1934 basic_block bb;
1935 gimple_stmt_iterator bsi;
1936 basic_block *body = get_loop_body (data->current_loop);
1937 unsigned i;
1938 struct version_info *info;
1939 edge e;
1940
1941 if (dump_file && (dump_flags & TDF_DETAILS))
1942 fprintf (dump_file, "Uses:\n\n");
1943
1944 for (i = 0; i < data->current_loop->num_nodes; i++)
1945 {
1946 edge_iterator ei;
1947 bb = body[i];
1948
1949 FOR_EACH_EDGE (e, ei, bb->succs)
1950 if (e->dest != EXIT_BLOCK_PTR
1951 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
1952 find_interesting_uses_outside (data, e);
1953
1954 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1955 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1956 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1957 if (!is_gimple_debug (gsi_stmt (bsi)))
1958 find_interesting_uses_stmt (data, gsi_stmt (bsi));
1959 }
1960
1961 if (dump_file && (dump_flags & TDF_DETAILS))
1962 {
1963 bitmap_iterator bi;
1964
1965 fprintf (dump_file, "\n");
1966
1967 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1968 {
1969 info = ver_info (data, i);
1970 if (info->inv_id)
1971 {
1972 fprintf (dump_file, " ");
1973 print_generic_expr (dump_file, info->name, TDF_SLIM);
1974 fprintf (dump_file, " is invariant (%d)%s\n",
1975 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
1976 }
1977 }
1978
1979 fprintf (dump_file, "\n");
1980 }
1981
1982 free (body);
1983 }
1984
1985 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1986 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1987 we are at the top-level of the processed address. */
1988
1989 static tree
1990 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1991 unsigned HOST_WIDE_INT *offset)
1992 {
1993 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1994 enum tree_code code;
1995 tree type, orig_type = TREE_TYPE (expr);
1996 unsigned HOST_WIDE_INT off0, off1, st;
1997 tree orig_expr = expr;
1998
1999 STRIP_NOPS (expr);
2000
2001 type = TREE_TYPE (expr);
2002 code = TREE_CODE (expr);
2003 *offset = 0;
2004
2005 switch (code)
2006 {
2007 case INTEGER_CST:
2008 if (!cst_and_fits_in_hwi (expr)
2009 || integer_zerop (expr))
2010 return orig_expr;
2011
2012 *offset = int_cst_value (expr);
2013 return build_int_cst (orig_type, 0);
2014
2015 case POINTER_PLUS_EXPR:
2016 case PLUS_EXPR:
2017 case MINUS_EXPR:
2018 op0 = TREE_OPERAND (expr, 0);
2019 op1 = TREE_OPERAND (expr, 1);
2020
2021 op0 = strip_offset_1 (op0, false, false, &off0);
2022 op1 = strip_offset_1 (op1, false, false, &off1);
2023
2024 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2025 if (op0 == TREE_OPERAND (expr, 0)
2026 && op1 == TREE_OPERAND (expr, 1))
2027 return orig_expr;
2028
2029 if (integer_zerop (op1))
2030 expr = op0;
2031 else if (integer_zerop (op0))
2032 {
2033 if (code == MINUS_EXPR)
2034 expr = fold_build1 (NEGATE_EXPR, type, op1);
2035 else
2036 expr = op1;
2037 }
2038 else
2039 expr = fold_build2 (code, type, op0, op1);
2040
2041 return fold_convert (orig_type, expr);
2042
2043 case MULT_EXPR:
2044 op1 = TREE_OPERAND (expr, 1);
2045 if (!cst_and_fits_in_hwi (op1))
2046 return orig_expr;
2047
2048 op0 = TREE_OPERAND (expr, 0);
2049 op0 = strip_offset_1 (op0, false, false, &off0);
2050 if (op0 == TREE_OPERAND (expr, 0))
2051 return orig_expr;
2052
2053 *offset = off0 * int_cst_value (op1);
2054 if (integer_zerop (op0))
2055 expr = op0;
2056 else
2057 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2058
2059 return fold_convert (orig_type, expr);
2060
2061 case ARRAY_REF:
2062 case ARRAY_RANGE_REF:
2063 if (!inside_addr)
2064 return orig_expr;
2065
2066 step = array_ref_element_size (expr);
2067 if (!cst_and_fits_in_hwi (step))
2068 break;
2069
2070 st = int_cst_value (step);
2071 op1 = TREE_OPERAND (expr, 1);
2072 op1 = strip_offset_1 (op1, false, false, &off1);
2073 *offset = off1 * st;
2074
2075 if (top_compref
2076 && integer_zerop (op1))
2077 {
2078 /* Strip the component reference completely. */
2079 op0 = TREE_OPERAND (expr, 0);
2080 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2081 *offset += off0;
2082 return op0;
2083 }
2084 break;
2085
2086 case COMPONENT_REF:
2087 if (!inside_addr)
2088 return orig_expr;
2089
2090 tmp = component_ref_field_offset (expr);
2091 if (top_compref
2092 && cst_and_fits_in_hwi (tmp))
2093 {
2094 /* Strip the component reference completely. */
2095 op0 = TREE_OPERAND (expr, 0);
2096 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2097 *offset = off0 + int_cst_value (tmp);
2098 return op0;
2099 }
2100 break;
2101
2102 case ADDR_EXPR:
2103 op0 = TREE_OPERAND (expr, 0);
2104 op0 = strip_offset_1 (op0, true, true, &off0);
2105 *offset += off0;
2106
2107 if (op0 == TREE_OPERAND (expr, 0))
2108 return orig_expr;
2109
2110 expr = build_fold_addr_expr (op0);
2111 return fold_convert (orig_type, expr);
2112
2113 case MEM_REF:
2114 /* ??? Offset operand? */
2115 inside_addr = false;
2116 break;
2117
2118 default:
2119 return orig_expr;
2120 }
2121
2122 /* Default handling of expressions for which we want to recurse into
2123 the first operand. */
2124 op0 = TREE_OPERAND (expr, 0);
2125 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2126 *offset += off0;
2127
2128 if (op0 == TREE_OPERAND (expr, 0)
2129 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2130 return orig_expr;
2131
2132 expr = copy_node (expr);
2133 TREE_OPERAND (expr, 0) = op0;
2134 if (op1)
2135 TREE_OPERAND (expr, 1) = op1;
2136
2137 /* Inside an address, we might strip the top-level component references,
2138 thus changing the type of the expression. Handling of ADDR_EXPR
2139 will fix that. */
2140 expr = fold_convert (orig_type, expr);
2141
2142 return expr;
2143 }
2144
2145 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2146
2147 static tree
2148 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2149 {
2150 return strip_offset_1 (expr, false, false, offset);
2151 }
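
/* Behavior sketch (illustrative): for p + 6, where 6 fits in a
   HOST_WIDE_INT, strip_offset returns p and sets *offset to 6; for
   &a[i + 3], with a an array of 4-byte elements, it returns &a[i]
   and sets *offset to 12.  */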
2152
2153 /* Returns a variant of TYPE that can be used as a base for different uses.
2154 We return an unsigned type with the same precision, which avoids problems
2155 with overflows. */
2156
2157 static tree
2158 generic_type_for (tree type)
2159 {
2160 if (POINTER_TYPE_P (type))
2161 return unsigned_type_for (type);
2162
2163 if (TYPE_UNSIGNED (type))
2164 return type;
2165
2166 return unsigned_type_for (type);
2167 }
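
/* E.g. int maps to unsigned int, and a pointer type maps to an unsigned
   integer type of the same precision, so candidate increments wrap
   instead of invoking undefined overflow.  */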
2168
2169 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2170 the bitmap in which we should store them. */
2171
2172 static struct ivopts_data *fd_ivopts_data;
2173 static tree
2174 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2175 {
2176 bitmap *depends_on = (bitmap *) data;
2177 struct version_info *info;
2178
2179 if (TREE_CODE (*expr_p) != SSA_NAME)
2180 return NULL_TREE;
2181 info = name_info (fd_ivopts_data, *expr_p);
2182
2183 if (!info->inv_id || info->has_nonlin_use)
2184 return NULL_TREE;
2185
2186 if (!*depends_on)
2187 *depends_on = BITMAP_ALLOC (NULL);
2188 bitmap_set_bit (*depends_on, info->inv_id);
2189
2190 return NULL_TREE;
2191 }
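
/* Illustrative: if a candidate step is n * 4 where n is a loop
   invariant with inv_id 2, walking the step with find_depends sets
   bit 2 in the depends_on bitmap.  */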
2192
2193 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2194 position to POS. If USE is not NULL, the candidate is set as related to
2195 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2196 replacement of the final value of the iv by a direct computation. */
2197
2198 static struct iv_cand *
2199 add_candidate_1 (struct ivopts_data *data,
2200 tree base, tree step, bool important, enum iv_position pos,
2201 struct iv_use *use, gimple incremented_at)
2202 {
2203 unsigned i;
2204 struct iv_cand *cand = NULL;
2205 tree type, orig_type;
2206
2207 /* For non-original variables, make sure their values are computed in a type
2208 that does not invoke undefined behavior on overflows (since in general,
2209 we cannot prove that these induction variables are non-wrapping). */
2210 if (pos != IP_ORIGINAL)
2211 {
2212 orig_type = TREE_TYPE (base);
2213 type = generic_type_for (orig_type);
2214 if (type != orig_type)
2215 {
2216 base = fold_convert (type, base);
2217 step = fold_convert (type, step);
2218 }
2219 }
2220
2221 for (i = 0; i < n_iv_cands (data); i++)
2222 {
2223 cand = iv_cand (data, i);
2224
2225 if (cand->pos != pos)
2226 continue;
2227
2228 if (cand->incremented_at != incremented_at
2229 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2230 && cand->ainc_use != use))
2231 continue;
2232
2233 if (!cand->iv)
2234 {
2235 if (!base && !step)
2236 break;
2237
2238 continue;
2239 }
2240
2241 if (!base && !step)
2242 continue;
2243
2244 if (operand_equal_p (base, cand->iv->base, 0)
2245 && operand_equal_p (step, cand->iv->step, 0)
2246 && (TYPE_PRECISION (TREE_TYPE (base))
2247 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2248 break;
2249 }
2250
2251 if (i == n_iv_cands (data))
2252 {
2253 cand = XCNEW (struct iv_cand);
2254 cand->id = i;
2255
2256 if (!base && !step)
2257 cand->iv = NULL;
2258 else
2259 cand->iv = alloc_iv (base, step);
2260
2261 cand->pos = pos;
2262 if (pos != IP_ORIGINAL && cand->iv)
2263 {
2264 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2265 cand->var_after = cand->var_before;
2266 }
2267 cand->important = important;
2268 cand->incremented_at = incremented_at;
2269 data->iv_candidates.safe_push (cand);
2270
2271 if (step
2272 && TREE_CODE (step) != INTEGER_CST)
2273 {
2274 fd_ivopts_data = data;
2275 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2276 }
2277
2278 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2279 cand->ainc_use = use;
2280 else
2281 cand->ainc_use = NULL;
2282
2283 if (dump_file && (dump_flags & TDF_DETAILS))
2284 dump_cand (dump_file, cand);
2285 }
2286
2287 if (important && !cand->important)
2288 {
2289 cand->important = true;
2290 if (dump_file && (dump_flags & TDF_DETAILS))
2291 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2292 }
2293
2294 if (use)
2295 {
2296 bitmap_set_bit (use->related_cands, i);
2297 if (dump_file && (dump_flags & TDF_DETAILS))
2298 fprintf (dump_file, "Candidate %d is related to use %d\n",
2299 cand->id, use->id);
2300 }
2301
2302 return cand;
2303 }
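
/* Sketch of the deduplication above (illustrative): registering base 0,
   step 4 twice at the same position yields a single candidate; the
   second call finds the existing one by comparing base, step and type
   precision, and only updates the important flag and the related_cands
   bitmap of USE.  */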
2304
2305 /* Returns true if incrementing the induction variable at the end of the LOOP
2306 is allowed.
2307
2308 The purpose is to avoid splitting the latch edge with a biv increment,
2309 thus creating a jump, possibly confusing other optimization passes and
2310 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2311 IP_NORMAL_POS is not available (so we do not have a better alternative),
2312 or if the latch edge is already nonempty. */
2313
2314 static bool
2315 allow_ip_end_pos_p (struct loop *loop)
2316 {
2317 if (!ip_normal_pos (loop))
2318 return true;
2319
2320 if (!empty_block_p (ip_end_pos (loop)))
2321 return true;
2322
2323 return false;
2324 }
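
/* For instance, in a do { } while () loop whose latch block already
   contains statements, placing the increment at IP_END does not force
   an edge split, so it is allowed even when IP_NORMAL_POS exists.  */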
2325
2326 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2327 Important field is set to IMPORTANT. */
2328
2329 static void
2330 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2331 bool important, struct iv_use *use)
2332 {
2333 basic_block use_bb = gimple_bb (use->stmt);
2334 enum machine_mode mem_mode;
2335 unsigned HOST_WIDE_INT cstepi;
2336
2337 /* If we insert the increment in any position other than the standard
2338 ones, we must ensure that it is incremented once per iteration.
2339 It must not be in an inner nested loop, or one side of an if
2340 statement. */
2341 if (use_bb->loop_father != data->current_loop
2342 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2343 || stmt_could_throw_p (use->stmt)
2344 || !cst_and_fits_in_hwi (step))
2345 return;
2346
2347 cstepi = int_cst_value (step);
2348
2349 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2350 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2351 || USE_STORE_PRE_INCREMENT (mem_mode))
2352 && GET_MODE_SIZE (mem_mode) == cstepi)
2353 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2354 || USE_STORE_PRE_DECREMENT (mem_mode))
2355 && GET_MODE_SIZE (mem_mode) == -cstepi))
2356 {
2357 enum tree_code code = MINUS_EXPR;
2358 tree new_base;
2359 tree new_step = step;
2360
2361 if (POINTER_TYPE_P (TREE_TYPE (base)))
2362 {
2363 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2364 code = POINTER_PLUS_EXPR;
2365 }
2366 else
2367 new_step = fold_convert (TREE_TYPE (base), new_step);
2368 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2369 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2370 use->stmt);
2371 }
2372 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2373 || USE_STORE_POST_INCREMENT (mem_mode))
2374 && GET_MODE_SIZE (mem_mode) == cstepi)
2375 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2376 || USE_STORE_POST_DECREMENT (mem_mode))
2377 && GET_MODE_SIZE (mem_mode) == -cstepi))
2378 {
2379 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2380 use->stmt);
2381 }
2382 }
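
/* Illustrative: for a 4-byte access with step 4 on a target with
   post-increment addressing, the candidate base, step is added at
   IP_AFTER_USE so the increment can later be folded into the access as
   *p++; pre-modify targets analogously get an IP_BEFORE_USE candidate
   with the base shifted back by one step.  */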
2383
2384 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2385 position to POS. If USE is not NULL, the candidate is set as related to
2386 it. The candidate computation is scheduled on all available positions. */
2387
2388 static void
2389 add_candidate (struct ivopts_data *data,
2390 tree base, tree step, bool important, struct iv_use *use)
2391 {
2392 if (ip_normal_pos (data->current_loop))
2393 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2394 if (ip_end_pos (data->current_loop)
2395 && allow_ip_end_pos_p (data->current_loop))
2396 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2397
2398 if (use != NULL && use->type == USE_ADDRESS)
2399 add_autoinc_candidates (data, base, step, important, use);
2400 }
2401
2402 /* Adds standard iv candidates. */
2403
2404 static void
2405 add_standard_iv_candidates (struct ivopts_data *data)
2406 {
2407 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2408
2409 /* The same for the wider long type if it is still fast enough. */
2410 if (TYPE_PRECISION
2411 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2412 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2413 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2414 build_int_cst (long_integer_type_node, 1), true, NULL);
2415
2416 /* And for the even wider long long type if it is still fast enough. */
2417 if (TYPE_PRECISION
2418 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2419 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2420 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2421 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2422 }
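
/* E.g. on a typical LP64 target this adds the candidate 0 + 1 * i in
   int and, since long is wider than int but still fits in a word, also
   in long; the long long variant is then skipped because it has the
   same precision as long.  */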
2423
2425 /* Adds candidates based on the old induction variable IV. */
2426
2427 static void
2428 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2429 {
2430 gimple phi;
2431 tree def;
2432 struct iv_cand *cand;
2433
2434 add_candidate (data, iv->base, iv->step, true, NULL);
2435
2436 /* The same, but with initial value zero. */
2437 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2438 add_candidate (data, size_int (0), iv->step, true, NULL);
2439 else
2440 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2441 iv->step, true, NULL);
2442
2443 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2444 if (gimple_code (phi) == GIMPLE_PHI)
2445 {
2446 /* Additionally record the possibility of leaving the original iv
2447 untouched. */
2448 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2449 cand = add_candidate_1 (data,
2450 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2451 SSA_NAME_DEF_STMT (def));
2452 cand->var_before = iv->ssa_name;
2453 cand->var_after = def;
2454 }
2455 }
2456
2457 /* Adds candidates based on the old induction variables. */
2458
2459 static void
2460 add_old_ivs_candidates (struct ivopts_data *data)
2461 {
2462 unsigned i;
2463 struct iv *iv;
2464 bitmap_iterator bi;
2465
2466 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2467 {
2468 iv = ver_info (data, i)->iv;
2469 if (iv && iv->biv_p && !integer_zerop (iv->step))
2470 add_old_iv_candidates (data, iv);
2471 }
2472 }
2473
2474 /* Adds candidates based on the value of the induction variable IV and USE. */
2475
2476 static void
2477 add_iv_value_candidates (struct ivopts_data *data,
2478 struct iv *iv, struct iv_use *use)
2479 {
2480 unsigned HOST_WIDE_INT offset;
2481 tree base;
2482 tree basetype;
2483
2484 add_candidate (data, iv->base, iv->step, false, use);
2485
2486 /* The same, but with initial value zero. Make such a variable important,
2487 since it is generic enough that possibly many uses may be based
2488 on it. */
2489 basetype = TREE_TYPE (iv->base);
2490 if (POINTER_TYPE_P (basetype))
2491 basetype = sizetype;
2492 add_candidate (data, build_int_cst (basetype, 0),
2493 iv->step, true, use);
2494
2495 /* Third, try removing the constant offset. Make sure to even
2496 add a candidate for &a[0] vs. (T *)&a. */
2497 base = strip_offset (iv->base, &offset);
2498 if (offset
2499 || base != iv->base)
2500 add_candidate (data, base, iv->step, false, use);
2501 }
2502
2503 /* Adds candidates based on the uses. */
2504
2505 static void
2506 add_derived_ivs_candidates (struct ivopts_data *data)
2507 {
2508 unsigned i;
2509
2510 for (i = 0; i < n_iv_uses (data); i++)
2511 {
2512 struct iv_use *use = iv_use (data, i);
2513
2514 if (!use)
2515 continue;
2516
2517 switch (use->type)
2518 {
2519 case USE_NONLINEAR_EXPR:
2520 case USE_COMPARE:
2521 case USE_ADDRESS:
2522 /* Just add the ivs based on the value of the iv used here. */
2523 add_iv_value_candidates (data, use->iv, use);
2524 break;
2525
2526 default:
2527 gcc_unreachable ();
2528 }
2529 }
2530 }
2531
2532 /* Record important candidates and add them to related_cands bitmaps
2533 if needed. */
2534
2535 static void
2536 record_important_candidates (struct ivopts_data *data)
2537 {
2538 unsigned i;
2539 struct iv_use *use;
2540
2541 for (i = 0; i < n_iv_cands (data); i++)
2542 {
2543 struct iv_cand *cand = iv_cand (data, i);
2544
2545 if (cand->important)
2546 bitmap_set_bit (data->important_candidates, i);
2547 }
2548
2549 data->consider_all_candidates = (n_iv_cands (data)
2550 <= CONSIDER_ALL_CANDIDATES_BOUND);
2551
2552 if (data->consider_all_candidates)
2553 {
2554 /* We will not need "related_cands" bitmaps in this case,
2555 so release them to decrease peak memory consumption. */
2556 for (i = 0; i < n_iv_uses (data); i++)
2557 {
2558 use = iv_use (data, i);
2559 BITMAP_FREE (use->related_cands);
2560 }
2561 }
2562 else
2563 {
2564 /* Add important candidates to the related_cands bitmaps. */
2565 for (i = 0; i < n_iv_uses (data); i++)
2566 bitmap_ior_into (iv_use (data, i)->related_cands,
2567 data->important_candidates);
2568 }
2569 }
2570
2571 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2572 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2573 we allocate a small open-addressed table for every use. */
2574
2575 static void
2576 alloc_use_cost_map (struct ivopts_data *data)
2577 {
2578 unsigned i, size, s, j;
2579
2580 for (i = 0; i < n_iv_uses (data); i++)
2581 {
2582 struct iv_use *use = iv_use (data, i);
2583 bitmap_iterator bi;
2584
2585 if (data->consider_all_candidates)
2586 size = n_iv_cands (data);
2587 else
2588 {
2589 s = 0;
2590 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2591 {
2592 s++;
2593 }
2594
2595 /* Round up to the nearest power of two, so that taking the modulus is fast. */
2596 for (size = 1; size < s; size <<= 1)
2597 continue;
2598 }
2599
2600 use->n_map_members = size;
2601 use->cost_map = XCNEWVEC (struct cost_pair, size);
2602 }
2603 }
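
/* Example: a use related to 5 candidates gets n_map_members == 8;
   set_use_iv_cost and get_use_iv_cost then probe slots starting at
   cand->id & 7 and wrap around, which is why the size is rounded up to
   a power of two.  */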
2604
2605 /* Returns a description of the computation cost of an expression whose
2606 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2607
2608 static comp_cost
2609 new_cost (unsigned runtime, unsigned complexity)
2610 {
2611 comp_cost cost;
2612
2613 cost.cost = runtime;
2614 cost.complexity = complexity;
2615
2616 return cost;
2617 }
2618
2619 /* Adds costs COST1 and COST2. */
2620
2621 static comp_cost
2622 add_costs (comp_cost cost1, comp_cost cost2)
2623 {
2624 cost1.cost += cost2.cost;
2625 cost1.complexity += cost2.complexity;
2626
2627 return cost1;
2628 }

2629 /* Subtracts costs COST1 and COST2. */
2630
2631 static comp_cost
2632 sub_costs (comp_cost cost1, comp_cost cost2)
2633 {
2634 cost1.cost -= cost2.cost;
2635 cost1.complexity -= cost2.complexity;
2636
2637 return cost1;
2638 }
2639
2640 /* Returns a negative number if COST1 < COST2, a positive number if
2641 COST1 > COST2, and 0 if COST1 = COST2. */
2642
2643 static int
2644 compare_costs (comp_cost cost1, comp_cost cost2)
2645 {
2646 if (cost1.cost == cost2.cost)
2647 return cost1.complexity - cost2.complexity;
2648
2649 return cost1.cost - cost2.cost;
2650 }
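
/* E.g. comparing costs (4, 1) and (4, 2) yields a negative value: with
   equal runtime cost, the less complex expression wins.  */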
2651
2652 /* Returns true if COST is infinite. */
2653
2654 static bool
2655 infinite_cost_p (comp_cost cost)
2656 {
2657 return cost.cost == INFTY;
2658 }
2659
2660 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
2661 on invariants DEPENDS_ON and that the value used in expressing it
2662 is VALUE, and in case of iv elimination the comparison operator is COMP. */
2663
2664 static void
2665 set_use_iv_cost (struct ivopts_data *data,
2666 struct iv_use *use, struct iv_cand *cand,
2667 comp_cost cost, bitmap depends_on, tree value,
2668 enum tree_code comp, int inv_expr_id)
2669 {
2670 unsigned i, s;
2671
2672 if (infinite_cost_p (cost))
2673 {
2674 BITMAP_FREE (depends_on);
2675 return;
2676 }
2677
2678 if (data->consider_all_candidates)
2679 {
2680 use->cost_map[cand->id].cand = cand;
2681 use->cost_map[cand->id].cost = cost;
2682 use->cost_map[cand->id].depends_on = depends_on;
2683 use->cost_map[cand->id].value = value;
2684 use->cost_map[cand->id].comp = comp;
2685 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2686 return;
2687 }
2688
2689 /* n_map_members is a power of two, so this computes modulo. */
2690 s = cand->id & (use->n_map_members - 1);
2691 for (i = s; i < use->n_map_members; i++)
2692 if (!use->cost_map[i].cand)
2693 goto found;
2694 for (i = 0; i < s; i++)
2695 if (!use->cost_map[i].cand)
2696 goto found;
2697
2698 gcc_unreachable ();
2699
2700 found:
2701 use->cost_map[i].cand = cand;
2702 use->cost_map[i].cost = cost;
2703 use->cost_map[i].depends_on = depends_on;
2704 use->cost_map[i].value = value;
2705 use->cost_map[i].comp = comp;
2706 use->cost_map[i].inv_expr_id = inv_expr_id;
2707 }
2708
2709 /* Gets cost of (USE, CANDIDATE) pair. */
2710
2711 static struct cost_pair *
2712 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2713 struct iv_cand *cand)
2714 {
2715 unsigned i, s;
2716 struct cost_pair *ret;
2717
2718 if (!cand)
2719 return NULL;
2720
2721 if (data->consider_all_candidates)
2722 {
2723 ret = use->cost_map + cand->id;
2724 if (!ret->cand)
2725 return NULL;
2726
2727 return ret;
2728 }
2729
2730 /* n_map_members is a power of two, so this computes modulo. */
2731 s = cand->id & (use->n_map_members - 1);
2732 for (i = s; i < use->n_map_members; i++)
2733 if (use->cost_map[i].cand == cand)
2734 return use->cost_map + i;
2735
2736 for (i = 0; i < s; i++)
2737 if (use->cost_map[i].cand == cand)
2738 return use->cost_map + i;
2739
2740 return NULL;
2741 }
2742
2743 /* Returns an estimate of the cost of computing SEQ. */
2744
2745 static unsigned
2746 seq_cost (rtx seq, bool speed)
2747 {
2748 unsigned cost = 0;
2749 rtx set;
2750
2751 for (; seq; seq = NEXT_INSN (seq))
2752 {
2753 set = single_set (seq);
2754 if (set)
2755 cost += set_src_cost (SET_SRC (set), speed);
2756 else
2757 cost++;
2758 }
2759
2760 return cost;
2761 }
2762
2763 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */

2764 static rtx
2765 produce_memory_decl_rtl (tree obj, int *regno)
2766 {
2767 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2768 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2769 rtx x;
2770
2771 gcc_assert (obj);
2772 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2773 {
2774 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2775 x = gen_rtx_SYMBOL_REF (address_mode, name);
2776 SET_SYMBOL_REF_DECL (x, obj);
2777 x = gen_rtx_MEM (DECL_MODE (obj), x);
2778 set_mem_addr_space (x, as);
2779 targetm.encode_section_info (obj, x, true);
2780 }
2781 else
2782 {
2783 x = gen_raw_REG (address_mode, (*regno)++);
2784 x = gen_rtx_MEM (DECL_MODE (obj), x);
2785 set_mem_addr_space (x, as);
2786 }
2787
2788 return x;
2789 }
2790
2791 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2792 walk_tree. DATA contains the actual fake register number. */
2793
2794 static tree
2795 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2796 {
2797 tree obj = NULL_TREE;
2798 rtx x = NULL_RTX;
2799 int *regno = (int *) data;
2800
2801 switch (TREE_CODE (*expr_p))
2802 {
2803 case ADDR_EXPR:
2804 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2805 handled_component_p (*expr_p);
2806 expr_p = &TREE_OPERAND (*expr_p, 0))
2807 continue;
2808 obj = *expr_p;
2809 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2810 x = produce_memory_decl_rtl (obj, regno);
2811 break;
2812
2813 case SSA_NAME:
2814 *ws = 0;
2815 obj = SSA_NAME_VAR (*expr_p);
2816 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2817 if (!obj)
2818 return NULL_TREE;
2819 if (!DECL_RTL_SET_P (obj))
2820 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2821 break;
2822
2823 case VAR_DECL:
2824 case PARM_DECL:
2825 case RESULT_DECL:
2826 *ws = 0;
2827 obj = *expr_p;
2828
2829 if (DECL_RTL_SET_P (obj))
2830 break;
2831
2832 if (DECL_MODE (obj) == BLKmode)
2833 x = produce_memory_decl_rtl (obj, regno);
2834 else
2835 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2836
2837 break;
2838
2839 default:
2840 break;
2841 }
2842
2843 if (x)
2844 {
2845 decl_rtl_to_reset.safe_push (obj);
2846 SET_DECL_RTL (obj, x);
2847 }
2848
2849 return NULL_TREE;
2850 }
2851
2852 /* Determines cost of the computation of EXPR. */
2853
2854 static unsigned
2855 computation_cost (tree expr, bool speed)
2856 {
2857 rtx seq, rslt;
2858 tree type = TREE_TYPE (expr);
2859 unsigned cost;
2860 /* Avoid using hard regs in ways which may be unsupported. */
2861 int regno = LAST_VIRTUAL_REGISTER + 1;
2862 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2863 enum node_frequency real_frequency = node->frequency;
2864
2865 node->frequency = NODE_FREQUENCY_NORMAL;
2866 crtl->maybe_hot_insn_p = speed;
2867 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2868 start_sequence ();
2869 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2870 seq = get_insns ();
2871 end_sequence ();
2872 default_rtl_profile ();
2873 node->frequency = real_frequency;
2874
2875 cost = seq_cost (seq, speed);
2876 if (MEM_P (rslt))
2877 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2878 TYPE_ADDR_SPACE (type), speed);
2879 else if (!REG_P (rslt))
2880 cost += set_src_cost (rslt, speed);
2881
2882 return cost;
2883 }
2884
2885 /* Returns variable containing the value of candidate CAND at statement AT. */
2886
2887 static tree
2888 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2889 {
2890 if (stmt_after_increment (loop, cand, stmt))
2891 return cand->var_after;
2892 else
2893 return cand->var_before;
2894 }
2895
2896 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2897 same precision, at least as wide as the precision of TYPE, stores BA to
2898 A and BB to B, and returns the type of BA. Otherwise, returns the
2899 type of A and B. */
2900
2901 static tree
2902 determine_common_wider_type (tree *a, tree *b)
2903 {
2904 tree wider_type = NULL;
2905 tree suba, subb;
2906 tree atype = TREE_TYPE (*a);
2907
2908 if (CONVERT_EXPR_P (*a))
2909 {
2910 suba = TREE_OPERAND (*a, 0);
2911 wider_type = TREE_TYPE (suba);
2912 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2913 return atype;
2914 }
2915 else
2916 return atype;
2917
2918 if (CONVERT_EXPR_P (*b))
2919 {
2920 subb = TREE_OPERAND (*b, 0);
2921 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2922 return atype;
2923 }
2924 else
2925 return atype;
2926
2927 *a = suba;
2928 *b = subb;
2929 return wider_type;
2930 }
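
/* Illustrative: if *a is (int) la and *b is (int) lb with la and lb of
   type long, la and lb are stored back through A and B and long is
   returned, so the difference can be folded in the wider type.  */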
2931
2932 /* Determines the expression by which USE is expressed from induction variable
2933 CAND at statement AT in LOOP. The expression is stored in a decomposed
2934 form into AFF. Returns false if USE cannot be expressed using CAND. */
2935
2936 static bool
2937 get_computation_aff (struct loop *loop,
2938 struct iv_use *use, struct iv_cand *cand, gimple at,
2939 struct affine_tree_combination *aff)
2940 {
2941 tree ubase = use->iv->base;
2942 tree ustep = use->iv->step;
2943 tree cbase = cand->iv->base;
2944 tree cstep = cand->iv->step, cstep_common;
2945 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2946 tree common_type, var;
2947 tree uutype;
2948 aff_tree cbase_aff, var_aff;
2949 double_int rat;
2950
2951 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2952 {
2953 /* We do not have a precision to express the values of the use. */
2954 return false;
2955 }
2956
2957 var = var_at_stmt (loop, cand, at);
2958 uutype = unsigned_type_for (utype);
2959
2960 /* If the conversion is not noop, perform it. */
2961 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2962 {
2963 cstep = fold_convert (uutype, cstep);
2964 cbase = fold_convert (uutype, cbase);
2965 var = fold_convert (uutype, var);
2966 }
2967
2968 if (!constant_multiple_of (ustep, cstep, &rat))
2969 return false;
2970
2971 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2972 type, we achieve better folding by computing their difference in this
2973 wider type, and casting the result to UUTYPE. We do not need to worry
2974 about overflows, as all the arithmetic will in the end be performed in
2975 UUTYPE anyway. */
2976 common_type = determine_common_wider_type (&ubase, &cbase);
2977
2978 /* use = ubase - ratio * cbase + ratio * var. */
2979 tree_to_aff_combination (ubase, common_type, aff);
2980 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2981 tree_to_aff_combination (var, uutype, &var_aff);
2982
2983 /* We need to shift the value if we are after the increment. */
2984 if (stmt_after_increment (loop, cand, at))
2985 {
2986 aff_tree cstep_aff;
2987
2988 if (common_type != uutype)
2989 cstep_common = fold_convert (common_type, cstep);
2990 else
2991 cstep_common = cstep;
2992
2993 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2994 aff_combination_add (&cbase_aff, &cstep_aff);
2995 }
2996
2997 aff_combination_scale (&cbase_aff, -rat);
2998 aff_combination_add (aff, &cbase_aff);
2999 if (common_type != uutype)
3000 aff_combination_convert (aff, uutype);
3001
3002 aff_combination_scale (&var_aff, rat);
3003 aff_combination_add (aff, &var_aff);
3004
3005 return true;
3006 }
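
/* Worked instance (illustrative): for ubase 0, ustep 4 and a candidate
   with cbase 0, cstep 1, constant_multiple_of yields rat == 4, and the
   affine combination reduces to use = 4 * var.  */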
3007
3008 /* Return the type of USE. */
3009
3010 static tree
3011 get_use_type (struct iv_use *use)
3012 {
3013 tree base_type = TREE_TYPE (use->iv->base);
3014 tree type;
3015
3016 if (use->type == USE_ADDRESS)
3017 {
3018 /* The base_type may be a void pointer. Create a pointer type based on
3019 the mem_ref instead. */
3020 type = build_pointer_type (TREE_TYPE (*use->op_p));
3021 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3022 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3023 }
3024 else
3025 type = base_type;
3026
3027 return type;
3028 }
3029
3030 /* Determines the expression by which USE is expressed from induction variable
3031 CAND at statement AT in LOOP. The computation is unshared. */
3032
3033 static tree
3034 get_computation_at (struct loop *loop,
3035 struct iv_use *use, struct iv_cand *cand, gimple at)
3036 {
3037 aff_tree aff;
3038 tree type = get_use_type (use);
3039
3040 if (!get_computation_aff (loop, use, cand, at, &aff))
3041 return NULL_TREE;
3042 unshare_aff_combination (&aff);
3043 return fold_convert (type, aff_combination_to_tree (&aff));
3044 }
3045
3046 /* Determines the expression by which USE is expressed from induction variable
3047 CAND in LOOP. The computation is unshared. */
3048
3049 static tree
3050 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3051 {
3052 return get_computation_at (loop, use, cand, use->stmt);
3053 }
3054
3055 /* Adjust the cost COST for being in loop setup rather than loop body.
3056 If we're optimizing for space, the loop setup overhead is constant;
3057 if we're optimizing for speed, amortize it over the expected number of iterations. */
3058 static unsigned
3059 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3060 {
3061 if (cost == INFTY)
3062 return cost;
3063 else if (optimize_loop_for_speed_p (data->current_loop))
3064 return cost / avg_loop_niter (data->current_loop);
3065 else
3066 return cost;
3067 }
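
/* E.g. a setup cost of 20 in a loop optimized for speed that iterates
   about 10 times on average is accounted as 2; when optimizing for
   size it stays 20.  */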
3068
3069 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3070 validity for a memory reference accessing memory of mode MODE in
3071 address space AS. */
3072
3074 bool
3075 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3076 addr_space_t as)
3077 {
3078 #define MAX_RATIO 128
3079 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3080 static vec<sbitmap> valid_mult_list;
3081 sbitmap valid_mult;
3082
3083 if (data_index >= valid_mult_list.length ())
3084 valid_mult_list.safe_grow_cleared (data_index + 1);
3085
3086 valid_mult = valid_mult_list[data_index];
3087 if (!valid_mult)
3088 {
3089 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3090 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3091 rtx addr;
3092 HOST_WIDE_INT i;
3093
3094 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3095 bitmap_clear (valid_mult);
3096 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3097 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3098 {
3099 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3100 if (memory_address_addr_space_p (mode, addr, as))
3101 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3102 }
3103
3104 if (dump_file && (dump_flags & TDF_DETAILS))
3105 {
3106 fprintf (dump_file, " allowed multipliers:");
3107 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3108 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3109 fprintf (dump_file, " %d", (int) i);
3110 fprintf (dump_file, "\n");
3111 fprintf (dump_file, "\n");
3112 }
3113
3114 valid_mult_list[data_index] = valid_mult;
3115 }
3116
3117 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3118 return false;
3119
3120 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3121 }
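
/* Illustrative: on an x86-like target the probing above typically
   leaves ratios 1, 2, 4 and 8 set in valid_mult, so an address such as
   base + 4 * i is recognized as directly encodable.  */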
3122
3123 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3124 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3125 variable is omitted. Compute the cost for a memory reference that accesses
3126 a memory location of mode MEM_MODE in address space AS.
3127
3128 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3129 size of MEM_MODE / RATIO) is available. To make this determination, we
3130 look at the size of the increment to be made, which is given in CSTEP.
3131 CSTEP may be zero if the step is unknown.
3132 STMT_AFTER_INC is true iff the statement we're looking at is after the
3133 increment of the original biv.
3134
3135 TODO -- there must be some better way. This all is quite crude. */
3136
3137 typedef struct address_cost_data_s
3138 {
3139 HOST_WIDE_INT min_offset, max_offset;
3140 unsigned costs[2][2][2][2];
3141 } *address_cost_data;
3142
3143
3144 static comp_cost
3145 get_address_cost (bool symbol_present, bool var_present,
3146 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3147 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3148 addr_space_t as, bool speed,
3149 bool stmt_after_inc, bool *may_autoinc)
3150 {
3151 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3152 static vec<address_cost_data> address_cost_data_list;
3153 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3154 address_cost_data data;
3155 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3156 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3157 unsigned cost, acost, complexity;
3158 bool offset_p, ratio_p, autoinc;
3159 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3160 unsigned HOST_WIDE_INT mask;
3161 unsigned bits;
3162
3163 if (data_index >= address_cost_data_list.length ())
3164 address_cost_data_list.safe_grow_cleared (data_index + 1);
3165
3166 data = address_cost_data_list[data_index];
3167 if (!data)
3168 {
3169 HOST_WIDE_INT i;
3170 HOST_WIDE_INT rat, off = 0;
3171 int old_cse_not_expected, width;
3172 unsigned sym_p, var_p, off_p, rat_p, add_c;
3173 rtx seq, addr, base;
3174 rtx reg0, reg1;
3175
3176 data = (address_cost_data) xcalloc (1, sizeof (*data));
3177
3178 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3179
3180 width = GET_MODE_BITSIZE (address_mode) - 1;
3181 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3182 width = HOST_BITS_PER_WIDE_INT - 1;
3183 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3184
3185 for (i = width; i >= 0; i--)
3186 {
3187 off = -((unsigned HOST_WIDE_INT) 1 << i);
3188 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3189 if (memory_address_addr_space_p (mem_mode, addr, as))
3190 break;
3191 }
3192 data->min_offset = (i == -1 ? 0 : off);
3193
3194 for (i = width; i >= 0; i--)
3195 {
3196 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3197 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3198 if (memory_address_addr_space_p (mem_mode, addr, as))
3199 break;
3200 }
3201 if (i == -1)
3202 off = 0;
3203 data->max_offset = off;
3204
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3206 {
3207 fprintf (dump_file, "get_address_cost:\n");
3208 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3209 GET_MODE_NAME (mem_mode),
3210 data->min_offset);
3211 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3212 GET_MODE_NAME (mem_mode),
3213 data->max_offset);
3214 }
3215
3216 rat = 1;
3217 for (i = 2; i <= MAX_RATIO; i++)
3218 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3219 {
3220 rat = i;
3221 break;
3222 }
3223
3224 /* Compute the cost of various addressing modes. */
3225 acost = 0;
3226 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3227 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3228
3229 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3230 || USE_STORE_PRE_DECREMENT (mem_mode))
3231 {
3232 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3233 has_predec[mem_mode]
3234 = memory_address_addr_space_p (mem_mode, addr, as);
3235 }
3236 if (USE_LOAD_POST_DECREMENT (mem_mode)
3237 || USE_STORE_POST_DECREMENT (mem_mode))
3238 {
3239 addr = gen_rtx_POST_DEC (address_mode, reg0);
3240 has_postdec[mem_mode]
3241 = memory_address_addr_space_p (mem_mode, addr, as);
3242 }
3243 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3244 || USE_STORE_PRE_INCREMENT (mem_mode))
3245 {
3246 addr = gen_rtx_PRE_INC (address_mode, reg0);
3247 has_preinc[mem_mode]
3248 = memory_address_addr_space_p (mem_mode, addr, as);
3249 }
3250 if (USE_LOAD_POST_INCREMENT (mem_mode)
3251 || USE_STORE_POST_INCREMENT (mem_mode))
3252 {
3253 addr = gen_rtx_POST_INC (address_mode, reg0);
3254 has_postinc[mem_mode]
3255 = memory_address_addr_space_p (mem_mode, addr, as);
3256 }
3257 for (i = 0; i < 16; i++)
3258 {
3259 sym_p = i & 1;
3260 var_p = (i >> 1) & 1;
3261 off_p = (i >> 2) & 1;
3262 rat_p = (i >> 3) & 1;
3263
3264 addr = reg0;
3265 if (rat_p)
3266 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3267 gen_int_mode (rat, address_mode));
3268
3269 if (var_p)
3270 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3271
3272 if (sym_p)
3273 {
3274 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3275 /* ??? We can run into trouble with some backends by presenting
3276 it with symbols which haven't been properly passed through
3277 targetm.encode_section_info. By setting the local bit, we
3278 enhance the probability of things working. */
3279 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3280
3281 if (off_p)
3282 base = gen_rtx_fmt_e (CONST, address_mode,
3283 gen_rtx_fmt_ee
3284 (PLUS, address_mode, base,
3285 gen_int_mode (off, address_mode)));
3286 }
3287 else if (off_p)
3288 base = gen_int_mode (off, address_mode);
3289 else
3290 base = NULL_RTX;
3291
3292 if (base)
3293 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3294
3295 start_sequence ();
3296 /* To avoid splitting addressing modes, pretend that no cse will
3297 follow. */
3298 old_cse_not_expected = cse_not_expected;
3299 cse_not_expected = true;
3300 addr = memory_address_addr_space (mem_mode, addr, as);
3301 cse_not_expected = old_cse_not_expected;
3302 seq = get_insns ();
3303 end_sequence ();
3304
3305 acost = seq_cost (seq, speed);
3306 acost += address_cost (addr, mem_mode, as, speed);
3307
3308 if (!acost)
3309 acost = 1;
3310 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3311 }
3312
3313 /* On some targets, it is quite expensive to load a symbol into a register,
3314 which makes addresses that contain symbols look much more expensive.
3315 However, the symbol will have to be loaded in any case before the
3316 loop (and quite likely we have it in a register already), so it does not
3317 make much sense to penalize such addresses too heavily. So make some final
3318 tweaks for the SYMBOL_PRESENT modes:
3319
3320 If VAR_PRESENT is false, and the mode obtained by changing the symbol to
3321 a var is cheaper, use this mode with a small penalty.
3322 If VAR_PRESENT is true, test whether the mode with
3323 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3324 if this is the case, use it. */
3325 add_c = add_cost (speed, address_mode);
3326 for (i = 0; i < 8; i++)
3327 {
3328 var_p = i & 1;
3329 off_p = (i >> 1) & 1;
3330 rat_p = (i >> 2) & 1;
3331
3332 acost = data->costs[0][1][off_p][rat_p] + 1;
3333 if (var_p)
3334 acost += add_c;
3335
3336 if (acost < data->costs[1][var_p][off_p][rat_p])
3337 data->costs[1][var_p][off_p][rat_p] = acost;
3338 }
3339
3340 if (dump_file && (dump_flags & TDF_DETAILS))
3341 {
3342 fprintf (dump_file, "Address costs:\n");
3343
3344 for (i = 0; i < 16; i++)
3345 {
3346 sym_p = i & 1;
3347 var_p = (i >> 1) & 1;
3348 off_p = (i >> 2) & 1;
3349 rat_p = (i >> 3) & 1;
3350
3351 fprintf (dump_file, " ");
3352 if (sym_p)
3353 fprintf (dump_file, "sym + ");
3354 if (var_p)
3355 fprintf (dump_file, "var + ");
3356 if (off_p)
3357 fprintf (dump_file, "cst + ");
3358 if (rat_p)
3359 fprintf (dump_file, "rat * ");
3360
3361 acost = data->costs[sym_p][var_p][off_p][rat_p];
3362 fprintf (dump_file, "index costs %d\n", acost);
3363 }
3364 if (has_predec[mem_mode] || has_postdec[mem_mode]
3365 || has_preinc[mem_mode] || has_postinc[mem_mode])
3366 fprintf (dump_file, " May include autoinc/dec\n");
3367 fprintf (dump_file, "\n");
3368 }
3369
3370 address_cost_data_list[data_index] = data;
3371 }
3372
3373 bits = GET_MODE_BITSIZE (address_mode);
3374 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3375 offset &= mask;
3376 if ((offset >> (bits - 1) & 1))
3377 offset |= ~mask;
3378 s_offset = offset;
3379
3380 autoinc = false;
3381 msize = GET_MODE_SIZE (mem_mode);
3382 autoinc_offset = offset;
3383 if (stmt_after_inc)
3384 autoinc_offset += ratio * cstep;
3385 if (symbol_present || var_present || ratio != 1)
3386 autoinc = false;
3387 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3388 && msize == cstep)
3389 || (has_postdec[mem_mode] && autoinc_offset == 0
3390 && msize == -cstep)
3391 || (has_preinc[mem_mode] && autoinc_offset == msize
3392 && msize == cstep)
3393 || (has_predec[mem_mode] && autoinc_offset == -msize
3394 && msize == -cstep))
3395 autoinc = true;
3396
3397 cost = 0;
3398 offset_p = (s_offset != 0
3399 && data->min_offset <= s_offset
3400 && s_offset <= data->max_offset);
3401 ratio_p = (ratio != 1
3402 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3403
3404 if (ratio != 1 && !ratio_p)
3405 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3406
3407 if (s_offset && !offset_p && !symbol_present)
3408 cost += add_cost (speed, address_mode);
3409
3410 if (may_autoinc)
3411 *may_autoinc = autoinc;
3412 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3413 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3414 return new_cost (cost + acost, complexity);
3415 }
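
/* Decoding the table above (illustrative): costs[1][0][1][1] is the
   cost of a sym + cst + rat * index address, and the returned
   complexity is simply the number of address parts present.  */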
3416
3417 /* Calculates the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3418 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3419 calculating the operands of EXPR. Returns true if successful, and returns
3420 the cost in COST. */
3421
3422 static bool
3423 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3424 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3425 {
3426 comp_cost res;
3427 tree op1 = TREE_OPERAND (expr, 1);
3428 tree cst = TREE_OPERAND (mult, 1);
3429 tree multop = TREE_OPERAND (mult, 0);
3430 int m = exact_log2 (int_cst_value (cst));
3431 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3432 int sa_cost;
3433
3434 if (!(m >= 0 && m < maxm))
3435 return false;
3436
3437 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3438 ? shiftadd_cost (speed, mode, m)
3439 : (mult == op1
3440 ? shiftsub1_cost (speed, mode, m)
3441 : shiftsub0_cost (speed, mode, m)));
3442 res = new_cost (sa_cost, 0);
3443 res = add_costs (res, mult == op1 ? cost0 : cost1);
3444
3445 STRIP_NOPS (multop);
3446 if (!is_gimple_val (multop))
3447 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3448
3449 *cost = res;
3450 return true;
3451 }
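
/* Illustrative: for a + b * 8 the constant 8 gives m == 3, so the
   expression is costed as a single shift-and-add (shiftadd_cost)
   instead of a multiplication followed by an addition.  */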
3452
3453 /* Estimates cost of forcing expression EXPR into a variable. */
3454
3455 static comp_cost
3456 force_expr_to_var_cost (tree expr, bool speed)
3457 {
3458 static bool costs_initialized = false;
3459 static unsigned integer_cost [2];
3460 static unsigned symbol_cost [2];
3461 static unsigned address_cost [2];
3462 tree op0, op1;
3463 comp_cost cost0, cost1, cost;
3464 enum machine_mode mode;
3465
3466 if (!costs_initialized)
3467 {
3468 tree type = build_pointer_type (integer_type_node);
3469 tree var, addr;
3470 rtx x;
3471 int i;
3472
3473 var = create_tmp_var_raw (integer_type_node, "test_var");
3474 TREE_STATIC (var) = 1;
3475 x = produce_memory_decl_rtl (var, NULL);
3476 SET_DECL_RTL (var, x);
3477
3478 addr = build1 (ADDR_EXPR, type, var);
3479
3481 for (i = 0; i < 2; i++)
3482 {
3483 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3484 2000), i);
3485
3486 symbol_cost[i] = computation_cost (addr, i) + 1;
3487
3488 address_cost[i]
3489 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3490 if (dump_file && (dump_flags & TDF_DETAILS))
3491 {
3492 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3493 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3494 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3495 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3496 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3497 fprintf (dump_file, "\n");
3498 }
3499 }
3500
3501 costs_initialized = true;
3502 }
3503
3504 STRIP_NOPS (expr);
3505
3506 if (SSA_VAR_P (expr))
3507 return no_cost;
3508
3509 if (is_gimple_min_invariant (expr))
3510 {
3511 if (TREE_CODE (expr) == INTEGER_CST)
3512 return new_cost (integer_cost [speed], 0);
3513
3514 if (TREE_CODE (expr) == ADDR_EXPR)
3515 {
3516 tree obj = TREE_OPERAND (expr, 0);
3517
3518 if (TREE_CODE (obj) == VAR_DECL
3519 || TREE_CODE (obj) == PARM_DECL
3520 || TREE_CODE (obj) == RESULT_DECL)
3521 return new_cost (symbol_cost [speed], 0);
3522 }
3523
3524 return new_cost (address_cost [speed], 0);
3525 }
3526
3527 switch (TREE_CODE (expr))
3528 {
3529 case POINTER_PLUS_EXPR:
3530 case PLUS_EXPR:
3531 case MINUS_EXPR:
3532 case MULT_EXPR:
3533 op0 = TREE_OPERAND (expr, 0);
3534 op1 = TREE_OPERAND (expr, 1);
3535 STRIP_NOPS (op0);
3536 STRIP_NOPS (op1);
3537
3538 if (is_gimple_val (op0))
3539 cost0 = no_cost;
3540 else
3541 cost0 = force_expr_to_var_cost (op0, speed);
3542
3543 if (is_gimple_val (op1))
3544 cost1 = no_cost;
3545 else
3546 cost1 = force_expr_to_var_cost (op1, speed);
3547
3548 break;
3549
3550 case NEGATE_EXPR:
3551 op0 = TREE_OPERAND (expr, 0);
3552 STRIP_NOPS (op0);
3553 op1 = NULL_TREE;
3554
3555 if (is_gimple_val (op0))
3556 cost0 = no_cost;
3557 else
3558 cost0 = force_expr_to_var_cost (op0, speed);
3559
3560 cost1 = no_cost;
3561 break;
3562
3563 default:
3564 /* Just an arbitrary value, FIXME. */
3565 return new_cost (target_spill_cost[speed], 0);
3566 }
3567
3568 mode = TYPE_MODE (TREE_TYPE (expr));
3569 switch (TREE_CODE (expr))
3570 {
3571 case POINTER_PLUS_EXPR:
3572 case PLUS_EXPR:
3573 case MINUS_EXPR:
3574 case NEGATE_EXPR:
3575 cost = new_cost (add_cost (speed, mode), 0);
3576 if (TREE_CODE (expr) != NEGATE_EXPR)
3577 {
3578 tree mult = NULL_TREE;
3579 comp_cost sa_cost;
3580 if (TREE_CODE (op1) == MULT_EXPR)
3581 mult = op1;
3582 else if (TREE_CODE (op0) == MULT_EXPR)
3583 mult = op0;
3584
3585 if (mult != NULL_TREE
3586 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3587 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3588 speed, &sa_cost))
3589 return sa_cost;
3590 }
3591 break;
3592
3593 case MULT_EXPR:
3594 if (cst_and_fits_in_hwi (op0))
3595 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3596 mode, speed), 0);
3597 else if (cst_and_fits_in_hwi (op1))
3598 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3599 mode, speed), 0);
3600 else
3601 return new_cost (target_spill_cost [speed], 0);
3602 break;
3603
3604 default:
3605 gcc_unreachable ();
3606 }
3607
3608 cost = add_costs (cost, cost0);
3609 cost = add_costs (cost, cost1);
3610
3611 /* Bound the cost by target_spill_cost. The parts of complicated
3612 computations often are either loop invariant or at least can
3613 be shared between several iv uses, so letting this grow without
3614 limits would not give reasonable results. */
3615 if (cost.cost > (int) target_spill_cost [speed])
3616 cost.cost = target_spill_cost [speed];
3617
3618 return cost;
3619 }
3620
3621 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3622 invariants the computation depends on. */
3623
3624 static comp_cost
3625 force_var_cost (struct ivopts_data *data,
3626 tree expr, bitmap *depends_on)
3627 {
3628 if (depends_on)
3629 {
3630 fd_ivopts_data = data;
3631 walk_tree (&expr, find_depends, depends_on, NULL);
3632 }
3633
3634 return force_expr_to_var_cost (expr, data->speed);
3635 }
3636
3637 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3638 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3639 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3640 invariants the computation depends on. */
3641
3642 static comp_cost
3643 split_address_cost (struct ivopts_data *data,
3644 tree addr, bool *symbol_present, bool *var_present,
3645 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3646 {
3647 tree core;
3648 HOST_WIDE_INT bitsize;
3649 HOST_WIDE_INT bitpos;
3650 tree toffset;
3651 enum machine_mode mode;
3652 int unsignedp, volatilep;
3653
3654 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3655 &unsignedp, &volatilep, false);
3656
3657 if (toffset != 0
3658 || bitpos % BITS_PER_UNIT != 0
3659 || TREE_CODE (core) != VAR_DECL)
3660 {
3661 *symbol_present = false;
3662 *var_present = true;
3663 fd_ivopts_data = data;
3664 walk_tree (&addr, find_depends, depends_on, NULL);
3665 return new_cost (target_spill_cost[data->speed], 0);
3666 }
3667
3668 *offset += bitpos / BITS_PER_UNIT;
3669 if (TREE_STATIC (core)
3670 || DECL_EXTERNAL (core))
3671 {
3672 *symbol_present = true;
3673 *var_present = false;
3674 return no_cost;
3675 }
3676
3677 *symbol_present = false;
3678 *var_present = true;
3679 return no_cost;
3680 }
3681
3682 /* Estimates cost of expressing difference of addresses E1 - E2 as
3683 var + symbol + offset. The value of offset is added to OFFSET,
3684 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3685 part is missing. DEPENDS_ON is a set of the invariants the computation
3686 depends on. */
3687
3688 static comp_cost
3689 ptr_difference_cost (struct ivopts_data *data,
3690 tree e1, tree e2, bool *symbol_present, bool *var_present,
3691 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3692 {
3693 HOST_WIDE_INT diff = 0;
3694 aff_tree aff_e1, aff_e2;
3695 tree type;
3696
3697 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3698
3699 if (ptr_difference_const (e1, e2, &diff))
3700 {
3701 *offset += diff;
3702 *symbol_present = false;
3703 *var_present = false;
3704 return no_cost;
3705 }
3706
3707 if (integer_zerop (e2))
3708 return split_address_cost (data, TREE_OPERAND (e1, 0),
3709 symbol_present, var_present, offset, depends_on);
3710
3711 *symbol_present = false;
3712 *var_present = true;
3713
3714 type = signed_type_for (TREE_TYPE (e1));
3715 tree_to_aff_combination (e1, type, &aff_e1);
3716 tree_to_aff_combination (e2, type, &aff_e2);
3717 aff_combination_scale (&aff_e2, double_int_minus_one);
3718 aff_combination_add (&aff_e1, &aff_e2);
3719
3720 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3721 }
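
/* Example: for e1 == &a[6] and e2 == &a[2] with 4-byte elements,
   ptr_difference_const computes 16, so only *offset is adjusted and
   the difference itself is free.  */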
3722
3723 /* Estimates cost of expressing difference E1 - E2 as
3724 var + symbol + offset. The value of offset is added to OFFSET,
3725 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3726 part is missing. DEPENDS_ON is a set of the invariants the computation
3727 depends on. */
3728
3729 static comp_cost
3730 difference_cost (struct ivopts_data *data,
3731 tree e1, tree e2, bool *symbol_present, bool *var_present,
3732 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3733 {
3734 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3735 unsigned HOST_WIDE_INT off1, off2;
3736 aff_tree aff_e1, aff_e2;
3737 tree type;
3738
3739 e1 = strip_offset (e1, &off1);
3740 e2 = strip_offset (e2, &off2);
3741 *offset += off1 - off2;
3742
3743 STRIP_NOPS (e1);
3744 STRIP_NOPS (e2);
3745
3746 if (TREE_CODE (e1) == ADDR_EXPR)
3747 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3748 offset, depends_on);
3749 *symbol_present = false;
3750
3751 if (operand_equal_p (e1, e2, 0))
3752 {
3753 *var_present = false;
3754 return no_cost;
3755 }
3756
3757 *var_present = true;
3758
3759 if (integer_zerop (e2))
3760 return force_var_cost (data, e1, depends_on);
3761
3762 if (integer_zerop (e1))
3763 {
3764 comp_cost cost = force_var_cost (data, e2, depends_on);
3765 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3766 return cost;
3767 }
3768
3769 type = signed_type_for (TREE_TYPE (e1));
3770 tree_to_aff_combination (e1, type, &aff_e1);
3771 tree_to_aff_combination (e2, type, &aff_e2);
3772 aff_combination_scale (&aff_e2, double_int_minus_one);
3773 aff_combination_add (&aff_e1, &aff_e2);
3774
3775 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3776 }
3777
3778 /* Returns true if AFF1 and AFF2 are identical. */
3779
3780 static bool
3781 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3782 {
3783 unsigned i;
3784
3785 if (aff1->n != aff2->n)
3786 return false;
3787
3788 for (i = 0; i < aff1->n; i++)
3789 {
3790 if (aff1->elts[i].coef != aff2->elts[i].coef)
3791 return false;
3792
3793 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3794 return false;
3795 }
3796 return true;
3797 }
3798
3799 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3800
3801 static int
3802 get_expr_id (struct ivopts_data *data, tree expr)
3803 {
3804 struct iv_inv_expr_ent ent;
3805 struct iv_inv_expr_ent **slot;
3806
3807 ent.expr = expr;
3808 ent.hash = iterative_hash_expr (expr, 0);
3809 slot = (struct iv_inv_expr_ent **) htab_find_slot (data->inv_expr_tab,
3810 &ent, INSERT);
3811 if (*slot)
3812 return (*slot)->id;
3813
3814 *slot = XNEW (struct iv_inv_expr_ent);
3815 (*slot)->expr = expr;
3816 (*slot)->hash = ent.hash;
3817 (*slot)->id = data->inv_expr_id++;
3818 return (*slot)->id;
3819 }
3820
3821 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3822 requires a new compiler-generated temporary. Returns -1 otherwise.
3823 ADDRESS_P is a flag indicating if the expression is for address
3824 computation. */
3825
3826 static int
3827 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3828 tree cbase, HOST_WIDE_INT ratio,
3829 bool address_p)
3830 {
3831 aff_tree ubase_aff, cbase_aff;
3832 tree expr, ub, cb;
3833
3834 STRIP_NOPS (ubase);
3835 STRIP_NOPS (cbase);
3836 ub = ubase;
3837 cb = cbase;
3838
3839 if ((TREE_CODE (ubase) == INTEGER_CST)
3840 && (TREE_CODE (cbase) == INTEGER_CST))
3841 return -1;
3842
3843 /* Strips the constant part. */
3844 if (TREE_CODE (ubase) == PLUS_EXPR
3845 || TREE_CODE (ubase) == MINUS_EXPR
3846 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3847 {
3848 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3849 ubase = TREE_OPERAND (ubase, 0);
3850 }
3851
3852 /* Strips the constant part. */
3853 if (TREE_CODE (cbase) == PLUS_EXPR
3854 || TREE_CODE (cbase) == MINUS_EXPR
3855 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3856 {
3857 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3858 cbase = TREE_OPERAND (cbase, 0);
3859 }
3860
3861 if (address_p)
3862 {
3863 if (((TREE_CODE (ubase) == SSA_NAME)
3864 || (TREE_CODE (ubase) == ADDR_EXPR
3865 && is_gimple_min_invariant (ubase)))
3866 && (TREE_CODE (cbase) == INTEGER_CST))
3867 return -1;
3868
3869 if (((TREE_CODE (cbase) == SSA_NAME)
3870 || (TREE_CODE (cbase) == ADDR_EXPR
3871 && is_gimple_min_invariant (cbase)))
3872 && (TREE_CODE (ubase) == INTEGER_CST))
3873 return -1;
3874 }
3875
3876 if (ratio == 1)
3877 {
3878 if (operand_equal_p (ubase, cbase, 0))
3879 return -1;
3880
3881 if (TREE_CODE (ubase) == ADDR_EXPR
3882 && TREE_CODE (cbase) == ADDR_EXPR)
3883 {
3884 tree usym, csym;
3885
3886 usym = TREE_OPERAND (ubase, 0);
3887 csym = TREE_OPERAND (cbase, 0);
3888 if (TREE_CODE (usym) == ARRAY_REF)
3889 {
3890 tree ind = TREE_OPERAND (usym, 1);
3891 if (TREE_CODE (ind) == INTEGER_CST
3892 && host_integerp (ind, 0)
3893 && TREE_INT_CST_LOW (ind) == 0)
3894 usym = TREE_OPERAND (usym, 0);
3895 }
3896 if (TREE_CODE (csym) == ARRAY_REF)
3897 {
3898 tree ind = TREE_OPERAND (csym, 1);
3899 if (TREE_CODE (ind) == INTEGER_CST
3900 && host_integerp (ind, 0)
3901 && TREE_INT_CST_LOW (ind) == 0)
3902 csym = TREE_OPERAND (csym, 0);
3903 }
3904 if (operand_equal_p (usym, csym, 0))
3905 return -1;
3906 }
3907 /* Now do a more complex comparison. */
3908 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
3909 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
3910 if (compare_aff_trees (&ubase_aff, &cbase_aff))
3911 return -1;
3912 }
3913
3914 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
3915 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
3916
3917 aff_combination_scale (&cbase_aff, double_int::from_shwi (-1 * ratio));
3918 aff_combination_add (&ubase_aff, &cbase_aff);
3919 expr = aff_combination_to_tree (&ubase_aff);
3920 return get_expr_id (data, expr);
3921 }
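
/* Illustrative: with ubase x + 8, cbase x and ratio 1, the bases
   compare equal once the constant parts are stripped, so -1 is
   returned and no new temporary is accounted; for unrelated bases an
   id from get_expr_id is returned instead.  */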
3922
3925 /* Determines the cost of the computation by which USE is expressed
3926 from induction variable CAND. If ADDRESS_P is true, we just need
3927 to create an address from it, otherwise we want to get it into a
3928 register. A set of invariants we depend on is stored in
3929 DEPENDS_ON. AT is the statement at which the value is computed.
3930 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3931 addressing is likely. */
3932
3933 static comp_cost
3934 get_computation_cost_at (struct ivopts_data *data,
3935 struct iv_use *use, struct iv_cand *cand,
3936 bool address_p, bitmap *depends_on, gimple at,
3937 bool *can_autoinc,
3938 int *inv_expr_id)
3939 {
3940 tree ubase = use->iv->base, ustep = use->iv->step;
3941 tree cbase, cstep;
3942 tree utype = TREE_TYPE (ubase), ctype;
3943 unsigned HOST_WIDE_INT cstepi, offset = 0;
3944 HOST_WIDE_INT ratio, aratio;
3945 bool var_present, symbol_present, stmt_is_after_inc;
3946 comp_cost cost;
3947 double_int rat;
3948 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3949 enum machine_mode mem_mode = (address_p
3950 ? TYPE_MODE (TREE_TYPE (*use->op_p))
3951 : VOIDmode);
3952
3953 *depends_on = NULL;
3954
3955 /* Only consider real candidates. */
3956 if (!cand->iv)
3957 return infinite_cost;
3958
3959 cbase = cand->iv->base;
3960 cstep = cand->iv->step;
3961 ctype = TREE_TYPE (cbase);
3962
3963 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3964 {
3965 /* We do not have a precision to express the values of the use. */
3966 return infinite_cost;
3967 }
3968
3969 if (address_p
3970 || (use->iv->base_object
3971 && cand->iv->base_object
3972 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
3973 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
3974 {
3975 /* Do not try to express the address of an object with a computation
3976 based on the address of a different object. This may cause problems
3977 in RTL-level alias analysis (which does not expect this to happen,
3978 as it is illegal in C), and would be unlikely to be useful
3979 anyway. */
3980 if (use->iv->base_object
3981 && cand->iv->base_object
3982 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3983 return infinite_cost;
3984 }
3985
3986 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3987 {
3988 /* TODO -- add direct handling of this case. */
3989 goto fallback;
3990 }
3991
3992 /* CSTEPI is removed from the offset in case the statement is after the
3993 increment. If the step is not constant, we use zero instead.
3994 This is a bit imprecise (there is the extra addition), but
3995 redundancy elimination is likely to transform the code so that
3996 it uses the value of the variable before the increment anyway,
3997 so this is not too unrealistic. */
3998 if (cst_and_fits_in_hwi (cstep))
3999 cstepi = int_cst_value (cstep);
4000 else
4001 cstepi = 0;
4002
4003 if (!constant_multiple_of (ustep, cstep, &rat))
4004 return infinite_cost;
4005
4006 if (rat.fits_shwi ())
4007 ratio = rat.to_shwi ();
4008 else
4009 return infinite_cost;
4010
4011 STRIP_NOPS (cbase);
4012 ctype = TREE_TYPE (cbase);
4013
4014 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4015
4016 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4017 or ratio == 1, it is better to handle this like
4018
4019 ubase - ratio * cbase + ratio * var
4020
4021 (this also holds in the case ratio == -1, TODO). */
4022
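/* A small worked instance (illustrative numbers only): with ubase == 100,
   cbase == 4 and ratio == 2, the use is 100 + 2 * (var - 4), which the
   rewrite above folds to (100 - 2 * 4) + 2 * var == 92 + 2 * var, so the
   whole constant part becomes a single compile-time offset.  */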
4023 if (cst_and_fits_in_hwi (cbase))
4024 {
4025 offset = - ratio * int_cst_value (cbase);
4026 cost = difference_cost (data,
4027 ubase, build_int_cst (utype, 0),
4028 &symbol_present, &var_present, &offset,
4029 depends_on);
4030 cost.cost /= avg_loop_niter (data->current_loop);
4031 }
4032 else if (ratio == 1)
4033 {
4034 tree real_cbase = cbase;
4035
4036 /* Check to see if any adjustment is needed. */
4037 if (cstepi == 0 && stmt_is_after_inc)
4038 {
4039 aff_tree real_cbase_aff;
4040 aff_tree cstep_aff;
4041
4042 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4043 &real_cbase_aff);
4044 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4045
4046 aff_combination_add (&real_cbase_aff, &cstep_aff);
4047 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4048 }
4049
4050 cost = difference_cost (data,
4051 ubase, real_cbase,
4052 &symbol_present, &var_present, &offset,
4053 depends_on);
4054 cost.cost /= avg_loop_niter (data->current_loop);
4055 }
4056 else if (address_p
4057 && !POINTER_TYPE_P (ctype)
4058 && multiplier_allowed_in_address_p
4059 (ratio, mem_mode,
4060 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4061 {
4062 cbase
4063 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4064 cost = difference_cost (data,
4065 ubase, cbase,
4066 &symbol_present, &var_present, &offset,
4067 depends_on);
4068 cost.cost /= avg_loop_niter (data->current_loop);
4069 }
4070 else
4071 {
4072 cost = force_var_cost (data, cbase, depends_on);
4073 cost = add_costs (cost,
4074 difference_cost (data,
4075 ubase, build_int_cst (utype, 0),
4076 &symbol_present, &var_present,
4077 &offset, depends_on));
4078 cost.cost /= avg_loop_niter (data->current_loop);
4079 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4080 }
4081
4082 if (inv_expr_id)
4083 {
4084 *inv_expr_id =
4085 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4086 /* Clear depends on. */
4087 if (*inv_expr_id != -1 && depends_on && *depends_on)
4088 bitmap_clear (*depends_on);
4089 }
4090
4091 /* If we are after the increment, the value of the candidate is higher by
4092 one iteration. */
4093 if (stmt_is_after_inc)
4094 offset -= ratio * cstepi;
4095
4096 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4097 (symbol/var1/const parts may be omitted). If we are looking for an
4098 address, find the cost of addressing this. */
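/* For instance (a hypothetical decomposition), a use &a[i + 3] based on
   candidate i would give symbol == &a, const == 3 * sizeof (*a) and
   ratio == sizeof (*a), with no var1 part.  */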
4099 if (address_p)
4100 return add_costs (cost,
4101 get_address_cost (symbol_present, var_present,
4102 offset, ratio, cstepi,
4103 mem_mode,
4104 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4105 speed, stmt_is_after_inc,
4106 can_autoinc));
4107
4108 /* Otherwise estimate the costs for computing the expression. */
4109 if (!symbol_present && !var_present && !offset)
4110 {
4111 if (ratio != 1)
4112 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4113 return cost;
4114 }
4115
4116 /* Symbol + offset should be compile-time computable so consider that they
4117 are added once to the variable, if present. */
4118 if (var_present && (symbol_present || offset))
4119 cost.cost += adjust_setup_cost (data,
4120 add_cost (speed, TYPE_MODE (ctype)));
4121
4122 /* Having offset does not affect runtime cost in case it is added to
4123 symbol, but it increases complexity. */
4124 if (offset)
4125 cost.complexity++;
4126
4127 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4128
4129 aratio = ratio > 0 ? ratio : -ratio;
4130 if (aratio != 1)
4131 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4132 return cost;
4133
4134 fallback:
4135 if (can_autoinc)
4136 *can_autoinc = false;
4137
4138 {
4139 /* Just get the expression, expand it and measure the cost. */
4140 tree comp = get_computation_at (data->current_loop, use, cand, at);
4141
4142 if (!comp)
4143 return infinite_cost;
4144
4145 if (address_p)
4146 comp = build_simple_mem_ref (comp);
4147
4148 return new_cost (computation_cost (comp, speed), 0);
4149 }
4150 }
4151
4152 /* Determines the cost of the computation by which USE is expressed
4153 from induction variable CAND. If ADDRESS_P is true, we just need
4154 to create an address from it, otherwise we want to get it into a
4155 register. A set of invariants we depend on is stored in
4156 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4157 autoinc addressing is likely. */
4158
4159 static comp_cost
4160 get_computation_cost (struct ivopts_data *data,
4161 struct iv_use *use, struct iv_cand *cand,
4162 bool address_p, bitmap *depends_on,
4163 bool *can_autoinc, int *inv_expr_id)
4164 {
4165 return get_computation_cost_at (data,
4166 use, cand, address_p, depends_on, use->stmt,
4167 can_autoinc, inv_expr_id);
4168 }
4169
4170 /* Determines cost of basing replacement of USE on CAND in a generic
4171 expression. */
4172
4173 static bool
4174 determine_use_iv_cost_generic (struct ivopts_data *data,
4175 struct iv_use *use, struct iv_cand *cand)
4176 {
4177 bitmap depends_on;
4178 comp_cost cost;
4179 int inv_expr_id = -1;
4180
4181 /* The simple case first -- if we need to express the value of the preserved
4182 original biv, the cost is 0. This also prevents us from counting the
4183 cost of increment twice -- once at this use and once in the cost of
4184 the candidate. */
4185 if (cand->pos == IP_ORIGINAL
4186 && cand->incremented_at == use->stmt)
4187 {
4188 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4189 ERROR_MARK, -1);
4190 return true;
4191 }
4192
4193 cost = get_computation_cost (data, use, cand, false, &depends_on,
4194 NULL, &inv_expr_id);
4195
4196 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4197 inv_expr_id);
4198
4199 return !infinite_cost_p (cost);
4200 }
4201
4202 /* Determines cost of basing replacement of USE on CAND in an address. */
4203
4204 static bool
4205 determine_use_iv_cost_address (struct ivopts_data *data,
4206 struct iv_use *use, struct iv_cand *cand)
4207 {
4208 bitmap depends_on;
4209 bool can_autoinc;
4210 int inv_expr_id = -1;
4211 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4212 &can_autoinc, &inv_expr_id);
4213
4214 if (cand->ainc_use == use)
4215 {
4216 if (can_autoinc)
4217 cost.cost -= cand->cost_step;
4218 /* If we generated the candidate solely for exploiting autoincrement
4219 opportunities, and it turns out it can't be used, set the cost to
4220 infinity to make sure we ignore it. */
4221 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4222 cost = infinite_cost;
4223 }
4224 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4225 inv_expr_id);
4226
4227 return !infinite_cost_p (cost);
4228 }
4229
4230 /* Computes value of candidate CAND at position AT in iteration NITER, and
4231 stores it to VAL. */
4232
4233 static void
4234 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4235 aff_tree *val)
4236 {
4237 aff_tree step, delta, nit;
4238 struct iv *iv = cand->iv;
4239 tree type = TREE_TYPE (iv->base);
4240 tree steptype = type;
4241 if (POINTER_TYPE_P (type))
4242 steptype = sizetype;
4243
4244 tree_to_aff_combination (iv->step, steptype, &step);
4245 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4246 aff_combination_convert (&nit, steptype);
4247 aff_combination_mult (&nit, &step, &delta);
4248 if (stmt_after_increment (loop, cand, at))
4249 aff_combination_add (&delta, &step);
4250
4251 tree_to_aff_combination (iv->base, type, val);
4252 aff_combination_add (val, &delta);
4253 }
4254
4255 /* Returns the period of induction variable IV. */
4256
4257 static tree
4258 iv_period (struct iv *iv)
4259 {
4260 tree step = iv->step, period, type;
4261 tree pow2div;
4262
4263 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4264
4265 type = unsigned_type_for (TREE_TYPE (step));
4266 /* The period of the iv is lcm (step, type_range)/step - 1,
4267 i.e., N*type_range/step - 1. Since the type range is a power
4268 of two, N == step >> num_of_ending_zeros_binary (step),
4269 so the final result is
4270
4271 (type_range >> num_of_ending_zeros_binary (step)) - 1
4272
4273 */
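/* Illustrative check of the formula: for an 8-bit unsigned type and
   step == 6, num_of_ending_zeros_binary (6) == 1, so the period is
   (256 >> 1) - 1 == 127, which indeed equals lcm (6, 256) / 6 - 1
   == 768 / 6 - 1 == 127.  */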
4274 pow2div = num_ending_zeros (step);
4275
4276 period = build_low_bits_mask (type,
4277 (TYPE_PRECISION (type)
4278 - tree_low_cst (pow2div, 1)));
4279
4280 return period;
4281 }
4282
4283 /* Returns the comparison operator used when eliminating the iv USE. */
4284
4285 static enum tree_code
4286 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4287 {
4288 struct loop *loop = data->current_loop;
4289 basic_block ex_bb;
4290 edge exit;
4291
4292 ex_bb = gimple_bb (use->stmt);
4293 exit = EDGE_SUCC (ex_bb, 0);
4294 if (flow_bb_inside_loop_p (loop, exit->dest))
4295 exit = EDGE_SUCC (ex_bb, 1);
4296
4297 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4298 }
4299
4300 static tree
4301 strip_wrap_conserving_type_conversions (tree exp)
4302 {
4303 while (tree_ssa_useless_type_conversion (exp)
4304 && (nowrap_type_p (TREE_TYPE (exp))
4305 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4306 exp = TREE_OPERAND (exp, 0);
4307 return exp;
4308 }
4309
4310 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4311 check for an exact match. */
4312
4313 static bool
4314 expr_equal_p (tree e, tree what)
4315 {
4316 gimple stmt;
4317 enum tree_code code;
4318
4319 e = strip_wrap_conserving_type_conversions (e);
4320 what = strip_wrap_conserving_type_conversions (what);
4321
4322 code = TREE_CODE (what);
4323 if (TREE_TYPE (e) != TREE_TYPE (what))
4324 return false;
4325
4326 if (operand_equal_p (e, what, 0))
4327 return true;
4328
4329 if (TREE_CODE (e) != SSA_NAME)
4330 return false;
4331
4332 stmt = SSA_NAME_DEF_STMT (e);
4333 if (gimple_code (stmt) != GIMPLE_ASSIGN
4334 || gimple_assign_rhs_code (stmt) != code)
4335 return false;
4336
4337 switch (get_gimple_rhs_class (code))
4338 {
4339 case GIMPLE_BINARY_RHS:
4340 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4341 return false;
4342 /* Fallthru. */
4343
4344 case GIMPLE_UNARY_RHS:
4345 case GIMPLE_SINGLE_RHS:
4346 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4347 default:
4348 return false;
4349 }
4350 }
4351
4352 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4353 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4354 calculation is performed in a non-wrapping type.
4355
4356 TODO: More generally, we could test for the situation that
4357 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4358 This would require knowing the sign of OFFSET.
4359
4360 Also, we only look for the first addition in the computation of BASE.
4361 More complex analysis would be better, but introducing it just for
4362 this optimization seems like overkill. */
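/* For example (a sketch of the case handled below): if BASE was computed
   as q + len in a type with undefined overflow, then BASE - len == q
   trivially cannot wrap; this is the PLUS_EXPR case of the switch in the
   function body.  */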
4363
4364 static bool
4365 difference_cannot_overflow_p (tree base, tree offset)
4366 {
4367 enum tree_code code;
4368 tree e1, e2;
4369
4370 if (!nowrap_type_p (TREE_TYPE (base)))
4371 return false;
4372
4373 base = expand_simple_operations (base);
4374
4375 if (TREE_CODE (base) == SSA_NAME)
4376 {
4377 gimple stmt = SSA_NAME_DEF_STMT (base);
4378
4379 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4380 return false;
4381
4382 code = gimple_assign_rhs_code (stmt);
4383 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4384 return false;
4385
4386 e1 = gimple_assign_rhs1 (stmt);
4387 e2 = gimple_assign_rhs2 (stmt);
4388 }
4389 else
4390 {
4391 code = TREE_CODE (base);
4392 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4393 return false;
4394 e1 = TREE_OPERAND (base, 0);
4395 e2 = TREE_OPERAND (base, 1);
4396 }
4397
4398 /* TODO: deeper inspection may be necessary to prove the equality. */
4399 switch (code)
4400 {
4401 case PLUS_EXPR:
4402 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4403 case POINTER_PLUS_EXPR:
4404 return expr_equal_p (e2, offset);
4405
4406 default:
4407 return false;
4408 }
4409 }
4410
4411 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4412 comparison with CAND. NITER describes the number of iterations of
4413 the loops. If successful, the comparison in COMP_P is altered accordingly.
4414
4415 We aim to handle the following situation:
4416
4417 sometype *base, *p;
4418 int a, b, i;
4419
4420 i = a;
4421 p = p_0 = base + a;
4422
4423 do
4424 {
4425 bla (*p);
4426 p++;
4427 i++;
4428 }
4429 while (i < b);
4430
4431 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4432 We aim to optimize this to
4433
4434 p = p_0 = base + a;
4435 do
4436 {
4437 bla (*p);
4438 p++;
4439 }
4440 while (p < p_0 - a + b);
4441
4442 This preserves correctness, since the pointer arithmetic does not
4443 overflow. More precisely:
4444
4445 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4446 overflow in computing it or the values of p.
4447 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4448 overflow. To prove this, we use the fact that p_0 = base + a. */
4449
4450 static bool
4451 iv_elimination_compare_lt (struct ivopts_data *data,
4452 struct iv_cand *cand, enum tree_code *comp_p,
4453 struct tree_niter_desc *niter)
4454 {
4455 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4456 struct affine_tree_combination nit, tmpa, tmpb;
4457 enum tree_code comp;
4458 HOST_WIDE_INT step;
4459
4460 /* We need to know that the candidate induction variable does not overflow.
4461 While more complex analysis may be used to prove this, for now just
4462 check that the variable appears in the original program and that it
4463 is computed in a type that guarantees no overflows. */
4464 cand_type = TREE_TYPE (cand->iv->base);
4465 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4466 return false;
4467
4468 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4469 the calculation of the BOUND could overflow, making the comparison
4470 invalid. */
4471 if (!data->loop_single_exit_p)
4472 return false;
4473
4474 /* We need to be able to decide whether the candidate is increasing or
4475 decreasing in order to choose the right comparison operator. */
4476 if (!cst_and_fits_in_hwi (cand->iv->step))
4477 return false;
4478 step = int_cst_value (cand->iv->step);
4479
4480 /* Check that the number of iterations matches the expected pattern:
4481 a + 1 > b ? 0 : b - a - 1. */
4482 mbz = niter->may_be_zero;
4483 if (TREE_CODE (mbz) == GT_EXPR)
4484 {
4485 /* Handle a + 1 > b. */
4486 tree op0 = TREE_OPERAND (mbz, 0);
4487 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4488 {
4489 a = TREE_OPERAND (op0, 0);
4490 b = TREE_OPERAND (mbz, 1);
4491 }
4492 else
4493 return false;
4494 }
4495 else if (TREE_CODE (mbz) == LT_EXPR)
4496 {
4497 tree op1 = TREE_OPERAND (mbz, 1);
4498
4499 /* Handle b < a + 1. */
4500 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4501 {
4502 a = TREE_OPERAND (op1, 0);
4503 b = TREE_OPERAND (mbz, 0);
4504 }
4505 else
4506 return false;
4507 }
4508 else
4509 return false;
4510
4511 /* Expected number of iterations is B - A - 1. Check that it matches
4512 the actual number, i.e., that B - A - NITER = 1. */
4513 tree_to_aff_combination (niter->niter, nit_type, &nit);
4514 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4515 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4516 aff_combination_scale (&nit, double_int_minus_one);
4517 aff_combination_scale (&tmpa, double_int_minus_one);
4518 aff_combination_add (&tmpb, &tmpa);
4519 aff_combination_add (&tmpb, &nit);
4520 if (tmpb.n != 0 || tmpb.offset != double_int_one)
4521 return false;
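/* Worked instance (illustrative): for a == 2 and b == 7 the loop runs
   b - a - 1 == 4 times; after scaling NITER and A by -1 and summing,
   the combination is -4 - 2 + 7 == 1 with no variable part, so the
   check above accepts it.  */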
4522
4523 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4524 overflow. */
4525 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4526 cand->iv->step,
4527 fold_convert (TREE_TYPE (cand->iv->step), a));
4528 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4529 return false;
4530
4531 /* Determine the new comparison operator. */
4532 comp = step < 0 ? GT_EXPR : LT_EXPR;
4533 if (*comp_p == NE_EXPR)
4534 *comp_p = comp;
4535 else if (*comp_p == EQ_EXPR)
4536 *comp_p = invert_tree_comparison (comp, false);
4537 else
4538 gcc_unreachable ();
4539
4540 return true;
4541 }
4542
4543 /* Check whether it is possible to express the condition in USE by comparison
4544 of candidate CAND. If so, store the value compared with to BOUND, and the
4545 comparison operator to COMP. */
4546
4547 static bool
4548 may_eliminate_iv (struct ivopts_data *data,
4549 struct iv_use *use, struct iv_cand *cand, tree *bound,
4550 enum tree_code *comp)
4551 {
4552 basic_block ex_bb;
4553 edge exit;
4554 tree period;
4555 struct loop *loop = data->current_loop;
4556 aff_tree bnd;
4557 struct tree_niter_desc *desc = NULL;
4558
4559 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4560 return false;
4561
4562 /* For now this works only for exits that dominate the loop latch.
4563 TODO: extend to other conditions inside the loop body. */
4564 ex_bb = gimple_bb (use->stmt);
4565 if (use->stmt != last_stmt (ex_bb)
4566 || gimple_code (use->stmt) != GIMPLE_COND
4567 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4568 return false;
4569
4570 exit = EDGE_SUCC (ex_bb, 0);
4571 if (flow_bb_inside_loop_p (loop, exit->dest))
4572 exit = EDGE_SUCC (ex_bb, 1);
4573 if (flow_bb_inside_loop_p (loop, exit->dest))
4574 return false;
4575
4576 desc = niter_for_exit (data, exit);
4577 if (!desc)
4578 return false;
4579
4580 /* Determine whether we can use the variable to test the exit condition.
4581 This is the case iff the period of the induction variable is greater
4582 than the number of iterations for which the exit condition is true. */
4583 period = iv_period (cand->iv);
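/* E.g. (illustrative), an 8-bit candidate with step 1 has period 255;
   for a loop known to iterate 300 times such an iv would wrap before
   the exit test, so the checks below must reject the elimination.  */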
4584
4585 /* If the number of iterations is constant, compare against it directly. */
4586 if (TREE_CODE (desc->niter) == INTEGER_CST)
4587 {
4588 /* See cand_value_at. */
4589 if (stmt_after_increment (loop, cand, use->stmt))
4590 {
4591 if (!tree_int_cst_lt (desc->niter, period))
4592 return false;
4593 }
4594 else
4595 {
4596 if (tree_int_cst_lt (period, desc->niter))
4597 return false;
4598 }
4599 }
4600
4601 /* If not, and if this is the only possible exit of the loop, see whether
4602 we can get a conservative estimate on the number of iterations of the
4603 entire loop and compare against that instead. */
4604 else
4605 {
4606 double_int period_value, max_niter;
4607
4608 max_niter = desc->max;
4609 if (stmt_after_increment (loop, cand, use->stmt))
4610 max_niter += double_int_one;
4611 period_value = tree_to_double_int (period);
4612 if (max_niter.ugt (period_value))
4613 {
4614 /* See if we can take advantage of inferred loop bound information. */
4615 if (data->loop_single_exit_p)
4616 {
4617 if (!max_loop_iterations (loop, &max_niter))
4618 return false;
4619 /* The loop bound is already adjusted by adding 1. */
4620 if (max_niter.ugt (period_value))
4621 return false;
4622 }
4623 else
4624 return false;
4625 }
4626 }
4627
4628 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4629
4630 *bound = aff_combination_to_tree (&bnd);
4631 *comp = iv_elimination_compare (data, use);
4632
4633 /* It is unlikely that computing the number of iterations using division
4634 would be more profitable than keeping the original induction variable. */
4635 if (expression_expensive_p (*bound))
4636 return false;
4637
4638 /* Sometimes it is possible to handle the situation that the number of
4639 iterations may be zero unless additional assumptions hold, by using <
4640 instead of != in the exit condition.
4641
4642 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4643 base the exit condition on it. However, that is often too
4644 expensive. */
4645 if (!integer_zerop (desc->may_be_zero))
4646 return iv_elimination_compare_lt (data, cand, comp, desc);
4647
4648 return true;
4649 }
4650
4651 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4652 be copied, if it is used in the loop body and DATA->body_includes_call. */
4653
4654 static int
4655 parm_decl_cost (struct ivopts_data *data, tree bound)
4656 {
4657 tree sbound = bound;
4658 STRIP_NOPS (sbound);
4659
4660 if (TREE_CODE (sbound) == SSA_NAME
4661 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4662 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4663 && data->body_includes_call)
4664 return COSTS_N_INSNS (1);
4665
4666 return 0;
4667 }
4668
4669 /* Determines cost of basing replacement of USE on CAND in a condition. */
4670
4671 static bool
4672 determine_use_iv_cost_condition (struct ivopts_data *data,
4673 struct iv_use *use, struct iv_cand *cand)
4674 {
4675 tree bound = NULL_TREE;
4676 struct iv *cmp_iv;
4677 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4678 comp_cost elim_cost, express_cost, cost, bound_cost;
4679 bool ok;
4680 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4681 tree *control_var, *bound_cst;
4682 enum tree_code comp = ERROR_MARK;
4683
4684 /* Only consider real candidates. */
4685 if (!cand->iv)
4686 {
4687 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4688 ERROR_MARK, -1);
4689 return false;
4690 }
4691
4692 /* Try iv elimination. */
4693 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4694 {
4695 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4696 if (elim_cost.cost == 0)
4697 elim_cost.cost = parm_decl_cost (data, bound);
4698 else if (TREE_CODE (bound) == INTEGER_CST)
4699 elim_cost.cost = 0;
4700 /* If we replace a loop condition 'i < n' with 'p < base + n',
4701 depends_on_elim will have 'base' and 'n' set, which implies
4702 that both 'base' and 'n' will be live during the loop. More likely,
4703 'base + n' will be loop invariant, resulting in only one live value
4704 during the loop. So in that case we clear depends_on_elim and set
4705 elim_inv_expr_id instead. */
4706 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4707 {
4708 elim_inv_expr_id = get_expr_id (data, bound);
4709 bitmap_clear (depends_on_elim);
4710 }
4711 /* The bound is a loop invariant, so it will be computed only
4712 once. */
4713 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4714 }
4715 else
4716 elim_cost = infinite_cost;
4717
4718 /* Try expressing the original giv. If it is compared with an invariant,
4719 note that we cannot get rid of it. */
4720 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4721 NULL, &cmp_iv);
4722 gcc_assert (ok);
4723
4724 /* When the condition is a comparison of the candidate IV against
4725 zero, prefer this IV.
4726
4727 TODO: The constant that we're subtracting from the cost should
4728 be target-dependent. This information should be added to the
4729 target costs for each backend. */
4730 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4731 && integer_zerop (*bound_cst)
4732 && (operand_equal_p (*control_var, cand->var_after, 0)
4733 || operand_equal_p (*control_var, cand->var_before, 0)))
4734 elim_cost.cost -= 1;
4735
4736 express_cost = get_computation_cost (data, use, cand, false,
4737 &depends_on_express, NULL,
4738 &express_inv_expr_id);
4739 fd_ivopts_data = data;
4740 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4741
4742 /* Count the cost of the original bound as well. */
4743 bound_cost = force_var_cost (data, *bound_cst, NULL);
4744 if (bound_cost.cost == 0)
4745 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4746 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4747 bound_cost.cost = 0;
4748 express_cost.cost += bound_cost.cost;
4749
4750 /* Choose the better approach, preferring the eliminated IV. */
4751 if (compare_costs (elim_cost, express_cost) <= 0)
4752 {
4753 cost = elim_cost;
4754 depends_on = depends_on_elim;
4755 depends_on_elim = NULL;
4756 inv_expr_id = elim_inv_expr_id;
4757 }
4758 else
4759 {
4760 cost = express_cost;
4761 depends_on = depends_on_express;
4762 depends_on_express = NULL;
4763 bound = NULL_TREE;
4764 comp = ERROR_MARK;
4765 inv_expr_id = express_inv_expr_id;
4766 }
4767
4768 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4769
4770 if (depends_on_elim)
4771 BITMAP_FREE (depends_on_elim);
4772 if (depends_on_express)
4773 BITMAP_FREE (depends_on_express);
4774
4775 return !infinite_cost_p (cost);
4776 }
4777
4778 /* Determines cost of basing replacement of USE on CAND. Returns false
4779 if USE cannot be based on CAND. */
4780
4781 static bool
4782 determine_use_iv_cost (struct ivopts_data *data,
4783 struct iv_use *use, struct iv_cand *cand)
4784 {
4785 switch (use->type)
4786 {
4787 case USE_NONLINEAR_EXPR:
4788 return determine_use_iv_cost_generic (data, use, cand);
4789
4790 case USE_ADDRESS:
4791 return determine_use_iv_cost_address (data, use, cand);
4792
4793 case USE_COMPARE:
4794 return determine_use_iv_cost_condition (data, use, cand);
4795
4796 default:
4797 gcc_unreachable ();
4798 }
4799 }
4800
4801 /* Return true if get_computation_cost indicates that autoincrement is
4802 a possibility for the pair of USE and CAND, false otherwise. */
4803
4804 static bool
4805 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4806 struct iv_cand *cand)
4807 {
4808 bitmap depends_on;
4809 bool can_autoinc;
4810 comp_cost cost;
4811
4812 if (use->type != USE_ADDRESS)
4813 return false;
4814
4815 cost = get_computation_cost (data, use, cand, true, &depends_on,
4816 &can_autoinc, NULL);
4817
4818 BITMAP_FREE (depends_on);
4819
4820 return !infinite_cost_p (cost) && can_autoinc;
4821 }
4822
4823 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4824 use that allows autoincrement, and set their AINC_USE if possible. */
4825
4826 static void
4827 set_autoinc_for_original_candidates (struct ivopts_data *data)
4828 {
4829 unsigned i, j;
4830
4831 for (i = 0; i < n_iv_cands (data); i++)
4832 {
4833 struct iv_cand *cand = iv_cand (data, i);
4834 struct iv_use *closest = NULL;
4835 if (cand->pos != IP_ORIGINAL)
4836 continue;
4837 for (j = 0; j < n_iv_uses (data); j++)
4838 {
4839 struct iv_use *use = iv_use (data, j);
4840 unsigned uid = gimple_uid (use->stmt);
4841 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4842 || uid > gimple_uid (cand->incremented_at))
4843 continue;
4844 if (closest == NULL || uid > gimple_uid (closest->stmt))
4845 closest = use;
4846 }
4847 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4848 continue;
4849 cand->ainc_use = closest;
4850 }
4851 }
4852
4853 /* Finds the candidates for the induction variables. */
4854
4855 static void
4856 find_iv_candidates (struct ivopts_data *data)
4857 {
4858 /* Add commonly used ivs. */
4859 add_standard_iv_candidates (data);
4860
4861 /* Add old induction variables. */
4862 add_old_ivs_candidates (data);
4863
4864 /* Add induction variables derived from uses. */
4865 add_derived_ivs_candidates (data);
4866
4867 set_autoinc_for_original_candidates (data);
4868
4869 /* Record the important candidates. */
4870 record_important_candidates (data);
4871 }
4872
4873 /* Determines costs of basing the use of the iv on an iv candidate. */
4874
4875 static void
4876 determine_use_iv_costs (struct ivopts_data *data)
4877 {
4878 unsigned i, j;
4879 struct iv_use *use;
4880 struct iv_cand *cand;
4881 bitmap to_clear = BITMAP_ALLOC (NULL);
4882
4883 alloc_use_cost_map (data);
4884
4885 for (i = 0; i < n_iv_uses (data); i++)
4886 {
4887 use = iv_use (data, i);
4888
4889 if (data->consider_all_candidates)
4890 {
4891 for (j = 0; j < n_iv_cands (data); j++)
4892 {
4893 cand = iv_cand (data, j);
4894 determine_use_iv_cost (data, use, cand);
4895 }
4896 }
4897 else
4898 {
4899 bitmap_iterator bi;
4900
4901 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4902 {
4903 cand = iv_cand (data, j);
4904 if (!determine_use_iv_cost (data, use, cand))
4905 bitmap_set_bit (to_clear, j);
4906 }
4907
4908 /* Remove the candidates for which the cost is infinite from
4909 the list of related candidates. */
4910 bitmap_and_compl_into (use->related_cands, to_clear);
4911 bitmap_clear (to_clear);
4912 }
4913 }
4914
4915 BITMAP_FREE (to_clear);
4916
4917 if (dump_file && (dump_flags & TDF_DETAILS))
4918 {
4919 fprintf (dump_file, "Use-candidate costs:\n");
4920
4921 for (i = 0; i < n_iv_uses (data); i++)
4922 {
4923 use = iv_use (data, i);
4924
4925 fprintf (dump_file, "Use %d:\n", i);
4926 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4927 for (j = 0; j < use->n_map_members; j++)
4928 {
4929 if (!use->cost_map[j].cand
4930 || infinite_cost_p (use->cost_map[j].cost))
4931 continue;
4932
4933 fprintf (dump_file, " %d\t%d\t%d\t",
4934 use->cost_map[j].cand->id,
4935 use->cost_map[j].cost.cost,
4936 use->cost_map[j].cost.complexity);
4937 if (use->cost_map[j].depends_on)
4938 bitmap_print (dump_file,
4939 use->cost_map[j].depends_on, "","");
4940 if (use->cost_map[j].inv_expr_id != -1)
4941 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
4942 fprintf (dump_file, "\n");
4943 }
4944
4945 fprintf (dump_file, "\n");
4946 }
4947 fprintf (dump_file, "\n");
4948 }
4949 }
4950
4951 /* Determines cost of the candidate CAND. */
4952
4953 static void
4954 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4955 {
4956 comp_cost cost_base;
4957 unsigned cost, cost_step;
4958 tree base;
4959
4960 if (!cand->iv)
4961 {
4962 cand->cost = 0;
4963 return;
4964 }
4965
4966 /* There are two costs associated with the candidate -- its increment
4967 and its initialization. The second is almost negligible for any loop
4968 that rolls enough, so we take it into account only a little. */
4969
4970 base = cand->iv->base;
4971 cost_base = force_var_cost (data, base, NULL);
4972 /* It will be exceptional that the iv register happens to be initialized with
4973 the proper value at no cost. In general, there will at least be a regcopy
4974 or a const set. */
4975 if (cost_base.cost == 0)
4976 cost_base.cost = COSTS_N_INSNS (1);
4977 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
4978
4979 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4980
4981 /* Prefer the original ivs unless we may gain something by replacing them.
4982 The reason is to make debugging simpler, so this is not relevant for
4983 artificial ivs created by other optimization passes. */
4984 if (cand->pos != IP_ORIGINAL
4985 || !SSA_NAME_VAR (cand->var_before)
4986 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4987 cost++;
4988
4989 /* Prefer not to insert statements into the latch unless there are some
4990 already (so that we do not create unnecessary jumps). */
4991 if (cand->pos == IP_END
4992 && empty_block_p (ip_end_pos (data->current_loop)))
4993 cost++;
4994
4995 cand->cost = cost;
4996 cand->cost_step = cost_step;
4997 }
4998
4999 /* Determines costs of computation of the candidates. */
5000
5001 static void
5002 determine_iv_costs (struct ivopts_data *data)
5003 {
5004 unsigned i;
5005
5006 if (dump_file && (dump_flags & TDF_DETAILS))
5007 {
5008 fprintf (dump_file, "Candidate costs:\n");
5009 fprintf (dump_file, " cand\tcost\n");
5010 }
5011
5012 for (i = 0; i < n_iv_cands (data); i++)
5013 {
5014 struct iv_cand *cand = iv_cand (data, i);
5015
5016 determine_iv_cost (data, cand);
5017
5018 if (dump_file && (dump_flags & TDF_DETAILS))
5019 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5020 }
5021
5022 if (dump_file && (dump_flags & TDF_DETAILS))
5023 fprintf (dump_file, "\n");
5024 }
5025
5026 /* Calculates cost for having SIZE induction variables. */
5027
5028 static unsigned
5029 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5030 {
5031 /* We add size to the cost, so that we prefer eliminating ivs
5032 if possible. */
5033 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5034 data->body_includes_call);
5035 }
5036
5037 /* For each size of the induction variable set determine the penalty. */
5038
5039 static void
5040 determine_set_costs (struct ivopts_data *data)
5041 {
5042 unsigned j, n;
5043 gimple phi;
5044 gimple_stmt_iterator psi;
5045 tree op;
5046 struct loop *loop = data->current_loop;
5047 bitmap_iterator bi;
5048
5049 if (dump_file && (dump_flags & TDF_DETAILS))
5050 {
5051 fprintf (dump_file, "Global costs:\n");
5052 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5053 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5054 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5055 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5056 }
5057
5058 n = 0;
5059 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5060 {
5061 phi = gsi_stmt (psi);
5062 op = PHI_RESULT (phi);
5063
5064 if (virtual_operand_p (op))
5065 continue;
5066
5067 if (get_iv (data, op))
5068 continue;
5069
5070 n++;
5071 }
5072
5073 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5074 {
5075 struct version_info *info = ver_info (data, j);
5076
5077 if (info->inv_id && info->has_nonlin_use)
5078 n++;
5079 }
5080
5081 data->regs_used = n;
5082 if (dump_file && (dump_flags & TDF_DETAILS))
5083 fprintf (dump_file, " regs_used %d\n", n);
5084
5085 if (dump_file && (dump_flags & TDF_DETAILS))
5086 {
5087 fprintf (dump_file, " cost for size:\n");
5088 fprintf (dump_file, " ivs\tcost\n");
5089 for (j = 0; j <= 2 * target_avail_regs; j++)
5090 fprintf (dump_file, " %d\t%d\n", j,
5091 ivopts_global_cost_for_size (data, j));
5092 fprintf (dump_file, "\n");
5093 }
5094 }
5095
5096 /* Returns true if A is a cheaper cost pair than B. */
5097
5098 static bool
5099 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5100 {
5101 int cmp;
5102
5103 if (!a)
5104 return false;
5105
5106 if (!b)
5107 return true;
5108
5109 cmp = compare_costs (a->cost, b->cost);
5110 if (cmp < 0)
5111 return true;
5112
5113 if (cmp > 0)
5114 return false;
5115
5116 /* In case the costs are the same, prefer the cheaper candidate. */
5117 if (a->cand->cost < b->cand->cost)
5118 return true;
5119
5120 return false;
5121 }
5122
5123
5124 /* Returns the candidate by which USE is expressed in IVS. */
5125
5126 static struct cost_pair *
5127 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5128 {
5129 return ivs->cand_for_use[use->id];
5130 }
5131
5132 /* Computes the cost field of IVS structure. */
5133
5134 static void
5135 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5136 {
5137 comp_cost cost = ivs->cand_use_cost;
5138
5139 cost.cost += ivs->cand_cost;
5140
5141 cost.cost += ivopts_global_cost_for_size (data,
5142 ivs->n_regs + ivs->num_used_inv_expr);
5143
5144 ivs->cost = cost;
5145 }
5146
5147 /* Remove invariants in set INVS from set IVS. */
5148
5149 static void
5150 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5151 {
5152 bitmap_iterator bi;
5153 unsigned iid;
5154
5155 if (!invs)
5156 return;
5157
5158 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5159 {
5160 ivs->n_invariant_uses[iid]--;
5161 if (ivs->n_invariant_uses[iid] == 0)
5162 ivs->n_regs--;
5163 }
5164 }
5165
5166 /* Set USE not to be expressed by any candidate in IVS. */
5167
5168 static void
5169 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5170 struct iv_use *use)
5171 {
5172 unsigned uid = use->id, cid;
5173 struct cost_pair *cp;
5174
5175 cp = ivs->cand_for_use[uid];
5176 if (!cp)
5177 return;
5178 cid = cp->cand->id;
5179
5180 ivs->bad_uses++;
5181 ivs->cand_for_use[uid] = NULL;
5182 ivs->n_cand_uses[cid]--;
5183
5184 if (ivs->n_cand_uses[cid] == 0)
5185 {
5186 bitmap_clear_bit (ivs->cands, cid);
5187 /* Do not count the pseudocandidates. */
5188 if (cp->cand->iv)
5189 ivs->n_regs--;
5190 ivs->n_cands--;
5191 ivs->cand_cost -= cp->cand->cost;
5192
5193 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5194 }
5195
5196 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5197
5198 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5199
5200 if (cp->inv_expr_id != -1)
5201 {
5202 ivs->used_inv_expr[cp->inv_expr_id]--;
5203 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5204 ivs->num_used_inv_expr--;
5205 }
5206 iv_ca_recount_cost (data, ivs);
5207 }
5208
5209 /* Add invariants in set INVS to set IVS. */
5210
5211 static void
5212 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5213 {
5214 bitmap_iterator bi;
5215 unsigned iid;
5216
5217 if (!invs)
5218 return;
5219
5220 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5221 {
5222 ivs->n_invariant_uses[iid]++;
5223 if (ivs->n_invariant_uses[iid] == 1)
5224 ivs->n_regs++;
5225 }
5226 }
5227
5228 /* Set cost pair for USE in set IVS to CP. */
5229
5230 static void
5231 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5232 struct iv_use *use, struct cost_pair *cp)
5233 {
5234 unsigned uid = use->id, cid;
5235
5236 if (ivs->cand_for_use[uid] == cp)
5237 return;
5238
5239 if (ivs->cand_for_use[uid])
5240 iv_ca_set_no_cp (data, ivs, use);
5241
5242 if (cp)
5243 {
5244 cid = cp->cand->id;
5245
5246 ivs->bad_uses--;
5247 ivs->cand_for_use[uid] = cp;
5248 ivs->n_cand_uses[cid]++;
5249 if (ivs->n_cand_uses[cid] == 1)
5250 {
5251 bitmap_set_bit (ivs->cands, cid);
5252 /* Do not count the pseudocandidates. */
5253 if (cp->cand->iv)
5254 ivs->n_regs++;
5255 ivs->n_cands++;
5256 ivs->cand_cost += cp->cand->cost;
5257
5258 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5259 }
5260
5261 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5262 iv_ca_set_add_invariants (ivs, cp->depends_on);
5263
5264 if (cp->inv_expr_id != -1)
5265 {
5266 ivs->used_inv_expr[cp->inv_expr_id]++;
5267 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5268 ivs->num_used_inv_expr++;
5269 }
5270 iv_ca_recount_cost (data, ivs);
5271 }
5272 }
5273
5274 /* Extend set IVS by expressing USE by some of the candidates in it
5275 if possible. All important candidates will be considered
5276 if IMPORTANT_CANDIDATES is true. */
5277
5278 static void
5279 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5280 struct iv_use *use, bool important_candidates)
5281 {
5282 struct cost_pair *best_cp = NULL, *cp;
5283 bitmap_iterator bi;
5284 bitmap cands;
5285 unsigned i;
5286
5287 gcc_assert (ivs->upto >= use->id);
5288
5289 if (ivs->upto == use->id)
5290 {
5291 ivs->upto++;
5292 ivs->bad_uses++;
5293 }
5294
5295 cands = (important_candidates ? data->important_candidates : ivs->cands);
5296 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5297 {
5298 struct iv_cand *cand = iv_cand (data, i);
5299
5300 cp = get_use_iv_cost (data, use, cand);
5301
5302 if (cheaper_cost_pair (cp, best_cp))
5303 best_cp = cp;
5304 }
5305
5306 iv_ca_set_cp (data, ivs, use, best_cp);
5307 }
5308
5309 /* Get cost for assignment IVS. */
5310
5311 static comp_cost
5312 iv_ca_cost (struct iv_ca *ivs)
5313 {
5314 /* This was a conditional expression but it triggered a bug in
5315 Sun C 5.5. */
5316 if (ivs->bad_uses)
5317 return infinite_cost;
5318 else
5319 return ivs->cost;
5320 }
5321
5322 /* Returns true if all dependences of CP are among invariants in IVS. */
5323
5324 static bool
5325 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5326 {
5327 unsigned i;
5328 bitmap_iterator bi;
5329
5330 if (!cp->depends_on)
5331 return true;
5332
5333 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5334 {
5335 if (ivs->n_invariant_uses[i] == 0)
5336 return false;
5337 }
5338
5339 return true;
5340 }
5341
5342 /* Creates a change expressing USE by NEW_CP instead of OLD_CP and chains
5343 it before NEXT_CHANGE. */
5344
5345 static struct iv_ca_delta *
5346 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5347 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5348 {
5349 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5350
5351 change->use = use;
5352 change->old_cp = old_cp;
5353 change->new_cp = new_cp;
5354 change->next_change = next_change;
5355
5356 return change;
5357 }
5358
5359 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5360 are rewritten. */
5361
5362 static struct iv_ca_delta *
5363 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5364 {
5365 struct iv_ca_delta *last;
5366
5367 if (!l2)
5368 return l1;
5369
5370 if (!l1)
5371 return l2;
5372
5373 for (last = l1; last->next_change; last = last->next_change)
5374 continue;
5375 last->next_change = l2;
5376
5377 return l1;
5378 }
5379
5380 /* Reverse the list of changes DELTA, forming the inverse to it. */
5381
5382 static struct iv_ca_delta *
5383 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5384 {
5385 struct iv_ca_delta *act, *next, *prev = NULL;
5386 struct cost_pair *tmp;
5387
5388 for (act = delta; act; act = next)
5389 {
5390 next = act->next_change;
5391 act->next_change = prev;
5392 prev = act;
5393
5394 tmp = act->old_cp;
5395 act->old_cp = act->new_cp;
5396 act->new_cp = tmp;
5397 }
5398
5399 return prev;
5400 }
5401
5402 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5403 reverted instead. */
5404
5405 static void
5406 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5407 struct iv_ca_delta *delta, bool forward)
5408 {
5409 struct cost_pair *from, *to;
5410 struct iv_ca_delta *act;
5411
5412 if (!forward)
5413 delta = iv_ca_delta_reverse (delta);
5414
5415 for (act = delta; act; act = act->next_change)
5416 {
5417 from = act->old_cp;
5418 to = act->new_cp;
5419 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5420 iv_ca_set_cp (data, ivs, act->use, to);
5421 }
5422
5423 if (!forward)
5424 iv_ca_delta_reverse (delta);
5425 }
5426
5427 /* Returns true if CAND is used in IVS. */
5428
5429 static bool
5430 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5431 {
5432 return ivs->n_cand_uses[cand->id] > 0;
5433 }
5434
5435 /* Returns number of induction variable candidates in the set IVS. */
5436
5437 static unsigned
5438 iv_ca_n_cands (struct iv_ca *ivs)
5439 {
5440 return ivs->n_cands;
5441 }
5442
5443 /* Free the list of changes DELTA. */
5444
5445 static void
5446 iv_ca_delta_free (struct iv_ca_delta **delta)
5447 {
5448 struct iv_ca_delta *act, *next;
5449
5450 for (act = *delta; act; act = next)
5451 {
5452 next = act->next_change;
5453 free (act);
5454 }
5455
5456 *delta = NULL;
5457 }
5458
5459 /* Allocates a new iv candidate assignment. */
5460
5461 static struct iv_ca *
5462 iv_ca_new (struct ivopts_data *data)
5463 {
5464 struct iv_ca *nw = XNEW (struct iv_ca);
5465
5466 nw->upto = 0;
5467 nw->bad_uses = 0;
5468 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5469 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5470 nw->cands = BITMAP_ALLOC (NULL);
5471 nw->n_cands = 0;
5472 nw->n_regs = 0;
5473 nw->cand_use_cost = no_cost;
5474 nw->cand_cost = 0;
5475 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5476 nw->cost = no_cost;
5477 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5478 nw->num_used_inv_expr = 0;
5479
5480 return nw;
5481 }
5482
5483 /* Free memory occupied by the set IVS. */
5484
5485 static void
5486 iv_ca_free (struct iv_ca **ivs)
5487 {
5488 free ((*ivs)->cand_for_use);
5489 free ((*ivs)->n_cand_uses);
5490 BITMAP_FREE ((*ivs)->cands);
5491 free ((*ivs)->n_invariant_uses);
5492 free ((*ivs)->used_inv_expr);
5493 free (*ivs);
5494 *ivs = NULL;
5495 }
5496
5497 /* Dumps IVS to FILE. */
5498
5499 static void
5500 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5501 {
5502 const char *pref = " invariants ";
5503 unsigned i;
5504 comp_cost cost = iv_ca_cost (ivs);
5505
5506 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5507 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5508 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5509 bitmap_print (file, ivs->cands, " candidates: ","\n");
5510
5511 for (i = 0; i < ivs->upto; i++)
5512 {
5513 struct iv_use *use = iv_use (data, i);
5514 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5515 if (cp)
5516 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5517 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5518 else
5519 fprintf (file, " use:%d --> ??\n", use->id);
5520 }
5521
5522 for (i = 1; i <= data->max_inv_id; i++)
5523 if (ivs->n_invariant_uses[i])
5524 {
5525 fprintf (file, "%s%d", pref, i);
5526 pref = ", ";
5527 }
5528 fprintf (file, "\n\n");
5529 }
5530
5531 /* Try changing the candidate in IVS to CAND for each use. Return the cost
5532 of the new set, and store the differences in DELTA. The number of induction
5533 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
5534 the function will try to find a solution with minimal iv candidates. */
5535
5536 static comp_cost
5537 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5538 struct iv_cand *cand, struct iv_ca_delta **delta,
5539 unsigned *n_ivs, bool min_ncand)
5540 {
5541 unsigned i;
5542 comp_cost cost;
5543 struct iv_use *use;
5544 struct cost_pair *old_cp, *new_cp;
5545
5546 *delta = NULL;
5547 for (i = 0; i < ivs->upto; i++)
5548 {
5549 use = iv_use (data, i);
5550 old_cp = iv_ca_cand_for_use (ivs, use);
5551
5552 if (old_cp
5553 && old_cp->cand == cand)
5554 continue;
5555
5556 new_cp = get_use_iv_cost (data, use, cand);
5557 if (!new_cp)
5558 continue;
5559
5560 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5561 continue;
5562
5563 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5564 continue;
5565
5566 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5567 }
5568
5569 iv_ca_delta_commit (data, ivs, *delta, true);
5570 cost = iv_ca_cost (ivs);
5571 if (n_ivs)
5572 *n_ivs = iv_ca_n_cands (ivs);
5573 iv_ca_delta_commit (data, ivs, *delta, false);
5574
5575 return cost;
5576 }
5577
5578 /* Try narrowing set IVS by removing CAND. Return the cost of
5579 the new set and store the differences in DELTA. */
5580
5581 static comp_cost
5582 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5583 struct iv_cand *cand, struct iv_ca_delta **delta)
5584 {
5585 unsigned i, ci;
5586 struct iv_use *use;
5587 struct cost_pair *old_cp, *new_cp, *cp;
5588 bitmap_iterator bi;
5589 struct iv_cand *cnd;
5590 comp_cost cost;
5591
5592 *delta = NULL;
5593 for (i = 0; i < n_iv_uses (data); i++)
5594 {
5595 use = iv_use (data, i);
5596
5597 old_cp = iv_ca_cand_for_use (ivs, use);
5598 if (old_cp->cand != cand)
5599 continue;
5600
5601 new_cp = NULL;
5602
5603 if (data->consider_all_candidates)
5604 {
5605 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5606 {
5607 if (ci == cand->id)
5608 continue;
5609
5610 cnd = iv_cand (data, ci);
5611
5612 cp = get_use_iv_cost (data, use, cnd);
5613 if (!cp)
5614 continue;
5615
5616 if (!iv_ca_has_deps (ivs, cp))
5617 continue;
5618
5619 if (!cheaper_cost_pair (cp, new_cp))
5620 continue;
5621
5622 new_cp = cp;
5623 }
5624 }
5625 else
5626 {
5627 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5628 {
5629 if (ci == cand->id)
5630 continue;
5631
5632 cnd = iv_cand (data, ci);
5633
5634 cp = get_use_iv_cost (data, use, cnd);
5635 if (!cp)
5636 continue;
5637 if (!iv_ca_has_deps (ivs, cp))
5638 continue;
5639
5640 if (!cheaper_cost_pair (cp, new_cp))
5641 continue;
5642
5643 new_cp = cp;
5644 }
5645 }
5646
5647 if (!new_cp)
5648 {
5649 iv_ca_delta_free (delta);
5650 return infinite_cost;
5651 }
5652
5653 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5654 }
5655
5656 iv_ca_delta_commit (data, ivs, *delta, true);
5657 cost = iv_ca_cost (ivs);
5658 iv_ca_delta_commit (data, ivs, *delta, false);
5659
5660 return cost;
5661 }
5662
5663 /* Try optimizing the set of candidates IVS by removing candidates other
5664 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5665 differences in DELTA. */
5666
5667 static comp_cost
5668 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5669 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5670 {
5671 bitmap_iterator bi;
5672 struct iv_ca_delta *act_delta, *best_delta;
5673 unsigned i;
5674 comp_cost best_cost, acost;
5675 struct iv_cand *cand;
5676
5677 best_delta = NULL;
5678 best_cost = iv_ca_cost (ivs);
5679
5680 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5681 {
5682 cand = iv_cand (data, i);
5683
5684 if (cand == except_cand)
5685 continue;
5686
5687 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5688
5689 if (compare_costs (acost, best_cost) < 0)
5690 {
5691 best_cost = acost;
5692 iv_ca_delta_free (&best_delta);
5693 best_delta = act_delta;
5694 }
5695 else
5696 iv_ca_delta_free (&act_delta);
5697 }
5698
5699 if (!best_delta)
5700 {
5701 *delta = NULL;
5702 return best_cost;
5703 }
5704
5705 /* Recurse to possibly remove other unnecessary ivs. */
5706 iv_ca_delta_commit (data, ivs, best_delta, true);
5707 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5708 iv_ca_delta_commit (data, ivs, best_delta, false);
5709 *delta = iv_ca_delta_join (best_delta, *delta);
5710 return best_cost;
5711 }
5712
5713 /* Tries to extend the set IVS in the best possible way in order
5714 to express USE. If ORIGINALP is true, prefer candidates from
5715 the original set of IVs, otherwise favor important candidates not
5716 based on any memory object. */
5717
5718 static bool
5719 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5720 struct iv_use *use, bool originalp)
5721 {
5722 comp_cost best_cost, act_cost;
5723 unsigned i;
5724 bitmap_iterator bi;
5725 struct iv_cand *cand;
5726 struct iv_ca_delta *best_delta = NULL, *act_delta;
5727 struct cost_pair *cp;
5728
5729 iv_ca_add_use (data, ivs, use, false);
5730 best_cost = iv_ca_cost (ivs);
5731
5732 cp = iv_ca_cand_for_use (ivs, use);
5733 if (!cp)
5734 {
5735 ivs->upto--;
5736 ivs->bad_uses--;
5737 iv_ca_add_use (data, ivs, use, true);
5738 best_cost = iv_ca_cost (ivs);
5739 cp = iv_ca_cand_for_use (ivs, use);
5740 }
5741 if (cp)
5742 {
5743 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5744 iv_ca_set_no_cp (data, ivs, use);
5745 }
5746
5747 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5748 first try important candidates not based on any memory object. Only if
5749 this fails do we try the specific ones. Rationale -- in loops with many
5750 variables the best choice often is to use just one generic biv. If we
5751 added many ivs specific to the uses here, the optimization algorithm would
5752 later be likely to get stuck in a local minimum, thus causing us to create
5753 too many ivs. The approach of going from few ivs to more seems more likely
5754 to succeed -- starting from few ivs, replacing an expensive use by a
5755 specific iv should always be a win. */
5756 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5757 {
5758 cand = iv_cand (data, i);
5759
5760 if (originalp && cand->pos != IP_ORIGINAL)
5761 continue;
5762
5763 if (!originalp && cand->iv->base_object != NULL_TREE)
5764 continue;
5765
5766 if (iv_ca_cand_used_p (ivs, cand))
5767 continue;
5768
5769 cp = get_use_iv_cost (data, use, cand);
5770 if (!cp)
5771 continue;
5772
5773 iv_ca_set_cp (data, ivs, use, cp);
5774 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5775 true);
5776 iv_ca_set_no_cp (data, ivs, use);
5777 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5778
5779 if (compare_costs (act_cost, best_cost) < 0)
5780 {
5781 best_cost = act_cost;
5782
5783 iv_ca_delta_free (&best_delta);
5784 best_delta = act_delta;
5785 }
5786 else
5787 iv_ca_delta_free (&act_delta);
5788 }
5789
5790 if (infinite_cost_p (best_cost))
5791 {
5792 for (i = 0; i < use->n_map_members; i++)
5793 {
5794 cp = use->cost_map + i;
5795 cand = cp->cand;
5796 if (!cand)
5797 continue;
5798
5799 /* Already tried this. */
5800 if (cand->important)
5801 {
5802 if (originalp && cand->pos == IP_ORIGINAL)
5803 continue;
5804 if (!originalp && cand->iv->base_object == NULL_TREE)
5805 continue;
5806 }
5807
5808 if (iv_ca_cand_used_p (ivs, cand))
5809 continue;
5810
5811 act_delta = NULL;
5812 iv_ca_set_cp (data, ivs, use, cp);
5813 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5814 iv_ca_set_no_cp (data, ivs, use);
5815 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5816 cp, act_delta);
5817
5818 if (compare_costs (act_cost, best_cost) < 0)
5819 {
5820 best_cost = act_cost;
5821
5822 if (best_delta)
5823 iv_ca_delta_free (&best_delta);
5824 best_delta = act_delta;
5825 }
5826 else
5827 iv_ca_delta_free (&act_delta);
5828 }
5829 }
5830
5831 iv_ca_delta_commit (data, ivs, best_delta, true);
5832 iv_ca_delta_free (&best_delta);
5833
5834 return !infinite_cost_p (best_cost);
5835 }
5836
5837 /* Finds an initial assignment of candidates to uses. */
5838
5839 static struct iv_ca *
5840 get_initial_solution (struct ivopts_data *data, bool originalp)
5841 {
5842 struct iv_ca *ivs = iv_ca_new (data);
5843 unsigned i;
5844
5845 for (i = 0; i < n_iv_uses (data); i++)
5846 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5847 {
5848 iv_ca_free (&ivs);
5849 return NULL;
5850 }
5851
5852 return ivs;
5853 }
5854
5855 /* Tries to improve set of induction variables IVS. */
5856
5857 static bool
5858 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5859 {
5860 unsigned i, n_ivs;
5861 comp_cost acost, best_cost = iv_ca_cost (ivs);
5862 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5863 struct iv_cand *cand;
5864
5865 /* Try extending the set of induction variables by one. */
5866 for (i = 0; i < n_iv_cands (data); i++)
5867 {
5868 cand = iv_cand (data, i);
5869
5870 if (iv_ca_cand_used_p (ivs, cand))
5871 continue;
5872
5873 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
5874 if (!act_delta)
5875 continue;
5876
5877 /* If we successfully added the candidate and the set is small enough,
5878 try optimizing it by removing other candidates. */
5879 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5880 {
5881 iv_ca_delta_commit (data, ivs, act_delta, true);
5882 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5883 iv_ca_delta_commit (data, ivs, act_delta, false);
5884 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5885 }
5886
5887 if (compare_costs (acost, best_cost) < 0)
5888 {
5889 best_cost = acost;
5890 iv_ca_delta_free (&best_delta);
5891 best_delta = act_delta;
5892 }
5893 else
5894 iv_ca_delta_free (&act_delta);
5895 }
5896
5897 if (!best_delta)
5898 {
5899 /* Try removing the candidates from the set instead. */
5900 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5901
5902 /* Nothing more we can do. */
5903 if (!best_delta)
5904 return false;
5905 }
5906
5907 iv_ca_delta_commit (data, ivs, best_delta, true);
5908 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5909 iv_ca_delta_free (&best_delta);
5910 return true;
5911 }
5912
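/* A self-contained toy model -- illustrative only, not part of this pass;
   the names toy_covers, toy_cost and toy_improve are hypothetical -- of the
   accept-if-better search shape used by try_improve_iv_set above.  A "set"
   is a bitmask over 8 candidates, each of 4 uses can be served by the
   candidates in its mask, each selected candidate costs a flat amount
   (standing in for the increment cost and register pressure), and an
   unserved use is prohibitively expensive, a crude stand-in for
   infinite_cost.  The real code builds iv_ca_delta records and commits the
   single best one per iteration instead of toggling bits immediately.  */

static const unsigned toy_covers[4] = { 0x3, 0x5, 0x4, 0xc };

static int
toy_cost (unsigned set)
{
  int cost = 0, i;

  for (i = 0; i < 4; i++)
    if (!(set & toy_covers[i]))
      cost += 1000;		/* Unserved use: effectively infinite.  */
  for (i = 0; i < 8; i++)
    if (set & (1u << i))
      cost += 3;		/* Flat per-candidate cost.  */
  return cost;
}

/* Greedily toggle single candidates while that lowers the cost.  Unlike the
   real code, any improving move is committed on the spot.  */

static unsigned
toy_improve (unsigned set)
{
  bool changed = true;

  while (changed)
    {
      int i;

      changed = false;
      for (i = 0; i < 8; i++)
	if (toy_cost (set ^ (1u << i)) < toy_cost (set))
	  {
	    set ^= 1u << i;
	    changed = true;
	  }
    }
  return set;
}

/* E.g. toy_improve (0x7) settles on 0x5: candidates 0 and 2 serve all four
   uses at the lowest cost reachable by single-candidate moves.  */
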
5913 /* Attempts to find the optimal set of induction variables. We use a
5914 simple greedy heuristic -- we try to replace at most one candidate in
5915 the selected solution and remove the unused ivs, as long as this improves the cost. */
5916
5917 static struct iv_ca *
5918 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
5919 {
5920 struct iv_ca *set;
5921
5922 /* Get the initial solution. */
5923 set = get_initial_solution (data, originalp);
5924 if (!set)
5925 {
5926 if (dump_file && (dump_flags & TDF_DETAILS))
5927 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5928 return NULL;
5929 }
5930
5931 if (dump_file && (dump_flags & TDF_DETAILS))
5932 {
5933 fprintf (dump_file, "Initial set of candidates:\n");
5934 iv_ca_dump (data, dump_file, set);
5935 }
5936
5937 while (try_improve_iv_set (data, set))
5938 {
5939 if (dump_file && (dump_flags & TDF_DETAILS))
5940 {
5941 fprintf (dump_file, "Improved to:\n");
5942 iv_ca_dump (data, dump_file, set);
5943 }
5944 }
5945
5946 return set;
5947 }
5948
5949 static struct iv_ca *
5950 find_optimal_iv_set (struct ivopts_data *data)
5951 {
5952 unsigned i;
5953 struct iv_ca *set, *origset;
5954 struct iv_use *use;
5955 comp_cost cost, origcost;
5956
5957 /* Determine the cost using a strategy that starts from the original IVs,
5958 then try again with a strategy that prefers candidates not based
5959 on any IVs; keep whichever set is cheaper. */
5960 origset = find_optimal_iv_set_1 (data, true);
5961 set = find_optimal_iv_set_1 (data, false);
5962
5963 if (!origset && !set)
5964 return NULL;
5965
5966 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
5967 cost = set ? iv_ca_cost (set) : infinite_cost;
5968
5969 if (dump_file && (dump_flags & TDF_DETAILS))
5970 {
5971 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
5972 origcost.cost, origcost.complexity);
5973 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
5974 cost.cost, cost.complexity);
5975 }
5976
5977 /* Choose the one with the best cost; ties favor the set based on the original IVs. */
5978 if (compare_costs (origcost, cost) <= 0)
5979 {
5980 if (set)
5981 iv_ca_free (&set);
5982 set = origset;
5983 }
5984 else if (origset)
5985 iv_ca_free (&origset);
5986
5987 for (i = 0; i < n_iv_uses (data); i++)
5988 {
5989 use = iv_use (data, i);
5990 use->selected = iv_ca_cand_for_use (set, use)->cand;
5991 }
5992
5993 return set;
5994 }
5995
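/* Illustrative only (hypothetical function names): the two strategies above
   typically correspond to the two loop shapes below.  The ORIGINALP run
   keeps the original counter "i"; the other run may prefer a pointer
   candidate not based on any original iv.  compare_costs then picks
   whichever version is cheaper on the target.  */

static void
toy_zero_indexed (int *a, int n)	/* Shape favored when ORIGINALP.  */
{
  int i;

  for (i = 0; i < n; i++)
    a[i] = 0;
}

static void
toy_zero_pointer (int *a, int n)	/* Shape from a non-original cand.  */
{
  int *p = a, *end = a + n;

  for (; p != end; p++)
    *p = 0;
}
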
5996 /* Creates a new induction variable corresponding to CAND. */
5997
5998 static void
5999 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6000 {
6001 gimple_stmt_iterator incr_pos;
6002 tree base;
6003 bool after = false;
6004
6005 if (!cand->iv)
6006 return;
6007
6008 switch (cand->pos)
6009 {
6010 case IP_NORMAL:
6011 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6012 break;
6013
6014 case IP_END:
6015 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6016 after = true;
6017 break;
6018
6019 case IP_AFTER_USE:
6020 after = true;
6021 /* fall through */
6022 case IP_BEFORE_USE:
6023 incr_pos = gsi_for_stmt (cand->incremented_at);
6024 break;
6025
6026 case IP_ORIGINAL:
6027 /* Mark that the iv is preserved. */
6028 name_info (data, cand->var_before)->preserve_biv = true;
6029 name_info (data, cand->var_after)->preserve_biv = true;
6030
6031 /* Rewrite the increment so that it uses var_before directly. */
6032 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6033 return;
6034 }
6035
6036 gimple_add_tmp_var (cand->var_before);
6037
6038 base = unshare_expr (cand->iv->base);
6039
6040 create_iv (base, unshare_expr (cand->iv->step),
6041 cand->var_before, data->current_loop,
6042 &incr_pos, after, &cand->var_before, &cand->var_after);
6043 }
6044
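/* Rough intuition for the increment positions handled above (illustrative
   source-level shapes, not exact GIMPLE):

     IP_NORMAL -- just before the exit test:
	loop:  ...body...  i.next = i + 1;  if (i.next < n) goto loop;

     IP_END -- in the latch, after the exit test:
	loop:  ...body...  if (i < n) goto latch; else goto exit;
	latch: i = i + 1;  goto loop;

   IP_BEFORE_USE / IP_AFTER_USE place the increment at
   CAND->incremented_at, and IP_ORIGINAL reuses the existing one.  */
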
6045 /* Creates new induction variables described in SET. */
6046
6047 static void
6048 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6049 {
6050 unsigned i;
6051 struct iv_cand *cand;
6052 bitmap_iterator bi;
6053
6054 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6055 {
6056 cand = iv_cand (data, i);
6057 create_new_iv (data, cand);
6058 }
6059
6060 if (dump_file && (dump_flags & TDF_DETAILS))
6061 {
6062 fprintf (dump_file, "\nSelected IV set: \n");
6063 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6064 {
6065 cand = iv_cand (data, i);
6066 dump_cand (dump_file, cand);
6067 }
6068 fprintf (dump_file, "\n");
6069 }
6070 }
6071
6072 /* Rewrites USE (the definition of an iv used in a nonlinear expression)
6073 using candidate CAND. */
6074
6075 static void
6076 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6077 struct iv_use *use, struct iv_cand *cand)
6078 {
6079 tree comp;
6080 tree op, tgt;
6081 gimple ass;
6082 gimple_stmt_iterator bsi;
6083
6084 /* An important special case -- if we are asked to express the value
6085 of the original iv by itself, just exit; there is no need to
6086 introduce a new computation (one that might also require casting
6087 the variable to unsigned and back). */
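 /* Illustrative GIMPLE (hypothetical SSA names): if USE->stmt is the
    original increment "i_7 = i_3 + 1" and CAND is the original biv
    incremented there, any rewrite would just recompute "i_3 + 1", so the
    statement is kept whenever its other operand is a constant or equals
    the step -- exactly what is checked below.  */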
6088 if (cand->pos == IP_ORIGINAL
6089 && cand->incremented_at == use->stmt)
6090 {
6091 tree step, ctype, utype;
6092 enum tree_code incr_code = PLUS_EXPR, old_code;
6093
6094 gcc_assert (is_gimple_assign (use->stmt));
6095 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6096
6097 step = cand->iv->step;
6098 ctype = TREE_TYPE (step);
6099 utype = TREE_TYPE (cand->var_after);
6100 if (TREE_CODE (step) == NEGATE_EXPR)
6101 {
6102 incr_code = MINUS_EXPR;
6103 step = TREE_OPERAND (step, 0);
6104 }
6105
6106 /* Check whether we may leave the computation unchanged.
6107 This is the case only if it does not rely on other
6108 computations in the loop -- otherwise, the computation
6109 we rely upon may be removed in remove_unused_ivs,
6110 thus leading to an ICE. */
6111 old_code = gimple_assign_rhs_code (use->stmt);
6112 if (old_code == PLUS_EXPR
6113 || old_code == MINUS_EXPR
6114 || old_code == POINTER_PLUS_EXPR)
6115 {
6116 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6117 op = gimple_assign_rhs2 (use->stmt);
6118 else if (old_code != MINUS_EXPR
6119 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
6120 op = gimple_assign_rhs1 (use->stmt);
6121 else
6122 op = NULL_TREE;
6123 }
6124 else
6125 op = NULL_TREE;
6126
6127 if (op
6128 && (TREE_CODE (op) == INTEGER_CST
6129 || operand_equal_p (op, step, 0)))
6130 return;
6131
6132 /* Otherwise, add the necessary computations to express
6133 the iv. */
6134 op = fold_convert (ctype, cand->var_before);
6135 comp = fold_convert (utype,
6136 build2 (incr_code, ctype, op,
6137 unshare_expr (step)));
6138 }
6139 else
6140 {
6141 comp = get_computation (data->current_loop, use, cand);
6142 gcc_assert (comp != NULL_TREE);
6143 }
6144
6145 switch (gimple_code (use->stmt))
6146 {
6147 case GIMPLE_PHI:
6148 tgt = PHI_RESULT (use->stmt);
6149
6150 /* If we should keep the biv, do not replace it. */
6151 if (name_info (data, tgt)->preserve_biv)
6152 return;
6153
6154 bsi = gsi_after_labels (gimple_bb (use->stmt));
6155 break;
6156
6157 case GIMPLE_ASSIGN:
6158 tgt = gimple_assign_lhs (use->stmt);
6159 bsi = gsi_for_stmt (use->stmt);
6160 break;
6161
6162 default:
6163 gcc_unreachable ();
6164 }
6165
6166 if (!valid_gimple_rhs_p (comp)
6167 || (gimple_code (use->stmt) != GIMPLE_PHI
6168 /* We can't allow re-allocating the stmt, as it might still be
6169 pointed to. */
6170 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6171 >= gimple_num_ops (gsi_stmt (bsi)))))
6172 {
6173 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6174 true, GSI_SAME_STMT);
6175 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6176 {
6177 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6178 /* As this isn't a plain copy we have to reset alignment
6179 information. */
6180 if (SSA_NAME_PTR_INFO (comp))
6181 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6182 }
6183 }
6184
6185 if (gimple_code (use->stmt) == GIMPLE_PHI)
6186 {
6187 ass = gimple_build_assign (tgt, comp);
6188 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6189
6190 bsi = gsi_for_stmt (use->stmt);
6191 remove_phi_node (&bsi, false);
6192 }
6193 else
6194 {
6195 gimple_assign_set_rhs_from_tree (&bsi, comp);
6196 use->stmt = gsi_stmt (bsi);
6197 }
6198 }
6199
6200 /* Performs a peephole optimization to reorder the iv update statement with
6201 a mem ref to enable instruction combining in later phases. The mem ref uses
6202 the iv value before the update, so the reordering transformation requires
6203 adjustment of the offset. CAND is the selected IV_CAND.
6204
6205 Example:
6206
6207 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6208 iv2 = iv1 + 1;
6209
6210 if (t < val) (1)
6211 goto L;
6212 goto Head;
6213
6214
6215 directly propagating t over to (1) would introduce an overlapping live
6216 range and thus increase register pressure. This peephole transforms it into:
6217
6218 iv2 = iv1 + 1;
6219 t = MEM_REF (base, iv2, 8, 8);
6220 if (t < val)
6221 goto L;
6222 goto Head;
6223
6224 (the MEM_REF offset drops from 16 to 8, one stride, because iv2 == iv1 + 1). */
6225
6226 static void
6227 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6228 {
6229 tree var_after;
6230 gimple iv_update, stmt;
6231 basic_block bb;
6232 gimple_stmt_iterator gsi, gsi_iv;
6233
6234 if (cand->pos != IP_NORMAL)
6235 return;
6236
6237 var_after = cand->var_after;
6238 iv_update = SSA_NAME_DEF_STMT (var_after);
6239
6240 bb = gimple_bb (iv_update);
6241 gsi = gsi_last_nondebug_bb (bb);
6242 stmt = gsi_stmt (gsi);
6243
6244 /* Only handle the case where the last statement is a conditional jump for now. */
6245 if (gimple_code (stmt) != GIMPLE_COND)
6246 return;
6247
6248 gsi_prev_nondebug (&gsi);
6249 stmt = gsi_stmt (gsi);
6250 if (stmt != iv_update)
6251 return;
6252
6253 gsi_prev_nondebug (&gsi);
6254 if (gsi_end_p (gsi))
6255 return;
6256
6257 stmt = gsi_stmt (gsi);
6258 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6259 return;
6260
6261 if (stmt != use->stmt)
6262 return;
6263
6264 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6265 return;
6266
6267 if (dump_file && (dump_flags & TDF_DETAILS))
6268 {
6269 fprintf (dump_file, "Reordering \n");
6270 print_gimple_stmt (dump_file, iv_update, 0, 0);
6271 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6272 fprintf (dump_file, "\n");
6273 }
6274
6275 gsi = gsi_for_stmt (use->stmt);
6276 gsi_iv = gsi_for_stmt (iv_update);
6277 gsi_move_before (&gsi_iv, &gsi);
6278
6279 cand->pos = IP_BEFORE_USE;
6280 cand->incremented_at = use->stmt;
6281 }
6282
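/* The exact shape matched above, within a single basic block and ignoring
   debug statements (illustrative, hypothetical SSA names):

     t_5 = MEM[...];		<- USE->stmt, a GIMPLE_ASSIGN to an SSA name
     ivtmp_8 = ivtmp_7 + 4;	<- the iv update defining CAND->var_after
     if (t_5 < val_2) ...	<- the last statement, a GIMPLE_COND

   Any other intervening statement makes the function return without
   reordering.  */
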
6283 /* Rewrites USE (address that is an iv) using candidate CAND. */
6284
6285 static void
6286 rewrite_use_address (struct ivopts_data *data,
6287 struct iv_use *use, struct iv_cand *cand)
6288 {
6289 aff_tree aff;
6290 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6291 tree base_hint = NULL_TREE;
6292 tree ref, iv;
6293 bool ok;
6294
6295 adjust_iv_update_pos (cand, use);
6296 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6297 gcc_assert (ok);
6298 unshare_aff_combination (&aff);
6299
6300 /* To avoid undefined overflow problems, all IV candidates use unsigned
6301 integer types. The drawback is that this makes it impossible for
6302 create_mem_ref to distinguish an IV that is based on a memory object
6303 from one that represents simply an offset.
6304
6305 To work around this problem, we pass a hint to create_mem_ref that
6306 indicates which variable (if any) in aff is an IV based on a memory
6307 object. Note that we only consider the candidate. If this is not
6308 based on an object, the base of the reference is in some subexpression
6309 of the use -- but these will use pointer types, so they are recognized
6310 by the create_mem_ref heuristics anyway. */
6311 if (cand->iv->base_object)
6312 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6313
6314 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6315 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6316 reference_alias_ptr_type (*use->op_p),
6317 iv, base_hint, data->speed);
6318 copy_ref_info (ref, *use->op_p);
6319 *use->op_p = ref;
6320 }
6321
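/* Illustrative effect (hypothetical SSA names): an address use such as
   a[i_4], i.e. *(a_5 + (sizetype) i_4 * 4), rewritten against a pointer
   candidate whose base object is "a" may become MEM[(int *) ivtmp_12],
   with BASE_HINT telling create_mem_ref that ivtmp_12 stands for the
   memory object rather than for a plain offset.  */
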
6322 /* Rewrites USE (a condition one of whose arguments is an iv) using
6323 candidate CAND. */
6324
6325 static void
6326 rewrite_use_compare (struct ivopts_data *data,
6327 struct iv_use *use, struct iv_cand *cand)
6328 {
6329 tree comp, *var_p, op, bound;
6330 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6331 enum tree_code compare;
6332 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6333 bool ok;
6334
6335 bound = cp->value;
6336 if (bound)
6337 {
6338 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6339 tree var_type = TREE_TYPE (var);
6340 gimple_seq stmts;
6341
6342 if (dump_file && (dump_flags & TDF_DETAILS))
6343 {
6344 fprintf (dump_file, "Replacing exit test: ");
6345 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6346 }
6347 compare = cp->comp;
6348 bound = unshare_expr (fold_convert (var_type, bound));
6349 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6350 if (stmts)
6351 gsi_insert_seq_on_edge_immediate (
6352 loop_preheader_edge (data->current_loop),
6353 stmts);
6354
6355 gimple_cond_set_lhs (use->stmt, var);
6356 gimple_cond_set_code (use->stmt, compare);
6357 gimple_cond_set_rhs (use->stmt, op);
6358 return;
6359 }
6360
6361 /* The induction variable elimination failed; just express the
6362 original iv instead. */
6363 comp = get_computation (data->current_loop, use, cand);
6364 gcc_assert (comp != NULL_TREE);
6365
6366 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6367 gcc_assert (ok);
6368
6369 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6370 true, GSI_SAME_STMT);
6371 }
6372
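/* Illustrative shape of the replacement (hypothetical SSA names): an exit
   test

     if (i_8 < n_6)

   whose elimination analysis recorded a bound in CP->value may become a
   test of the surviving candidate against a value computed once in the
   preheader:

     preheader:  _bound = a_9 + (sizetype) n_6 * 4;
     exit test:  if (ivtmp_12 != _bound)

   When no bound was recorded, the function instead recomputes the compared
   iv value from CAND, as in the fallback path above.  */
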
6373 /* Rewrites USE using candidate CAND. */
6374
6375 static void
6376 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6377 {
6378 switch (use->type)
6379 {
6380 case USE_NONLINEAR_EXPR:
6381 rewrite_use_nonlinear_expr (data, use, cand);
6382 break;
6383
6384 case USE_ADDRESS:
6385 rewrite_use_address (data, use, cand);
6386 break;
6387
6388 case USE_COMPARE:
6389 rewrite_use_compare (data, use, cand);
6390 break;
6391
6392 default:
6393 gcc_unreachable ();
6394 }
6395
6396 update_stmt (use->stmt);
6397 }
6398
6399 /* Rewrites the uses using the selected induction variables. */
6400
6401 static void
6402 rewrite_uses (struct ivopts_data *data)
6403 {
6404 unsigned i;
6405 struct iv_cand *cand;
6406 struct iv_use *use;
6407
6408 for (i = 0; i < n_iv_uses (data); i++)
6409 {
6410 use = iv_use (data, i);
6411 cand = use->selected;
6412 gcc_assert (cand);
6413
6414 rewrite_use (data, use, cand);
6415 }
6416 }
6417
6418 /* Removes the ivs that are not used after rewriting. */
6419
6420 static void
6421 remove_unused_ivs (struct ivopts_data *data)
6422 {
6423 unsigned j;
6424 bitmap_iterator bi;
6425 bitmap toremove = BITMAP_ALLOC (NULL);
6426
6427 /* Figure out an order in which to release SSA DEFs so that we don't
6428 release something that we'd have to propagate into a debug stmt
6429 afterwards. */
6430 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6431 {
6432 struct version_info *info;
6433
6434 info = ver_info (data, j);
6435 if (info->iv
6436 && !integer_zerop (info->iv->step)
6437 && !info->inv_id
6438 && !info->iv->have_use_for
6439 && !info->preserve_biv)
6440 {
6441 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6442
6443 tree def = info->iv->ssa_name;
6444
6445 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6446 {
6447 imm_use_iterator imm_iter;
6448 use_operand_p use_p;
6449 gimple stmt;
6450 int count = 0;
6451
6452 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6453 {
6454 if (!gimple_debug_bind_p (stmt))
6455 continue;
6456
6457 /* We just want to determine whether to do nothing
6458 (count == 0), to substitute the computed
6459 expression into a single use of the SSA DEF by
6460 itself (count == 1), or to use a debug temp
6461 because the SSA DEF is used multiple times or as
6462 part of a larger expression (count > 1). */
6463 count++;
6464 if (gimple_debug_bind_get_value (stmt) != def)
6465 count++;
6466
6467 if (count > 1)
6468 BREAK_FROM_IMM_USE_STMT (imm_iter);
6469 }
6470
6471 if (!count)
6472 continue;
6473
6474 struct iv_use dummy_use;
6475 struct iv_cand *best_cand = NULL, *cand;
6476 unsigned i, best_pref = 0, cand_pref;
6477
6478 memset (&dummy_use, 0, sizeof (dummy_use));
6479 dummy_use.iv = info->iv;
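 /* Rank the selected candidates, scanning at most the first 64 uses:
    a step matching the removed iv's weighs 4, a matching machine mode
    weighs 2 and a constant base weighs 1, so a step match dominates
    the other two criteria.  */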
6480 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6481 {
6482 cand = iv_use (data, i)->selected;
6483 if (cand == best_cand)
6484 continue;
6485 cand_pref = operand_equal_p (cand->iv->step,
6486 info->iv->step, 0)
6487 ? 4 : 0;
6488 cand_pref
6489 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6490 == TYPE_MODE (TREE_TYPE (info->iv->base))
6491 ? 2 : 0;
6492 cand_pref
6493 += TREE_CODE (cand->iv->base) == INTEGER_CST
6494 ? 1 : 0;
6495 if (best_cand == NULL || best_pref < cand_pref)
6496 {
6497 best_cand = cand;
6498 best_pref = cand_pref;
6499 }
6500 }
6501
6502 if (!best_cand)
6503 continue;
6504
6505 tree comp = get_computation_at (data->current_loop,
6506 &dummy_use, best_cand,
6507 SSA_NAME_DEF_STMT (def));
6508 if (!comp)
6509 continue;
6510
6511 if (count > 1)
6512 {
6513 tree vexpr = make_node (DEBUG_EXPR_DECL);
6514 DECL_ARTIFICIAL (vexpr) = 1;
6515 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6516 if (SSA_NAME_VAR (def))
6517 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6518 else
6519 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6520 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6521 gimple_stmt_iterator gsi;
6522
6523 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6524 gsi = gsi_after_labels (gimple_bb
6525 (SSA_NAME_DEF_STMT (def)));
6526 else
6527 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6528
6529 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6530 comp = vexpr;
6531 }
6532
6533 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6534 {
6535 if (!gimple_debug_bind_p (stmt))
6536 continue;
6537
6538 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6539 SET_USE (use_p, comp);
6540
6541 update_stmt (stmt);
6542 }
6543 }
6544 }
6545 }
6546
6547 release_defs_bitset (toremove);
6548
6549 BITMAP_FREE (toremove);
6550 }
6551
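/* Illustrative debug-info salvaging (hypothetical names): when the dead iv
   i_7 still appears in debug binds, a single self-bind

     # DEBUG i => i_7

   is rewritten in place to recompute the value from a surviving candidate,
   e.g. "# DEBUG i => (int) (ivtmp_12 - a_3)"; with several referencing
   binds (count > 1) the expression is funneled through one debug
   temporary:

     # DEBUG D#1 => (int) (ivtmp_12 - a_3)
     # DEBUG i => D#1  */
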
6552 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6553 for pointer_map_traverse. */
6554
6555 static bool
6556 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6557 void *data ATTRIBUTE_UNUSED)
6558 {
6559 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6560
6561 free (niter);
6562 return true;
6563 }
6564
6565 /* Frees data allocated by the optimization of a single loop. */
6566
6567 static void
6568 free_loop_data (struct ivopts_data *data)
6569 {
6570 unsigned i, j;
6571 bitmap_iterator bi;
6572 tree obj;
6573
6574 if (data->niters)
6575 {
6576 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6577 pointer_map_destroy (data->niters);
6578 data->niters = NULL;
6579 }
6580
6581 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6582 {
6583 struct version_info *info;
6584
6585 info = ver_info (data, i);
6586 free (info->iv);
6587 info->iv = NULL;
6588 info->has_nonlin_use = false;
6589 info->preserve_biv = false;
6590 info->inv_id = 0;
6591 }
6592 bitmap_clear (data->relevant);
6593 bitmap_clear (data->important_candidates);
6594
6595 for (i = 0; i < n_iv_uses (data); i++)
6596 {
6597 struct iv_use *use = iv_use (data, i);
6598
6599 free (use->iv);
6600 BITMAP_FREE (use->related_cands);
6601 for (j = 0; j < use->n_map_members; j++)
6602 if (use->cost_map[j].depends_on)
6603 BITMAP_FREE (use->cost_map[j].depends_on);
6604 free (use->cost_map);
6605 free (use);
6606 }
6607 data->iv_uses.truncate (0);
6608
6609 for (i = 0; i < n_iv_cands (data); i++)
6610 {
6611 struct iv_cand *cand = iv_cand (data, i);
6612
6613 free (cand->iv);
6614 if (cand->depends_on)
6615 BITMAP_FREE (cand->depends_on);
6616 free (cand);
6617 }
6618 data->iv_candidates.truncate (0);
6619
6620 if (data->version_info_size < num_ssa_names)
6621 {
6622 data->version_info_size = 2 * num_ssa_names;
6623 free (data->version_info);
6624 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6625 }
6626
6627 data->max_inv_id = 0;
6628
6629 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6630 SET_DECL_RTL (obj, NULL_RTX);
6631
6632 decl_rtl_to_reset.truncate (0);
6633
6634 htab_empty (data->inv_expr_tab);
6635 data->inv_expr_id = 0;
6636 }
6637
6638 /* Finalizes the data structures used by the iv optimization pass,
6639 releasing all memory allocated in DATA. */
6640
6641 static void
6642 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6643 {
6644 free_loop_data (data);
6645 free (data->version_info);
6646 BITMAP_FREE (data->relevant);
6647 BITMAP_FREE (data->important_candidates);
6648
6649 decl_rtl_to_reset.release ();
6650 data->iv_uses.release ();
6651 data->iv_candidates.release ();
6652 htab_delete (data->inv_expr_tab);
6653 }
6654
6655 /* Returns true if the loop body BODY, consisting of NUM_NODES blocks, includes any calls that are not inexpensive builtins. */
6656
6657 static bool
6658 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6659 {
6660 gimple_stmt_iterator gsi;
6661 unsigned i;
6662
6663 for (i = 0; i < num_nodes; i++)
6664 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6665 {
6666 gimple stmt = gsi_stmt (gsi);
6667 if (is_gimple_call (stmt)
6668 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6669 return true;
6670 }
6671 return false;
6672 }
6673
6674 /* Optimizes the LOOP. Returns true if anything changed. */
6675
6676 static bool
6677 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6678 {
6679 bool changed = false;
6680 struct iv_ca *iv_ca;
6681 edge exit = single_dom_exit (loop);
6682 basic_block *body;
6683
6684 gcc_assert (!data->niters);
6685 data->current_loop = loop;
6686 data->speed = optimize_loop_for_speed_p (loop);
6687
6688 if (dump_file && (dump_flags & TDF_DETAILS))
6689 {
6690 fprintf (dump_file, "Processing loop %d\n", loop->num);
6691
6692 if (exit)
6693 {
6694 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6695 exit->src->index, exit->dest->index);
6696 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6697 fprintf (dump_file, "\n");
6698 }
6699
6700 fprintf (dump_file, "\n");
6701 }
6702
6703 body = get_loop_body (loop);
6704 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6705 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6706 free (body);
6707
6708 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6709
6710 /* For each ssa name determines whether it behaves as an induction variable
6711 in some loop. */
6712 if (!find_induction_variables (data))
6713 goto finish;
6714
6715 /* Finds interesting uses (item 1). */
6716 find_interesting_uses (data);
6717 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6718 goto finish;
6719
6720 /* Finds candidates for the induction variables (item 2). */
6721 find_iv_candidates (data);
6722
6723 /* Calculates the costs (item 3, part 1). */
6724 determine_iv_costs (data);
6725 determine_use_iv_costs (data);
6726 determine_set_costs (data);
6727
6728 /* Find the optimal set of induction variables (item 3, part 2). */
6729 iv_ca = find_optimal_iv_set (data);
6730 if (!iv_ca)
6731 goto finish;
6732 changed = true;
6733
6734 /* Create the new induction variables (item 4, part 1). */
6735 create_new_ivs (data, iv_ca);
6736 iv_ca_free (&iv_ca);
6737
6738 /* Rewrite the uses (item 4, part 2). */
6739 rewrite_uses (data);
6740
6741 /* Remove the ivs that are unused after rewriting. */
6742 remove_unused_ivs (data);
6743
6744 /* We have changed the structure of induction variables; it might happen
6745 that definitions in the scev database refer to some of them that were
6746 eliminated. */
6747 scev_reset ();
6748
6749 finish:
6750 free_loop_data (data);
6751
6752 return changed;
6753 }
6754
6755 /* Main entry point. Optimizes induction variables in loops. */
6756
6757 void
6758 tree_ssa_iv_optimize (void)
6759 {
6760 struct loop *loop;
6761 struct ivopts_data data;
6762 loop_iterator li;
6763
6764 tree_ssa_iv_optimize_init (&data);
6765
6766 /* Optimize the loops starting with the innermost ones. */
6767 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
6768 {
6769 if (dump_file && (dump_flags & TDF_DETAILS))
6770 flow_loop_dump (loop, dump_file, NULL, 1);
6771
6772 tree_ssa_iv_optimize_loop (&data, loop);
6773 }
6774
6775 tree_ssa_iv_optimize_finalize (&data);
6776 }
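
/* To watch this pass at work, one can enable its detailed dump, e.g.

     gcc -O2 -fdump-tree-ivopts-details foo.c

   which writes the "Processing loop", "Initial set of candidates" and
   "Selected IV set" text printed above into the ivopts dump file, while
   -fno-ivopts disables the pass entirely.  */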