/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
   Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible: it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */

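/* A purely illustrative sketch (not part of the original sources): in a
   loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the address &a[i] is an interesting use of the induction variable i.
   The pass may decide that a pointer induction variable serves that use
   more cheaply, effectively rewriting the loop as

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   which replaces the multiplication hidden in a[i] by a simple pointer
   increment (strength reduction) and allows i to be eliminated if it has
   no other uses.  */
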
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "tm_p.h"
#include "basic-block.h"
#include "output.h"
#include "tree-pretty-print.h"
#include "gimple-pretty-print.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, in case HAVE_USE_FOR
                           is true.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost zero_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. one
                           that should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For the original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in the step of
                           the biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider all candidates when replacing a use; if false, only
     the related and the important candidates are tried.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns the memory object to which EXPR points.  If we can determine that
   it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}

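/* Illustrative examples for determine_base_object (assumed, not from the
   original sources): for EXPR = &a[i] the base address of the ARRAY_REF is
   the declaration `a', so the ADDR_EXPR case returns the address of `a'
   converted to ptr_type_node; for EXPR = p_3 + 4 (a POINTER_PLUS_EXPR) we
   recurse into p_3 and the default case returns p_3 converted to
   ptr_type_node; for an integer constant cast to a pointer, NULL_TREE is
   returned, since no object can be identified.  */
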
/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = fold_convert (sizetype, step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in a subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, if it was not already reset by the caller, so that
     it does not show up in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

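/* Illustrative example of the above (assumed, not from the original
   sources): for a GIMPLE_COND such as

     if (i_4 < n_7)

   where i_4 is an iv with nonzero step and n_7 is loop invariant,
   extract_cond_operands returns true with *CONTROL_VAR pointing to the
   operand slot holding i_4 and *BOUND to the slot holding n_7.  If the
   operands are the other way around, as in "if (n_7 > i_4)", the two are
   swapped first, since the control variable may appear on either side.  */
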
/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Returns true if statement STMT is obviously invariant in LOOP,
   i.e. if all its operands on the RHS are defined outside of the LOOP.
   LOOP should not be the function body.  */

bool
stmt_invariant_in_loop_p (struct loop *loop, gimple stmt)
{
  unsigned i;
  tree lhs;

  gcc_assert (loop_depth (loop) > 0);

  lhs = gimple_get_lhs (stmt);
  for (i = 0; i < gimple_num_ops (stmt); i++)
    {
      tree op = gimple_op (stmt, i);
      if (op != lhs && !expr_invariant_in_loop_p (loop, op))
        return false;
    }

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
          *&x[0], which is not folded and does not trigger the
          ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already one byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, double_int *mul)
{
  tree mby;
  enum tree_code code;
  double_int res, p0, p1;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = double_int_one;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
                              precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = double_int_neg (p1);
      *mul = double_int_sext (double_int_add (p0, p1), precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = double_int_sext (tree_to_double_int (top), precision);
      p1 = double_int_sext (tree_to_double_int (bot), precision);
      if (double_int_zero_p (p1))
        return false;
      *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
                              precision);
      return double_int_zero_p (res);

    default:
      return false;
    }
}

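/* Worked examples for constant_multiple_of (illustrative, not from the
   original sources): with TOP = (n_3 + 4) * 8 and BOT = n_3 + 4, the
   MULT_EXPR case reduces TOP's first operand to BOT itself (cst 1) and
   multiplies by 8, so *MUL = 8.  With TOP = 18 and BOT = 6, the
   INTEGER_CST case divides exactly and yields *MUL = 3, while TOP = 20
   and BOT = 6 fails because the remainder is nonzero.  */
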
/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

1895 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
1896 is true, assume we are inside an address. If TOP_COMPREF is true, assume
1897 we are at the top-level of the processed address. */
1898
1899 static tree
1900 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
1901 unsigned HOST_WIDE_INT *offset)
1902 {
1903 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
1904 enum tree_code code;
1905 tree type, orig_type = TREE_TYPE (expr);
1906 unsigned HOST_WIDE_INT off0, off1, st;
1907 tree orig_expr = expr;
1908
1909 STRIP_NOPS (expr);
1910
1911 type = TREE_TYPE (expr);
1912 code = TREE_CODE (expr);
1913 *offset = 0;
1914
1915 switch (code)
1916 {
1917 case INTEGER_CST:
1918 if (!cst_and_fits_in_hwi (expr)
1919 || integer_zerop (expr))
1920 return orig_expr;
1921
1922 *offset = int_cst_value (expr);
1923 return build_int_cst (orig_type, 0);
1924
1925 case POINTER_PLUS_EXPR:
1926 case PLUS_EXPR:
1927 case MINUS_EXPR:
1928 op0 = TREE_OPERAND (expr, 0);
1929 op1 = TREE_OPERAND (expr, 1);
1930
1931 op0 = strip_offset_1 (op0, false, false, &off0);
1932 op1 = strip_offset_1 (op1, false, false, &off1);
1933
1934 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
1935 if (op0 == TREE_OPERAND (expr, 0)
1936 && op1 == TREE_OPERAND (expr, 1))
1937 return orig_expr;
1938
1939 if (integer_zerop (op1))
1940 expr = op0;
1941 else if (integer_zerop (op0))
1942 {
1943 if (code == MINUS_EXPR)
1944 expr = fold_build1 (NEGATE_EXPR, type, op1);
1945 else
1946 expr = op1;
1947 }
1948 else
1949 expr = fold_build2 (code, type, op0, op1);
1950
1951 return fold_convert (orig_type, expr);
1952
1953 case MULT_EXPR:
1954 op1 = TREE_OPERAND (expr, 1);
1955 if (!cst_and_fits_in_hwi (op1))
1956 return orig_expr;
1957
1958 op0 = TREE_OPERAND (expr, 0);
1959 op0 = strip_offset_1 (op0, false, false, &off0);
1960 if (op0 == TREE_OPERAND (expr, 0))
1961 return orig_expr;
1962
1963 *offset = off0 * int_cst_value (op1);
1964 if (integer_zerop (op0))
1965 expr = op0;
1966 else
1967 expr = fold_build2 (MULT_EXPR, type, op0, op1);
1968
1969 return fold_convert (orig_type, expr);
1970
1971 case ARRAY_REF:
1972 case ARRAY_RANGE_REF:
1973 if (!inside_addr)
1974 return orig_expr;
1975
1976 step = array_ref_element_size (expr);
1977 if (!cst_and_fits_in_hwi (step))
1978 break;
1979
1980 st = int_cst_value (step);
1981 op1 = TREE_OPERAND (expr, 1);
1982 op1 = strip_offset_1 (op1, false, false, &off1);
1983 *offset = off1 * st;
1984
1985 if (top_compref
1986 && integer_zerop (op1))
1987 {
1988 /* Strip the component reference completely. */
1989 op0 = TREE_OPERAND (expr, 0);
1990 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
1991 *offset += off0;
1992 return op0;
1993 }
1994 break;
1995
1996 case COMPONENT_REF:
1997 if (!inside_addr)
1998 return orig_expr;
1999
2000 tmp = component_ref_field_offset (expr);
2001 if (top_compref
2002 && cst_and_fits_in_hwi (tmp))
2003 {
2004 /* Strip the component reference completely. */
2005 op0 = TREE_OPERAND (expr, 0);
2006 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2007 *offset = off0 + int_cst_value (tmp);
2008 return op0;
2009 }
2010 break;
2011
2012 case ADDR_EXPR:
2013 op0 = TREE_OPERAND (expr, 0);
2014 op0 = strip_offset_1 (op0, true, true, &off0);
2015 *offset += off0;
2016
2017 if (op0 == TREE_OPERAND (expr, 0))
2018 return orig_expr;
2019
2020 expr = build_fold_addr_expr (op0);
2021 return fold_convert (orig_type, expr);
2022
2023 case MEM_REF:
2024 /* ??? Offset operand? */
2025 inside_addr = false;
2026 break;
2027
2028 default:
2029 return orig_expr;
2030 }
2031
2032 /* Default handling of expressions for that we want to recurse into
2033 the first operand. */
2034 op0 = TREE_OPERAND (expr, 0);
2035 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2036 *offset += off0;
2037
2038 if (op0 == TREE_OPERAND (expr, 0)
2039 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2040 return orig_expr;
2041
2042 expr = copy_node (expr);
2043 TREE_OPERAND (expr, 0) = op0;
2044 if (op1)
2045 TREE_OPERAND (expr, 1) = op1;
2046
2047 /* Inside address, we might strip the top level component references,
2048 thus changing type of the expression. Handling of ADDR_EXPR
2049 will fix that. */
2050 expr = fold_convert (orig_type, expr);
2051
2052 return expr;
2053 }
2054
2055 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2056
2057 static tree
2058 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2059 {
2060 return strip_offset_1 (expr, false, false, offset);
2061 }
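/* An illustrative sketch (not part of the pass) of what strip_offset
   computes.  Assuming USE_EXPR is the address &a[i + 3] with 4-byte
   array elements:

     unsigned HOST_WIDE_INT off;
     tree base = strip_offset (use_expr, &off);

   BASE becomes the offset-free address &a[i] and OFF becomes 12, so
   uses differing only in a constant offset can share one candidate.  */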
2062
2063 /* Returns a variant of TYPE that can be used as a base for different uses.
2064 We return an unsigned type with the same precision, which avoids problems
2065 with overflows. */
2066
2067 static tree
2068 generic_type_for (tree type)
2069 {
2070 if (POINTER_TYPE_P (type))
2071 return unsigned_type_for (type);
2072
2073 if (TYPE_UNSIGNED (type))
2074 return type;
2075
2076 return unsigned_type_for (type);
2077 }
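/* For illustration: generic_type_for (int) is unsigned int, and for a
   pointer type such as char * it is the unsigned integer type of the
   same precision that unsigned_type_for returns, so candidate
   arithmetic is performed in types with wrap-around semantics.  */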
2078
2079 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA points to
2080 the bitmap in which we should store them. */
2081
2082 static struct ivopts_data *fd_ivopts_data;
2083 static tree
2084 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2085 {
2086 bitmap *depends_on = (bitmap *) data;
2087 struct version_info *info;
2088
2089 if (TREE_CODE (*expr_p) != SSA_NAME)
2090 return NULL_TREE;
2091 info = name_info (fd_ivopts_data, *expr_p);
2092
2093 if (!info->inv_id || info->has_nonlin_use)
2094 return NULL_TREE;
2095
2096 if (!*depends_on)
2097 *depends_on = BITMAP_ALLOC (NULL);
2098 bitmap_set_bit (*depends_on, info->inv_id);
2099
2100 return NULL_TREE;
2101 }
2102
2103 /* Adds a candidate BASE + STEP * i. The important field is set to IMPORTANT
2104 and the position to POS. If USE is not NULL, the candidate is marked as
2105 related to it. If both BASE and STEP are NULL, we add a pseudocandidate
2106 for replacing the final value of the iv with a direct computation. */
2107
2108 static struct iv_cand *
2109 add_candidate_1 (struct ivopts_data *data,
2110 tree base, tree step, bool important, enum iv_position pos,
2111 struct iv_use *use, gimple incremented_at)
2112 {
2113 unsigned i;
2114 struct iv_cand *cand = NULL;
2115 tree type, orig_type;
2116
2117 if (base)
2118 {
2119 orig_type = TREE_TYPE (base);
2120 type = generic_type_for (orig_type);
2121 if (type != orig_type)
2122 {
2123 base = fold_convert (type, base);
2124 step = fold_convert (type, step);
2125 }
2126 }
2127
2128 for (i = 0; i < n_iv_cands (data); i++)
2129 {
2130 cand = iv_cand (data, i);
2131
2132 if (cand->pos != pos)
2133 continue;
2134
2135 if (cand->incremented_at != incremented_at
2136 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2137 && cand->ainc_use != use))
2138 continue;
2139
2140 if (!cand->iv)
2141 {
2142 if (!base && !step)
2143 break;
2144
2145 continue;
2146 }
2147
2148 if (!base && !step)
2149 continue;
2150
2151 if (operand_equal_p (base, cand->iv->base, 0)
2152 && operand_equal_p (step, cand->iv->step, 0))
2153 break;
2154 }
2155
2156 if (i == n_iv_cands (data))
2157 {
2158 cand = XCNEW (struct iv_cand);
2159 cand->id = i;
2160
2161 if (!base && !step)
2162 cand->iv = NULL;
2163 else
2164 cand->iv = alloc_iv (base, step);
2165
2166 cand->pos = pos;
2167 if (pos != IP_ORIGINAL && cand->iv)
2168 {
2169 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2170 cand->var_after = cand->var_before;
2171 }
2172 cand->important = important;
2173 cand->incremented_at = incremented_at;
2174 VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);
2175
2176 if (step
2177 && TREE_CODE (step) != INTEGER_CST)
2178 {
2179 fd_ivopts_data = data;
2180 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2181 }
2182
2183 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2184 cand->ainc_use = use;
2185 else
2186 cand->ainc_use = NULL;
2187
2188 if (dump_file && (dump_flags & TDF_DETAILS))
2189 dump_cand (dump_file, cand);
2190 }
2191
2192 if (important && !cand->important)
2193 {
2194 cand->important = true;
2195 if (dump_file && (dump_flags & TDF_DETAILS))
2196 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2197 }
2198
2199 if (use)
2200 {
2201 bitmap_set_bit (use->related_cands, i);
2202 if (dump_file && (dump_flags & TDF_DETAILS))
2203 fprintf (dump_file, "Candidate %d is related to use %d\n",
2204 cand->id, use->id);
2205 }
2206
2207 return cand;
2208 }
2209
2210 /* Returns true if incrementing the induction variable at the end of LOOP
2211 is allowed.
2212
2213 The purpose is to avoid splitting the latch edge with a biv increment,
2214 thus creating a jump, possibly confusing other optimization passes and
2215 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2216 IP_NORMAL_POS is not available (so we do not have a better alternative),
2217 or if the latch edge is already nonempty. */
2218
2219 static bool
2220 allow_ip_end_pos_p (struct loop *loop)
2221 {
2222 if (!ip_normal_pos (loop))
2223 return true;
2224
2225 if (!empty_block_p (ip_end_pos (loop)))
2226 return true;
2227
2228 return false;
2229 }
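/* Illustrative example (not from the sources): in a loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the latch edge is usually empty, so the biv increment is placed at
   IP_NORMAL before the exit test; IP_END would force splitting that
   edge and is therefore refused unless no normal position exists or
   the latch block already contains statements.  */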
2230
2231 /* If possible, adds autoincrement candidates BASE + STEP * i based on use
2232 USE. The important field is set to IMPORTANT. */
2233
2234 static void
2235 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2236 bool important, struct iv_use *use)
2237 {
2238 basic_block use_bb = gimple_bb (use->stmt);
2239 enum machine_mode mem_mode;
2240 unsigned HOST_WIDE_INT cstepi;
2241
2242 /* If we insert the increment in any position other than the standard
2243 ones, we must ensure that it is incremented once per iteration.
2244 It must not be in an inner nested loop, or on one side of an if
2245 statement. */
2246 if (use_bb->loop_father != data->current_loop
2247 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2248 || stmt_could_throw_p (use->stmt)
2249 || !cst_and_fits_in_hwi (step))
2250 return;
2251
2252 cstepi = int_cst_value (step);
2253
2254 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2255 if ((HAVE_PRE_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2256 || (HAVE_PRE_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2257 {
2258 enum tree_code code = MINUS_EXPR;
2259 tree new_base;
2260 tree new_step = step;
2261
2262 if (POINTER_TYPE_P (TREE_TYPE (base)))
2263 {
2264 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2265 code = POINTER_PLUS_EXPR;
2266 }
2267 else
2268 new_step = fold_convert (TREE_TYPE (base), new_step);
2269 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2270 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2271 use->stmt);
2272 }
2273 if ((HAVE_POST_INCREMENT && GET_MODE_SIZE (mem_mode) == cstepi)
2274 || (HAVE_POST_DECREMENT && GET_MODE_SIZE (mem_mode) == -cstepi))
2275 {
2276 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2277 use->stmt);
2278 }
2279 }
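/* A sketch of the intent, assuming a target with post-increment
   addressing (HAVE_POST_INCREMENT): for a use like

     for (p = buf; p < end; p++)
       sum += *p;

   where the step equals the access size, an IP_AFTER_USE candidate is
   added so that the memory access and the increment can later be
   combined into a single *p++ style access.  */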
2280
2281 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2282 position to POS. If USE is not NULL, the candidate is set as related to
2283 it. The candidate computation is scheduled on all available positions. */
2284
2285 static void
2286 add_candidate (struct ivopts_data *data,
2287 tree base, tree step, bool important, struct iv_use *use)
2288 {
2289 if (ip_normal_pos (data->current_loop))
2290 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2291 if (ip_end_pos (data->current_loop)
2292 && allow_ip_end_pos_p (data->current_loop))
2293 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2294
2295 if (use != NULL && use->type == USE_ADDRESS)
2296 add_autoinc_candidates (data, base, step, important, use);
2297 }
2298
2299 /* Add a standard "0 + 1 * iteration" iv candidate for a
2300 type with SIZE bits. */
2301
2302 static void
2303 add_standard_iv_candidates_for_size (struct ivopts_data *data,
2304 unsigned int size)
2305 {
2306 tree type = lang_hooks.types.type_for_size (size, true);
2307 add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
2308 true, NULL);
2309 }
2310
2311 /* Adds standard iv candidates. */
2312
2313 static void
2314 add_standard_iv_candidates (struct ivopts_data *data)
2315 {
2316 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);
2317
2318 /* The same for a double-integer type if it is still fast enough. */
2319 if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
2320 add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
2321 }
2322
2323
2324 /* Adds candidates based on the old induction variable IV. */
2325
2326 static void
2327 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2328 {
2329 gimple phi;
2330 tree def;
2331 struct iv_cand *cand;
2332
2333 add_candidate (data, iv->base, iv->step, true, NULL);
2334
2335 /* The same, but with initial value zero. */
2336 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2337 add_candidate (data, size_int (0), iv->step, true, NULL);
2338 else
2339 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2340 iv->step, true, NULL);
2341
2342 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2343 if (gimple_code (phi) == GIMPLE_PHI)
2344 {
2345 /* Additionally record the possibility of leaving the original iv
2346 untouched. */
2347 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2348 cand = add_candidate_1 (data,
2349 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2350 SSA_NAME_DEF_STMT (def));
2351 cand->var_before = iv->ssa_name;
2352 cand->var_after = def;
2353 }
2354 }
2355
2356 /* Adds candidates based on the old induction variables. */
2357
2358 static void
2359 add_old_ivs_candidates (struct ivopts_data *data)
2360 {
2361 unsigned i;
2362 struct iv *iv;
2363 bitmap_iterator bi;
2364
2365 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2366 {
2367 iv = ver_info (data, i)->iv;
2368 if (iv && iv->biv_p && !integer_zerop (iv->step))
2369 add_old_iv_candidates (data, iv);
2370 }
2371 }
2372
2373 /* Adds candidates based on the value of the induction variable IV and USE. */
2374
2375 static void
2376 add_iv_value_candidates (struct ivopts_data *data,
2377 struct iv *iv, struct iv_use *use)
2378 {
2379 unsigned HOST_WIDE_INT offset;
2380 tree base;
2381 tree basetype;
2382
2383 add_candidate (data, iv->base, iv->step, false, use);
2384
2385 /* The same, but with initial value zero. Make such a variable important,
2386 since it is generic enough that possibly many uses may be based
2387 on it. */
2388 basetype = TREE_TYPE (iv->base);
2389 if (POINTER_TYPE_P (basetype))
2390 basetype = sizetype;
2391 add_candidate (data, build_int_cst (basetype, 0),
2392 iv->step, true, use);
2393
2394 /* Third, try removing the constant offset. Make sure to even
2395 add a candidate for &a[0] vs. (T *)&a. */
2396 base = strip_offset (iv->base, &offset);
2397 if (offset
2398 || base != iv->base)
2399 add_candidate (data, base, iv->step, false, use);
2400 }
2401
2402 /* Adds candidates based on the uses. */
2403
2404 static void
2405 add_derived_ivs_candidates (struct ivopts_data *data)
2406 {
2407 unsigned i;
2408
2409 for (i = 0; i < n_iv_uses (data); i++)
2410 {
2411 struct iv_use *use = iv_use (data, i);
2412
2413 if (!use)
2414 continue;
2415
2416 switch (use->type)
2417 {
2418 case USE_NONLINEAR_EXPR:
2419 case USE_COMPARE:
2420 case USE_ADDRESS:
2421 /* Just add the ivs based on the value of the iv used here. */
2422 add_iv_value_candidates (data, use->iv, use);
2423 break;
2424
2425 default:
2426 gcc_unreachable ();
2427 }
2428 }
2429 }
2430
2431 /* Record important candidates and add them to related_cands bitmaps
2432 if needed. */
2433
2434 static void
2435 record_important_candidates (struct ivopts_data *data)
2436 {
2437 unsigned i;
2438 struct iv_use *use;
2439
2440 for (i = 0; i < n_iv_cands (data); i++)
2441 {
2442 struct iv_cand *cand = iv_cand (data, i);
2443
2444 if (cand->important)
2445 bitmap_set_bit (data->important_candidates, i);
2446 }
2447
2448 data->consider_all_candidates = (n_iv_cands (data)
2449 <= CONSIDER_ALL_CANDIDATES_BOUND);
2450
2451 if (data->consider_all_candidates)
2452 {
2453 /* We will not need "related_cands" bitmaps in this case,
2454 so release them to decrease peak memory consumption. */
2455 for (i = 0; i < n_iv_uses (data); i++)
2456 {
2457 use = iv_use (data, i);
2458 BITMAP_FREE (use->related_cands);
2459 }
2460 }
2461 else
2462 {
2463 /* Add important candidates to the related_cands bitmaps. */
2464 for (i = 0; i < n_iv_uses (data); i++)
2465 bitmap_ior_into (iv_use (data, i)->related_cands,
2466 data->important_candidates);
2467 }
2468 }
2469
2470 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2471 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2472 we allocate a simple table for every use. */
2473
2474 static void
2475 alloc_use_cost_map (struct ivopts_data *data)
2476 {
2477 unsigned i, size, s, j;
2478
2479 for (i = 0; i < n_iv_uses (data); i++)
2480 {
2481 struct iv_use *use = iv_use (data, i);
2482 bitmap_iterator bi;
2483
2484 if (data->consider_all_candidates)
2485 size = n_iv_cands (data);
2486 else
2487 {
2488 s = 0;
2489 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2490 {
2491 s++;
2492 }
2493
2494 /* Round up to the nearest power of two, so that taking the modulo is fast. */
2495 for (size = 1; size < s; size <<= 1)
2496 continue;
2497 }
2498
2499 use->n_map_members = size;
2500 use->cost_map = XCNEWVEC (struct cost_pair, size);
2501 }
2502 }
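/* For illustration: with five related candidates S is 5, so SIZE is
   rounded up to 8 and a candidate's slot can later be found by masking,

     s = cand->id & (use->n_map_members - 1);

   as done in set_use_iv_cost below, avoiding a division instruction.  */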
2503
2504 /* Returns a description of the computation cost of an expression whose
2505 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2506
2507 static comp_cost
2508 new_cost (unsigned runtime, unsigned complexity)
2509 {
2510 comp_cost cost;
2511
2512 cost.cost = runtime;
2513 cost.complexity = complexity;
2514
2515 return cost;
2516 }
2517
2518 /* Adds costs COST1 and COST2. */
2519
2520 static comp_cost
2521 add_costs (comp_cost cost1, comp_cost cost2)
2522 {
2523 cost1.cost += cost2.cost;
2524 cost1.complexity += cost2.complexity;
2525
2526 return cost1;
2527 }
2528 /* Subtracts COST2 from COST1. */
2529
2530 static comp_cost
2531 sub_costs (comp_cost cost1, comp_cost cost2)
2532 {
2533 cost1.cost -= cost2.cost;
2534 cost1.complexity -= cost2.complexity;
2535
2536 return cost1;
2537 }
2538
2539 /* Returns a negative number if COST1 < COST2, a positive number if
2540 COST1 > COST2, and 0 if COST1 = COST2. */
2541
2542 static int
2543 compare_costs (comp_cost cost1, comp_cost cost2)
2544 {
2545 if (cost1.cost == cost2.cost)
2546 return cost1.complexity - cost2.complexity;
2547
2548 return cost1.cost - cost2.cost;
2549 }
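/* Worked example (illustrative): for cost1 = {4, 1} and cost2 = {4, 2}
   the runtime costs tie, so compare_costs returns 1 - 2 = -1 and the
   less complex cost1 is preferred; complexity only breaks ties and
   never overrides a cheaper runtime cost.  */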
2550
2551 /* Returns true if COST is infinite. */
2552
2553 static bool
2554 infinite_cost_p (comp_cost cost)
2555 {
2556 return cost.cost == INFTY;
2557 }
2558
2559 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2560 depends on invariants DEPENDS_ON and that the value used in expressing it
2561 is VALUE. */
2562
2563 static void
2564 set_use_iv_cost (struct ivopts_data *data,
2565 struct iv_use *use, struct iv_cand *cand,
2566 comp_cost cost, bitmap depends_on, tree value)
2567 {
2568 unsigned i, s;
2569
2570 if (infinite_cost_p (cost))
2571 {
2572 BITMAP_FREE (depends_on);
2573 return;
2574 }
2575
2576 if (data->consider_all_candidates)
2577 {
2578 use->cost_map[cand->id].cand = cand;
2579 use->cost_map[cand->id].cost = cost;
2580 use->cost_map[cand->id].depends_on = depends_on;
2581 use->cost_map[cand->id].value = value;
2582 return;
2583 }
2584
2585 /* n_map_members is a power of two, so this computes modulo. */
2586 s = cand->id & (use->n_map_members - 1);
2587 for (i = s; i < use->n_map_members; i++)
2588 if (!use->cost_map[i].cand)
2589 goto found;
2590 for (i = 0; i < s; i++)
2591 if (!use->cost_map[i].cand)
2592 goto found;
2593
2594 gcc_unreachable ();
2595
2596 found:
2597 use->cost_map[i].cand = cand;
2598 use->cost_map[i].cost = cost;
2599 use->cost_map[i].depends_on = depends_on;
2600 use->cost_map[i].value = value;
2601 }
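/* The probing above is a minimal open-addressing scheme: starting at
   CAND->id modulo the table size, it scans forward, wraps around to
   the beginning, and relies on alloc_use_cost_map having sized the
   table for all related candidates, so gcc_unreachable () marks the
   impossible case of no free slot.  (Descriptive note only.)  */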
2602
2603 /* Gets cost of (USE, CANDIDATE) pair. */
2604
2605 static struct cost_pair *
2606 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2607 struct iv_cand *cand)
2608 {
2609 unsigned i, s;
2610 struct cost_pair *ret;
2611
2612 if (!cand)
2613 return NULL;
2614
2615 if (data->consider_all_candidates)
2616 {
2617 ret = use->cost_map + cand->id;
2618 if (!ret->cand)
2619 return NULL;
2620
2621 return ret;
2622 }
2623
2624 /* n_map_members is a power of two, so this computes modulo. */
2625 s = cand->id & (use->n_map_members - 1);
2626 for (i = s; i < use->n_map_members; i++)
2627 if (use->cost_map[i].cand == cand)
2628 return use->cost_map + i;
2629
2630 for (i = 0; i < s; i++)
2631 if (use->cost_map[i].cand == cand)
2632 return use->cost_map + i;
2633
2634 return NULL;
2635 }
2636
2637 /* Returns an estimate of the cost of computing SEQ. */
2638
2639 static unsigned
2640 seq_cost (rtx seq, bool speed)
2641 {
2642 unsigned cost = 0;
2643 rtx set;
2644
2645 for (; seq; seq = NEXT_INSN (seq))
2646 {
2647 set = single_set (seq);
2648 if (set)
2649 cost += rtx_cost (set, SET, speed);
2650 else
2651 cost++;
2652 }
2653
2654 return cost;
2655 }
2656
2657 /* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2658 static rtx
2659 produce_memory_decl_rtl (tree obj, int *regno)
2660 {
2661 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2662 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2663 rtx x;
2664
2665 gcc_assert (obj);
2666 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2667 {
2668 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2669 x = gen_rtx_SYMBOL_REF (address_mode, name);
2670 SET_SYMBOL_REF_DECL (x, obj);
2671 x = gen_rtx_MEM (DECL_MODE (obj), x);
2672 set_mem_addr_space (x, as);
2673 targetm.encode_section_info (obj, x, true);
2674 }
2675 else
2676 {
2677 x = gen_raw_REG (address_mode, (*regno)++);
2678 x = gen_rtx_MEM (DECL_MODE (obj), x);
2679 set_mem_addr_space (x, as);
2680 }
2681
2682 return x;
2683 }
2684
2685 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2686 walk_tree. DATA contains the actual fake register number. */
2687
2688 static tree
2689 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2690 {
2691 tree obj = NULL_TREE;
2692 rtx x = NULL_RTX;
2693 int *regno = (int *) data;
2694
2695 switch (TREE_CODE (*expr_p))
2696 {
2697 case ADDR_EXPR:
2698 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2699 handled_component_p (*expr_p);
2700 expr_p = &TREE_OPERAND (*expr_p, 0))
2701 continue;
2702 obj = *expr_p;
2703 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2704 x = produce_memory_decl_rtl (obj, regno);
2705 break;
2706
2707 case SSA_NAME:
2708 *ws = 0;
2709 obj = SSA_NAME_VAR (*expr_p);
2710 if (!DECL_RTL_SET_P (obj))
2711 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2712 break;
2713
2714 case VAR_DECL:
2715 case PARM_DECL:
2716 case RESULT_DECL:
2717 *ws = 0;
2718 obj = *expr_p;
2719
2720 if (DECL_RTL_SET_P (obj))
2721 break;
2722
2723 if (DECL_MODE (obj) == BLKmode)
2724 x = produce_memory_decl_rtl (obj, regno);
2725 else
2726 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2727
2728 break;
2729
2730 default:
2731 break;
2732 }
2733
2734 if (x)
2735 {
2736 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2737 SET_DECL_RTL (obj, x);
2738 }
2739
2740 return NULL_TREE;
2741 }
2742
2743 /* Determines the cost of computing EXPR. */
2744
2745 static unsigned
2746 computation_cost (tree expr, bool speed)
2747 {
2748 rtx seq, rslt;
2749 tree type = TREE_TYPE (expr);
2750 unsigned cost;
2751 /* Avoid using hard regs in ways which may be unsupported. */
2752 int regno = LAST_VIRTUAL_REGISTER + 1;
2753 struct cgraph_node *node = cgraph_node (current_function_decl);
2754 enum node_frequency real_frequency = node->frequency;
2755
2756 node->frequency = NODE_FREQUENCY_NORMAL;
2757 crtl->maybe_hot_insn_p = speed;
2758 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2759 start_sequence ();
2760 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2761 seq = get_insns ();
2762 end_sequence ();
2763 default_rtl_profile ();
2764 node->frequency = real_frequency;
2765
2766 cost = seq_cost (seq, speed);
2767 if (MEM_P (rslt))
2768 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2769 TYPE_ADDR_SPACE (type), speed);
2770
2771 return cost;
2772 }
2773
2774 /* Returns variable containing the value of candidate CAND at statement AT. */
2775
2776 static tree
2777 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2778 {
2779 if (stmt_after_increment (loop, cand, stmt))
2780 return cand->var_after;
2781 else
2782 return cand->var_before;
2783 }
2784
2785 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2786 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
2787
2788 int
2789 tree_int_cst_sign_bit (const_tree t)
2790 {
2791 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2792 unsigned HOST_WIDE_INT w;
2793
2794 if (bitno < HOST_BITS_PER_WIDE_INT)
2795 w = TREE_INT_CST_LOW (t);
2796 else
2797 {
2798 w = TREE_INT_CST_HIGH (t);
2799 bitno -= HOST_BITS_PER_WIDE_INT;
2800 }
2801
2802 return (w >> bitno) & 1;
2803 }
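/* Worked example (assuming HOST_BITS_PER_WIDE_INT == 64): for a
   constant of a 32-bit type, BITNO is 31 and the bit is taken from
   TREE_INT_CST_LOW; for a 128-bit type, BITNO starts at 127, so the
   high word is used and BITNO becomes 127 - 64 = 63.  */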
2804
2805 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2806 same precision, which is at least as great as the precision of TYPE, stores
2807 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2808 type of A and B. */
2809
2810 static tree
2811 determine_common_wider_type (tree *a, tree *b)
2812 {
2813 tree wider_type = NULL;
2814 tree suba, subb;
2815 tree atype = TREE_TYPE (*a);
2816
2817 if (CONVERT_EXPR_P (*a))
2818 {
2819 suba = TREE_OPERAND (*a, 0);
2820 wider_type = TREE_TYPE (suba);
2821 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2822 return atype;
2823 }
2824 else
2825 return atype;
2826
2827 if (CONVERT_EXPR_P (*b))
2828 {
2829 subb = TREE_OPERAND (*b, 0);
2830 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2831 return atype;
2832 }
2833 else
2834 return atype;
2835
2836 *a = suba;
2837 *b = subb;
2838 return wider_type;
2839 }
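/* For illustration: if *A is (unsigned int) x and *B is (unsigned int) y
   with x and y of type long (wider than unsigned int), *A and *B are
   replaced by x and y and long is returned, so the difference below can
   be folded in the wider type.  */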
2840
2841 /* Determines the expression by which USE is expressed from induction variable
2842 CAND at statement AT in LOOP. The expression is stored in decomposed
2843 form in AFF. Returns false if USE cannot be expressed using CAND. */
2844
2845 static bool
2846 get_computation_aff (struct loop *loop,
2847 struct iv_use *use, struct iv_cand *cand, gimple at,
2848 struct affine_tree_combination *aff)
2849 {
2850 tree ubase = use->iv->base;
2851 tree ustep = use->iv->step;
2852 tree cbase = cand->iv->base;
2853 tree cstep = cand->iv->step, cstep_common;
2854 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2855 tree common_type, var;
2856 tree uutype;
2857 aff_tree cbase_aff, var_aff;
2858 double_int rat;
2859
2860 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2861 {
2862 /* We do not have enough precision to express the values of the use. */
2863 return false;
2864 }
2865
2866 var = var_at_stmt (loop, cand, at);
2867 uutype = unsigned_type_for (utype);
2868
2869 /* If the conversion is not noop, perform it. */
2870 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2871 {
2872 cstep = fold_convert (uutype, cstep);
2873 cbase = fold_convert (uutype, cbase);
2874 var = fold_convert (uutype, var);
2875 }
2876
2877 if (!constant_multiple_of (ustep, cstep, &rat))
2878 return false;
2879
2880 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2881 type, we achieve better folding by computing their difference in this
2882 wider type, and casting the result to UUTYPE. We do not need to worry
2883 about overflows, as all the arithmetic will in the end be performed in
2884 UUTYPE anyway. */
2885 common_type = determine_common_wider_type (&ubase, &cbase);
2886
2887 /* use = ubase - ratio * cbase + ratio * var. */
2888 tree_to_aff_combination (ubase, common_type, aff);
2889 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2890 tree_to_aff_combination (var, uutype, &var_aff);
2891
2892 /* We need to shift the value if we are after the increment. */
2893 if (stmt_after_increment (loop, cand, at))
2894 {
2895 aff_tree cstep_aff;
2896
2897 if (common_type != uutype)
2898 cstep_common = fold_convert (common_type, cstep);
2899 else
2900 cstep_common = cstep;
2901
2902 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2903 aff_combination_add (&cbase_aff, &cstep_aff);
2904 }
2905
2906 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2907 aff_combination_add (aff, &cbase_aff);
2908 if (common_type != uutype)
2909 aff_combination_convert (aff, uutype);
2910
2911 aff_combination_scale (&var_aff, rat);
2912 aff_combination_add (aff, &var_aff);
2913
2914 return true;
2915 }
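/* A worked instance of the formula above (purely illustrative): with
   ubase = &a[0], ustep = 4, cbase = 0 and cstep = 1, RAT is 4 and

     use = ubase - 4 * 0 + 4 * var = &a[0] + 4 * var,

   i.e. the use is rewritten in terms of the canonical counter VAR.  */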
2916
2917 /* Determines the expression by which USE is expressed from induction variable
2918 CAND at statement AT in LOOP. The computation is unshared. */
2919
2920 static tree
2921 get_computation_at (struct loop *loop,
2922 struct iv_use *use, struct iv_cand *cand, gimple at)
2923 {
2924 aff_tree aff;
2925 tree type = TREE_TYPE (use->iv->base);
2926
2927 if (!get_computation_aff (loop, use, cand, at, &aff))
2928 return NULL_TREE;
2929 unshare_aff_combination (&aff);
2930 return fold_convert (type, aff_combination_to_tree (&aff));
2931 }
2932
2933 /* Determines the expression by which USE is expressed from induction variable
2934 CAND in LOOP. The computation is unshared. */
2935
2936 static tree
2937 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2938 {
2939 return get_computation_at (loop, use, cand, use->stmt);
2940 }
2941
2942 /* Adjust the cost COST for being in loop setup rather than loop body.
2943 If we're optimizing for space, the loop setup overhead is constant;
2944 if we're optimizing for speed, amortize it over the per-iteration cost. */
2945 static unsigned
2946 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
2947 {
2948 if (cost == INFTY)
2949 return cost;
2950 else if (optimize_loop_for_speed_p (data->current_loop))
2951 return cost / AVG_LOOP_NITER (data->current_loop);
2952 else
2953 return cost;
2954 }
2955
2956 /* Returns cost of addition in MODE. */
2957
2958 static unsigned
2959 add_cost (enum machine_mode mode, bool speed)
2960 {
2961 static unsigned costs[NUM_MACHINE_MODES];
2962 rtx seq;
2963 unsigned cost;
2964
2965 if (costs[mode])
2966 return costs[mode];
2967
2968 start_sequence ();
2969 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2970 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2971 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2972 NULL_RTX);
2973 seq = get_insns ();
2974 end_sequence ();
2975
2976 cost = seq_cost (seq, speed);
2977 if (!cost)
2978 cost = 1;
2979
2980 costs[mode] = cost;
2981
2982 if (dump_file && (dump_flags & TDF_DETAILS))
2983 fprintf (dump_file, "Addition in %s costs %d\n",
2984 GET_MODE_NAME (mode), cost);
2985 return cost;
2986 }
2987
2988 /* Entry in a hashtable of already known costs for multiplication. */
2989 struct mbc_entry
2990 {
2991 HOST_WIDE_INT cst; /* The constant to multiply by. */
2992 enum machine_mode mode; /* In mode. */
2993 unsigned cost; /* The cost. */
2994 };
2995
2996 /* Computes the hash value for ENTRY. */
2997
2998 static hashval_t
2999 mbc_entry_hash (const void *entry)
3000 {
3001 const struct mbc_entry *e = (const struct mbc_entry *) entry;
3002
3003 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
3004 }
3005
3006 /* Compares the hash table entries ENTRY1 and ENTRY2. */
3007
3008 static int
3009 mbc_entry_eq (const void *entry1, const void *entry2)
3010 {
3011 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
3012 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
3013
3014 return (e1->mode == e2->mode
3015 && e1->cst == e2->cst);
3016 }
3017
3018 /* Returns cost of multiplication by constant CST in MODE. */
3019
3020 unsigned
3021 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode, bool speed)
3022 {
3023 static htab_t costs;
3024 struct mbc_entry **cached, act;
3025 rtx seq;
3026 unsigned cost;
3027
3028 if (!costs)
3029 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
3030
3031 act.mode = mode;
3032 act.cst = cst;
3033 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
3034 if (*cached)
3035 return (*cached)->cost;
3036
3037 *cached = XNEW (struct mbc_entry);
3038 (*cached)->mode = mode;
3039 (*cached)->cst = cst;
3040
3041 start_sequence ();
3042 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
3043 gen_int_mode (cst, mode), NULL_RTX, 0);
3044 seq = get_insns ();
3045 end_sequence ();
3046
3047 cost = seq_cost (seq, speed);
3048
3049 if (dump_file && (dump_flags & TDF_DETAILS))
3050 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
3051 (int) cst, GET_MODE_NAME (mode), cost);
3052
3053 (*cached)->cost = cost;
3054
3055 return cost;
3056 }
3057
3058 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3059 validity for a memory reference accessing memory of mode MODE in
3060 address space AS. */
3061
3062 DEF_VEC_P (sbitmap);
3063 DEF_VEC_ALLOC_P (sbitmap, heap);
3064
3065 bool
3066 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3067 addr_space_t as)
3068 {
3069 #define MAX_RATIO 128
3070 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3071 static VEC (sbitmap, heap) *valid_mult_list;
3072 sbitmap valid_mult;
3073
3074 if (data_index >= VEC_length (sbitmap, valid_mult_list))
3075 VEC_safe_grow_cleared (sbitmap, heap, valid_mult_list, data_index + 1);
3076
3077 valid_mult = VEC_index (sbitmap, valid_mult_list, data_index);
3078 if (!valid_mult)
3079 {
3080 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3081 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3082 rtx addr;
3083 HOST_WIDE_INT i;
3084
3085 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3086 sbitmap_zero (valid_mult);
3087 addr = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3088 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3089 {
3090 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3091 if (memory_address_addr_space_p (mode, addr, as))
3092 SET_BIT (valid_mult, i + MAX_RATIO);
3093 }
3094
3095 if (dump_file && (dump_flags & TDF_DETAILS))
3096 {
3097 fprintf (dump_file, " allowed multipliers:");
3098 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3099 if (TEST_BIT (valid_mult, i + MAX_RATIO))
3100 fprintf (dump_file, " %d", (int) i);
3101 fprintf (dump_file, "\n");
3102 fprintf (dump_file, "\n");
3103 }
3104
3105 VEC_replace (sbitmap, valid_mult_list, data_index, valid_mult);
3106 }
3107
3108 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3109 return false;
3110
3111 return TEST_BIT (valid_mult, ratio + MAX_RATIO);
3112 }
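/* For example, on targets with scaled-index addressing in the i386
   style, the probing above typically leaves only the hardware scales
   (1, 2, 4 and 8) set in VALID_MULT; the actual set is whatever
   memory_address_addr_space_p accepts for the mode.  */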
3113
3114 /* Returns the cost of an address in the shape symbol + var + OFFSET +
3115 RATIO * index. If SYMBOL_PRESENT is false, the symbol is omitted. If
3116 VAR_PRESENT is false, the variable is omitted. Computes the cost for a
3117 memory reference accessing memory of mode MEM_MODE in address space AS.
3118
3119 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3120 size of MEM_MODE / RATIO) is available. To make this determination, we
3121 look at the size of the increment to be made, which is given in CSTEP.
3122 CSTEP may be zero if the step is unknown.
3123 STMT_AFTER_INC is true iff the statement we're looking at is after the
3124 increment of the original biv.
3125
3126 TODO -- there must be some better way. This all is quite crude. */
3127
3128 typedef struct
3129 {
3130 HOST_WIDE_INT min_offset, max_offset;
3131 unsigned costs[2][2][2][2];
3132 } *address_cost_data;
3133
3134 DEF_VEC_P (address_cost_data);
3135 DEF_VEC_ALLOC_P (address_cost_data, heap);
3136
3137 static comp_cost
3138 get_address_cost (bool symbol_present, bool var_present,
3139 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3140 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3141 addr_space_t as, bool speed,
3142 bool stmt_after_inc, bool *may_autoinc)
3143 {
3144 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3145 static VEC(address_cost_data, heap) *address_cost_data_list;
3146 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3147 address_cost_data data;
3148 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3149 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3150 unsigned cost, acost, complexity;
3151 bool offset_p, ratio_p, autoinc;
3152 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3153 unsigned HOST_WIDE_INT mask;
3154 unsigned bits;
3155
3156 if (data_index >= VEC_length (address_cost_data, address_cost_data_list))
3157 VEC_safe_grow_cleared (address_cost_data, heap, address_cost_data_list,
3158 data_index + 1);
3159
3160 data = VEC_index (address_cost_data, address_cost_data_list, data_index);
3161 if (!data)
3162 {
3163 HOST_WIDE_INT i;
3164 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
3165 HOST_WIDE_INT rat, off;
3166 int old_cse_not_expected;
3167 unsigned sym_p, var_p, off_p, rat_p, add_c;
3168 rtx seq, addr, base;
3169 rtx reg0, reg1;
3170
3171 data = (address_cost_data) xcalloc (1, sizeof (*data));
3172
3173 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3174
3175 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3176 for (i = start; i <= 1 << 20; i <<= 1)
3177 {
3178 XEXP (addr, 1) = gen_int_mode (i, address_mode);
3179 if (!memory_address_addr_space_p (mem_mode, addr, as))
3180 break;
3181 }
3182 data->max_offset = i == start ? 0 : i >> 1;
3183 off = data->max_offset;
3184
3185 for (i = start; i <= 1 << 20; i <<= 1)
3186 {
3187 XEXP (addr, 1) = gen_int_mode (-i, address_mode);
3188 if (!memory_address_addr_space_p (mem_mode, addr, as))
3189 break;
3190 }
3191 data->min_offset = i == start ? 0 : -(i >> 1);
3192
3193 if (dump_file && (dump_flags & TDF_DETAILS))
3194 {
3195 fprintf (dump_file, "get_address_cost:\n");
3196 fprintf (dump_file, " min offset %s %d\n",
3197 GET_MODE_NAME (mem_mode),
3198 (int) data->min_offset);
3199 fprintf (dump_file, " max offset %s %d\n",
3200 GET_MODE_NAME (mem_mode),
3201 (int) data->max_offset);
3202 }
3203
3204 rat = 1;
3205 for (i = 2; i <= MAX_RATIO; i++)
3206 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3207 {
3208 rat = i;
3209 break;
3210 }
3211
3212 /* Compute the cost of various addressing modes. */
3213 acost = 0;
3214 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3215 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3216
3217 if (HAVE_PRE_DECREMENT)
3218 {
3219 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3220 has_predec[mem_mode]
3221 = memory_address_addr_space_p (mem_mode, addr, as);
3222 }
3223 if (HAVE_POST_DECREMENT)
3224 {
3225 addr = gen_rtx_POST_DEC (address_mode, reg0);
3226 has_postdec[mem_mode]
3227 = memory_address_addr_space_p (mem_mode, addr, as);
3228 }
3229 if (HAVE_PRE_INCREMENT)
3230 {
3231 addr = gen_rtx_PRE_INC (address_mode, reg0);
3232 has_preinc[mem_mode]
3233 = memory_address_addr_space_p (mem_mode, addr, as);
3234 }
3235 if (HAVE_POST_INCREMENT)
3236 {
3237 addr = gen_rtx_POST_INC (address_mode, reg0);
3238 has_postinc[mem_mode]
3239 = memory_address_addr_space_p (mem_mode, addr, as);
3240 }
3241 for (i = 0; i < 16; i++)
3242 {
3243 sym_p = i & 1;
3244 var_p = (i >> 1) & 1;
3245 off_p = (i >> 2) & 1;
3246 rat_p = (i >> 3) & 1;
3247
3248 addr = reg0;
3249 if (rat_p)
3250 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3251 gen_int_mode (rat, address_mode));
3252
3253 if (var_p)
3254 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3255
3256 if (sym_p)
3257 {
3258 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3259 /* ??? We can run into trouble with some backends by presenting
3260 it with symbols which haven't been properly passed through
3261 targetm.encode_section_info. By setting the local bit, we
3262 enhance the probability of things working. */
3263 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3264
3265 if (off_p)
3266 base = gen_rtx_fmt_e (CONST, address_mode,
3267 gen_rtx_fmt_ee
3268 (PLUS, address_mode, base,
3269 gen_int_mode (off, address_mode)));
3270 }
3271 else if (off_p)
3272 base = gen_int_mode (off, address_mode);
3273 else
3274 base = NULL_RTX;
3275
3276 if (base)
3277 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3278
3279 start_sequence ();
3280 /* To avoid splitting addressing modes, pretend that no cse will
3281 follow. */
3282 old_cse_not_expected = cse_not_expected;
3283 cse_not_expected = true;
3284 addr = memory_address_addr_space (mem_mode, addr, as);
3285 cse_not_expected = old_cse_not_expected;
3286 seq = get_insns ();
3287 end_sequence ();
3288
3289 acost = seq_cost (seq, speed);
3290 acost += address_cost (addr, mem_mode, as, speed);
3291
3292 if (!acost)
3293 acost = 1;
3294 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3295 }
3296
3297 /* On some targets, it is quite expensive to load a symbol into a register,
3298 which makes addresses that contain symbols look much more expensive.
3299 However, the symbol will have to be loaded in any case before the
3300 loop (and quite likely we have it in a register already), so it does not
3301 make much sense to penalize them too heavily. So make some final
3302 tweaks for the SYMBOL_PRESENT modes:
3303
3304 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3305 var is cheaper, use this mode with small penalty.
3306 If VAR_PRESENT is true, try whether the mode with
3307 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3308 if this is the case, use it. */
3309 add_c = add_cost (address_mode, speed);
3310 for (i = 0; i < 8; i++)
3311 {
3312 var_p = i & 1;
3313 off_p = (i >> 1) & 1;
3314 rat_p = (i >> 2) & 1;
3315
3316 acost = data->costs[0][1][off_p][rat_p] + 1;
3317 if (var_p)
3318 acost += add_c;
3319
3320 if (acost < data->costs[1][var_p][off_p][rat_p])
3321 data->costs[1][var_p][off_p][rat_p] = acost;
3322 }
3323
3324 if (dump_file && (dump_flags & TDF_DETAILS))
3325 {
3326 fprintf (dump_file, "Address costs:\n");
3327
3328 for (i = 0; i < 16; i++)
3329 {
3330 sym_p = i & 1;
3331 var_p = (i >> 1) & 1;
3332 off_p = (i >> 2) & 1;
3333 rat_p = (i >> 3) & 1;
3334
3335 fprintf (dump_file, " ");
3336 if (sym_p)
3337 fprintf (dump_file, "sym + ");
3338 if (var_p)
3339 fprintf (dump_file, "var + ");
3340 if (off_p)
3341 fprintf (dump_file, "cst + ");
3342 if (rat_p)
3343 fprintf (dump_file, "rat * ");
3344
3345 acost = data->costs[sym_p][var_p][off_p][rat_p];
3346 fprintf (dump_file, "index costs %d\n", acost);
3347 }
3348 if (has_predec[mem_mode] || has_postdec[mem_mode]
3349 || has_preinc[mem_mode] || has_postinc[mem_mode])
3350 fprintf (dump_file, " May include autoinc/dec\n");
3351 fprintf (dump_file, "\n");
3352 }
3353
3354 VEC_replace (address_cost_data, address_cost_data_list,
3355 data_index, data);
3356 }
3357
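/* The statements below sign-extend OFFSET from the width of the address
   mode to HOST_WIDE_INT.  E.g. with a 32-bit address mode, MASK is
   0xffffffff, so an incoming offset of 0xfffffffc is first reduced
   modulo 2^32 and then, since its bit 31 is set, extended to the value
   -4.  (Descriptive comment only.)  */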
3358 bits = GET_MODE_BITSIZE (address_mode);
3359 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3360 offset &= mask;
3361 if ((offset >> (bits - 1) & 1))
3362 offset |= ~mask;
3363 s_offset = offset;
3364
3365 autoinc = false;
3366 msize = GET_MODE_SIZE (mem_mode);
3367 autoinc_offset = offset;
3368 if (stmt_after_inc)
3369 autoinc_offset += ratio * cstep;
3370 if (symbol_present || var_present || ratio != 1)
3371 autoinc = false;
3372 else if ((has_postinc[mem_mode] && autoinc_offset == 0
3373 && msize == cstep)
3374 || (has_postdec[mem_mode] && autoinc_offset == 0
3375 && msize == -cstep)
3376 || (has_preinc[mem_mode] && autoinc_offset == msize
3377 && msize == cstep)
3378 || (has_predec[mem_mode] && autoinc_offset == -msize
3379 && msize == -cstep))
3380 autoinc = true;
3381
3382 cost = 0;
3383 offset_p = (s_offset != 0
3384 && data->min_offset <= s_offset
3385 && s_offset <= data->max_offset);
3386 ratio_p = (ratio != 1
3387 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3388
3389 if (ratio != 1 && !ratio_p)
3390 cost += multiply_by_cost (ratio, address_mode, speed);
3391
3392 if (s_offset && !offset_p && !symbol_present)
3393 cost += add_cost (address_mode, speed);
3394
3395 if (may_autoinc)
3396 *may_autoinc = autoinc;
3397 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3398 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3399 return new_cost (cost + acost, complexity);
3400 }
3401
3402 /* Estimates cost of forcing expression EXPR into a variable. */
3403
3404 static comp_cost
3405 force_expr_to_var_cost (tree expr, bool speed)
3406 {
3407 static bool costs_initialized = false;
3408 static unsigned integer_cost [2];
3409 static unsigned symbol_cost [2];
3410 static unsigned address_cost [2];
3411 tree op0, op1;
3412 comp_cost cost0, cost1, cost;
3413 enum machine_mode mode;
3414
3415 if (!costs_initialized)
3416 {
3417 tree type = build_pointer_type (integer_type_node);
3418 tree var, addr;
3419 rtx x;
3420 int i;
3421
3422 var = create_tmp_var_raw (integer_type_node, "test_var");
3423 TREE_STATIC (var) = 1;
3424 x = produce_memory_decl_rtl (var, NULL);
3425 SET_DECL_RTL (var, x);
3426
3427 addr = build1 (ADDR_EXPR, type, var);
3428
3429
3430 for (i = 0; i < 2; i++)
3431 {
3432 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3433 2000), i);
3434
3435 symbol_cost[i] = computation_cost (addr, i) + 1;
3436
3437 address_cost[i]
3438 = computation_cost (build2 (POINTER_PLUS_EXPR, type,
3439 addr,
3440 build_int_cst (sizetype, 2000)), i) + 1;
3441 if (dump_file && (dump_flags & TDF_DETAILS))
3442 {
3443 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3444 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3445 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3446 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3447 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3448 fprintf (dump_file, "\n");
3449 }
3450 }
3451
3452 costs_initialized = true;
3453 }
3454
3455 STRIP_NOPS (expr);
3456
3457 if (SSA_VAR_P (expr))
3458 return zero_cost;
3459
3460 if (is_gimple_min_invariant (expr))
3461 {
3462 if (TREE_CODE (expr) == INTEGER_CST)
3463 return new_cost (integer_cost [speed], 0);
3464
3465 if (TREE_CODE (expr) == ADDR_EXPR)
3466 {
3467 tree obj = TREE_OPERAND (expr, 0);
3468
3469 if (TREE_CODE (obj) == VAR_DECL
3470 || TREE_CODE (obj) == PARM_DECL
3471 || TREE_CODE (obj) == RESULT_DECL)
3472 return new_cost (symbol_cost [speed], 0);
3473 }
3474
3475 return new_cost (address_cost [speed], 0);
3476 }
3477
3478 switch (TREE_CODE (expr))
3479 {
3480 case POINTER_PLUS_EXPR:
3481 case PLUS_EXPR:
3482 case MINUS_EXPR:
3483 case MULT_EXPR:
3484 op0 = TREE_OPERAND (expr, 0);
3485 op1 = TREE_OPERAND (expr, 1);
3486 STRIP_NOPS (op0);
3487 STRIP_NOPS (op1);
3488
3489 if (is_gimple_val (op0))
3490 cost0 = zero_cost;
3491 else
3492 cost0 = force_expr_to_var_cost (op0, speed);
3493
3494 if (is_gimple_val (op1))
3495 cost1 = zero_cost;
3496 else
3497 cost1 = force_expr_to_var_cost (op1, speed);
3498
3499 break;
3500
3501 case NEGATE_EXPR:
3502 op0 = TREE_OPERAND (expr, 0);
3503 STRIP_NOPS (op0);
3504 op1 = NULL_TREE;
3505
3506 if (is_gimple_val (op0))
3507 cost0 = zero_cost;
3508 else
3509 cost0 = force_expr_to_var_cost (op0, speed);
3510
3511 cost1 = zero_cost;
3512 break;
3513
3514 default:
3515 /* Just an arbitrary value, FIXME. */
3516 return new_cost (target_spill_cost[speed], 0);
3517 }
3518
3519 mode = TYPE_MODE (TREE_TYPE (expr));
3520 switch (TREE_CODE (expr))
3521 {
3522 case POINTER_PLUS_EXPR:
3523 case PLUS_EXPR:
3524 case MINUS_EXPR:
3525 case NEGATE_EXPR:
3526 cost = new_cost (add_cost (mode, speed), 0);
3527 break;
3528
3529 case MULT_EXPR:
3530 if (cst_and_fits_in_hwi (op0))
3531 cost = new_cost (multiply_by_cost (int_cst_value (op0), mode, speed), 0);
3532 else if (cst_and_fits_in_hwi (op1))
3533 cost = new_cost (multiply_by_cost (int_cst_value (op1), mode, speed), 0);
3534 else
3535 return new_cost (target_spill_cost [speed], 0);
3536 break;
3537
3538 default:
3539 gcc_unreachable ();
3540 }
3541
3542 cost = add_costs (cost, cost0);
3543 cost = add_costs (cost, cost1);
3544
3545 /* Bound the cost by target_spill_cost. The parts of complicated
3546 computations are often either loop invariant or at least can
3547 be shared between several iv uses, so letting this grow without
3548 limits would not give reasonable results. */
3549 if (cost.cost > (int) target_spill_cost [speed])
3550 cost.cost = target_spill_cost [speed];
3551
3552 return cost;
3553 }
3554
3555 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3556 invariants the computation depends on. */
3557
3558 static comp_cost
3559 force_var_cost (struct ivopts_data *data,
3560 tree expr, bitmap *depends_on)
3561 {
3562 if (depends_on)
3563 {
3564 fd_ivopts_data = data;
3565 walk_tree (&expr, find_depends, depends_on, NULL);
3566 }
3567
3568 return force_expr_to_var_cost (expr, data->speed);
3569 }
3570
3571 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3572 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3573 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3574 invariants the computation depends on. */
3575
3576 static comp_cost
3577 split_address_cost (struct ivopts_data *data,
3578 tree addr, bool *symbol_present, bool *var_present,
3579 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3580 {
3581 tree core;
3582 HOST_WIDE_INT bitsize;
3583 HOST_WIDE_INT bitpos;
3584 tree toffset;
3585 enum machine_mode mode;
3586 int unsignedp, volatilep;
3587
3588 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3589 &unsignedp, &volatilep, false);
3590
3591 if (toffset != 0
3592 || bitpos % BITS_PER_UNIT != 0
3593 || TREE_CODE (core) != VAR_DECL)
3594 {
3595 *symbol_present = false;
3596 *var_present = true;
3597 fd_ivopts_data = data;
3598 walk_tree (&addr, find_depends, depends_on, NULL);
3599 return new_cost (target_spill_cost[data->speed], 0);
3600 }
3601
3602 *offset += bitpos / BITS_PER_UNIT;
3603 if (TREE_STATIC (core)
3604 || DECL_EXTERNAL (core))
3605 {
3606 *symbol_present = true;
3607 *var_present = false;
3608 return zero_cost;
3609 }
3610
3611 *symbol_present = false;
3612 *var_present = true;
3613 return zero_cost;
3614 }
3615
3616 /* Estimates cost of expressing difference of addresses E1 - E2 as
3617 var + symbol + offset. The value of offset is added to OFFSET,
3618 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3619 part is missing. DEPENDS_ON is a set of the invariants the computation
3620 depends on. */
3621
3622 static comp_cost
3623 ptr_difference_cost (struct ivopts_data *data,
3624 tree e1, tree e2, bool *symbol_present, bool *var_present,
3625 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3626 {
3627 HOST_WIDE_INT diff = 0;
3628 aff_tree aff_e1, aff_e2;
3629 tree type;
3630
3631 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3632
3633 if (ptr_difference_const (e1, e2, &diff))
3634 {
3635 *offset += diff;
3636 *symbol_present = false;
3637 *var_present = false;
3638 return zero_cost;
3639 }
3640
3641 if (integer_zerop (e2))
3642 return split_address_cost (data, TREE_OPERAND (e1, 0),
3643 symbol_present, var_present, offset, depends_on);
3644
3645 *symbol_present = false;
3646 *var_present = true;
3647
3648 type = signed_type_for (TREE_TYPE (e1));
3649 tree_to_aff_combination (e1, type, &aff_e1);
3650 tree_to_aff_combination (e2, type, &aff_e2);
3651 aff_combination_scale (&aff_e2, double_int_minus_one);
3652 aff_combination_add (&aff_e1, &aff_e2);
3653
3654 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3655 }
3656
3657 /* Estimates cost of expressing difference E1 - E2 as
3658 var + symbol + offset. The value of offset is added to OFFSET,
3659 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3660 part is missing. DEPENDS_ON is a set of the invariants the computation
3661 depends on. */
3662
3663 static comp_cost
3664 difference_cost (struct ivopts_data *data,
3665 tree e1, tree e2, bool *symbol_present, bool *var_present,
3666 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3667 {
3668 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3669 unsigned HOST_WIDE_INT off1, off2;
3670 aff_tree aff_e1, aff_e2;
3671 tree type;
3672
3673 e1 = strip_offset (e1, &off1);
3674 e2 = strip_offset (e2, &off2);
3675 *offset += off1 - off2;
3676
3677 STRIP_NOPS (e1);
3678 STRIP_NOPS (e2);
3679
3680 if (TREE_CODE (e1) == ADDR_EXPR)
3681 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3682 offset, depends_on);
3683 *symbol_present = false;
3684
3685 if (operand_equal_p (e1, e2, 0))
3686 {
3687 *var_present = false;
3688 return zero_cost;
3689 }
3690
3691 *var_present = true;
3692
3693 if (integer_zerop (e2))
3694 return force_var_cost (data, e1, depends_on);
3695
3696 if (integer_zerop (e1))
3697 {
3698 comp_cost cost = force_var_cost (data, e2, depends_on);
3699 cost.cost += multiply_by_cost (-1, mode, data->speed);
3700 return cost;
3701 }
3702
3703 type = signed_type_for (TREE_TYPE (e1));
3704 tree_to_aff_combination (e1, type, &aff_e1);
3705 tree_to_aff_combination (e2, type, &aff_e2);
3706 aff_combination_scale (&aff_e2, double_int_minus_one);
3707 aff_combination_add (&aff_e1, &aff_e2);
3708
3709 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3710 }
3711
3712 /* Determines the cost of the computation by which USE is expressed
3713 from induction variable CAND. If ADDRESS_P is true, we just need
3714 to create an address from it, otherwise we want to get it into a
3715 register. A set of invariants we depend on is stored in
3716 DEPENDS_ON. AT is the statement at which the value is computed.
3717 If CAN_AUTOINC is nonnull, use it to record whether autoinc
3718 addressing is likely. */
3719
3720 static comp_cost
3721 get_computation_cost_at (struct ivopts_data *data,
3722 struct iv_use *use, struct iv_cand *cand,
3723 bool address_p, bitmap *depends_on, gimple at,
3724 bool *can_autoinc)
3725 {
3726 tree ubase = use->iv->base, ustep = use->iv->step;
3727 tree cbase, cstep;
3728 tree utype = TREE_TYPE (ubase), ctype;
3729 unsigned HOST_WIDE_INT cstepi, offset = 0;
3730 HOST_WIDE_INT ratio, aratio;
3731 bool var_present, symbol_present, stmt_is_after_inc;
3732 comp_cost cost;
3733 double_int rat;
3734 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
3735
3736 *depends_on = NULL;
3737
3738 /* Only consider real candidates. */
3739 if (!cand->iv)
3740 return infinite_cost;
3741
3742 cbase = cand->iv->base;
3743 cstep = cand->iv->step;
3744 ctype = TREE_TYPE (cbase);
3745
3746 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3747 {
3748 /* We do not have enough precision to express the values of the use. */
3749 return infinite_cost;
3750 }
3751
3752 if (address_p)
3753 {
3754 /* Do not try to express the address of an object with a computation
3755 based on the address of a different object. This may cause problems
3756 in rtl level alias analysis (which does not expect this to happen,
3757 as this is illegal in C), and would be unlikely to be useful
3758 anyway. */
3759 if (use->iv->base_object
3760 && cand->iv->base_object
3761 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3762 return infinite_cost;
3763 }
3764
3765 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3766 {
3767 /* TODO -- add direct handling of this case. */
3768 goto fallback;
3769 }
3770
3771 /* CSTEPI is removed from the offset in case the statement is after the
3772 increment. If the step is not constant, we use zero instead.
3773 This is a bit imprecise (there is the extra addition), but
3774 redundancy elimination is likely to transform the code so that
3775 it uses the value of the variable before the increment anyway,
3776 so it is not all that unrealistic. */
3777 if (cst_and_fits_in_hwi (cstep))
3778 cstepi = int_cst_value (cstep);
3779 else
3780 cstepi = 0;
3781
3782 if (!constant_multiple_of (ustep, cstep, &rat))
3783 return infinite_cost;
3784
3785 if (double_int_fits_in_shwi_p (rat))
3786 ratio = double_int_to_shwi (rat);
3787 else
3788 return infinite_cost;
3789
3790 STRIP_NOPS (cbase);
3791 ctype = TREE_TYPE (cbase);
3792
3793 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3794 or ratio == 1, it is better to handle this like
3795
3796 ubase - ratio * cbase + ratio * var
3797
3798 (this also holds in the case ratio == -1, TODO). */
3799
3800 if (cst_and_fits_in_hwi (cbase))
3801 {
3802 offset = - ratio * int_cst_value (cbase);
3803 cost = difference_cost (data,
3804 ubase, build_int_cst (utype, 0),
3805 &symbol_present, &var_present, &offset,
3806 depends_on);
3807 }
3808 else if (ratio == 1)
3809 {
3810 cost = difference_cost (data,
3811 ubase, cbase,
3812 &symbol_present, &var_present, &offset,
3813 depends_on);
3814 }
3815 else if (address_p
3816 && !POINTER_TYPE_P (ctype)
3817 && multiplier_allowed_in_address_p
3818 (ratio, TYPE_MODE (TREE_TYPE (utype)),
3819 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
3820 {
3821 cbase
3822 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
3823 cost = difference_cost (data,
3824 ubase, cbase,
3825 &symbol_present, &var_present, &offset,
3826 depends_on);
3827 }
3828 else
3829 {
3830 cost = force_var_cost (data, cbase, depends_on);
3831 cost.cost += add_cost (TYPE_MODE (ctype), data->speed);
3832 cost = add_costs (cost,
3833 difference_cost (data,
3834 ubase, build_int_cst (utype, 0),
3835 &symbol_present, &var_present,
3836 &offset, depends_on));
3837 }
3838
3839 /* If we are after the increment, the value of the candidate is already
3840 one step ahead, so compensate for that in the offset. */
3841 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
3842 if (stmt_is_after_inc)
3843 offset -= ratio * cstepi;
3844
3845 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3846 (symbol/var1/const parts may be omitted). If we are looking for an
3847 address, find the cost of addressing this. */
3848 if (address_p)
3849 return add_costs (cost,
3850 get_address_cost (symbol_present, var_present,
3851 offset, ratio, cstepi,
3852 TYPE_MODE (TREE_TYPE (utype)),
3853 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
3854 speed, stmt_is_after_inc,
3855 can_autoinc));
3856
3857 /* Otherwise estimate the costs for computing the expression. */
3858 if (!symbol_present && !var_present && !offset)
3859 {
3860 if (ratio != 1)
3861 cost.cost += multiply_by_cost (ratio, TYPE_MODE (ctype), speed);
3862 return cost;
3863 }
3864
3865 /* Symbol + offset should be compile-time computable, so assume that they
3866 are added to the variable only once, if it is present. */
3867 if (var_present && (symbol_present || offset))
3868 cost.cost += adjust_setup_cost (data,
3869 add_cost (TYPE_MODE (ctype), speed));
3870
3871 /* Having an offset does not affect the runtime cost in case it is added
3872 to the symbol, but it increases the complexity. */
3873 if (offset)
3874 cost.complexity++;
3875
3876 cost.cost += add_cost (TYPE_MODE (ctype), speed);
3877
3878 aratio = ratio > 0 ? ratio : -ratio;
3879 if (aratio != 1)
3880 cost.cost += multiply_by_cost (aratio, TYPE_MODE (ctype), speed);
3881 return cost;
3882
3883 fallback:
3884 if (can_autoinc)
3885 *can_autoinc = false;
3886
3887 {
3888 /* Just get the expression, expand it and measure the cost. */
3889 tree comp = get_computation_at (data->current_loop, use, cand, at);
3890
3891 if (!comp)
3892 return infinite_cost;
3893
3894 if (address_p)
3895 comp = build_simple_mem_ref (comp);
3896
3897 return new_cost (computation_cost (comp, speed), 0);
3898 }
3899 }
3900
3901 /* Determines the cost of the computation by which USE is expressed
3902 from induction variable CAND. If ADDRESS_P is true, we just need
3903 to create an address from it, otherwise we want to get it into a
3904 register. A set of invariants we depend on is stored in
3905 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
3906 autoinc addressing is likely. */
3907
3908 static comp_cost
3909 get_computation_cost (struct ivopts_data *data,
3910 struct iv_use *use, struct iv_cand *cand,
3911 bool address_p, bitmap *depends_on, bool *can_autoinc)
3912 {
3913 return get_computation_cost_at (data,
3914 use, cand, address_p, depends_on, use->stmt,
3915 can_autoinc);
3916 }
3917
3918 /* Determines cost of basing replacement of USE on CAND in a generic
3919 expression. */
3920
3921 static bool
3922 determine_use_iv_cost_generic (struct ivopts_data *data,
3923 struct iv_use *use, struct iv_cand *cand)
3924 {
3925 bitmap depends_on;
3926 comp_cost cost;
3927
3928 /* The simple case first -- if we need to express the value of the preserved
3929 original biv, the cost is 0. This also prevents us from counting the
3930 cost of the increment twice -- once at this use and once in the cost of
3931 the candidate. */
3932 if (cand->pos == IP_ORIGINAL
3933 && cand->incremented_at == use->stmt)
3934 {
3935 set_use_iv_cost (data, use, cand, zero_cost, NULL, NULL_TREE);
3936 return true;
3937 }
3938
3939 cost = get_computation_cost (data, use, cand, false, &depends_on, NULL);
3940 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3941
3942 return !infinite_cost_p (cost);
3943 }
3944
3945 /* Determines cost of basing replacement of USE on CAND in an address. */
3946
3947 static bool
3948 determine_use_iv_cost_address (struct ivopts_data *data,
3949 struct iv_use *use, struct iv_cand *cand)
3950 {
3951 bitmap depends_on;
3952 bool can_autoinc;
3953 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
3954 &can_autoinc);
3955
3956 if (cand->ainc_use == use)
3957 {
3958 if (can_autoinc)
3959 cost.cost -= cand->cost_step;
3960 /* If we generated the candidate solely for exploiting autoincrement
3961 opportunities, and it turns out it can't be used, set the cost to
3962 infinity to make sure we ignore it. */
3963 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
3964 cost = infinite_cost;
3965 }
3966 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3967
3968 return !infinite_cost_p (cost);
3969 }
3970
3971 /* Computes value of candidate CAND at position AT in iteration NITER, and
3972 stores it to VAL. */
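/* That is, VAL = BASE + NITER * STEP, with one extra STEP added when AT
   follows the increment of CAND.  */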
3973
3974 static void
3975 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
3976 aff_tree *val)
3977 {
3978 aff_tree step, delta, nit;
3979 struct iv *iv = cand->iv;
3980 tree type = TREE_TYPE (iv->base);
3981 tree steptype = type;
3982 if (POINTER_TYPE_P (type))
3983 steptype = sizetype;
3984
3985 tree_to_aff_combination (iv->step, steptype, &step);
3986 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3987 aff_combination_convert (&nit, steptype);
3988 aff_combination_mult (&nit, &step, &delta);
3989 if (stmt_after_increment (loop, cand, at))
3990 aff_combination_add (&delta, &step);
3991
3992 tree_to_aff_combination (iv->base, type, val);
3993 aff_combination_add (val, &delta);
3994 }
3995
3996 /* Returns the period of induction variable IV. */
3997
3998 static tree
3999 iv_period (struct iv *iv)
4000 {
4001 tree step = iv->step, period, type;
4002 tree pow2div;
4003
4004 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4005
4006 /* The period of the iv is type_range / gcd (step, type range); as the type
4007 range is a power of two, it suffices to find the maximum power of two
4008 that divides the step. */
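/* For example, for a 32-bit unsigned type and step 12 == 4 * 3,
   num_ending_zeros is 2 and the mask built below is 2^30 - 1.  */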
4009 pow2div = num_ending_zeros (step);
4010 type = unsigned_type_for (TREE_TYPE (step));
4011
4012 period = build_low_bits_mask (type,
4013 (TYPE_PRECISION (type)
4014 - tree_low_cst (pow2div, 1)));
4015
4016 return period;
4017 }
4018
4019 /* Returns the comparison operator used when eliminating the iv USE. */
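/* That is, if the loop is exited when the condition is true, the eliminated
   test becomes VAR == BOUND (EQ_EXPR), and VAR != BOUND (NE_EXPR)
   otherwise.  */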
4020
4021 static enum tree_code
4022 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4023 {
4024 struct loop *loop = data->current_loop;
4025 basic_block ex_bb;
4026 edge exit;
4027
4028 ex_bb = gimple_bb (use->stmt);
4029 exit = EDGE_SUCC (ex_bb, 0);
4030 if (flow_bb_inside_loop_p (loop, exit->dest))
4031 exit = EDGE_SUCC (ex_bb, 1);
4032
4033 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4034 }
4035
4036 /* Check whether it is possible to express the condition in USE by comparison
4037 of candidate CAND. If so, store the value compared against in *BOUND. */
4038
4039 static bool
4040 may_eliminate_iv (struct ivopts_data *data,
4041 struct iv_use *use, struct iv_cand *cand, tree *bound)
4042 {
4043 basic_block ex_bb;
4044 edge exit;
4045 tree nit, period;
4046 struct loop *loop = data->current_loop;
4047 aff_tree bnd;
4048
4049 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4050 return false;
4051
4052 /* For now this works only for exits that dominate the loop latch.
4053 TODO: extend to other conditions inside the loop body. */
4054 ex_bb = gimple_bb (use->stmt);
4055 if (use->stmt != last_stmt (ex_bb)
4056 || gimple_code (use->stmt) != GIMPLE_COND
4057 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4058 return false;
4059
4060 exit = EDGE_SUCC (ex_bb, 0);
4061 if (flow_bb_inside_loop_p (loop, exit->dest))
4062 exit = EDGE_SUCC (ex_bb, 1);
4063 if (flow_bb_inside_loop_p (loop, exit->dest))
4064 return false;
4065
4066 nit = niter_for_exit (data, exit);
4067 if (!nit)
4068 return false;
4069
4070 /* Determine whether we can use the variable to test the exit condition.
4071 This is the case iff the period of the induction variable is greater
4072 than the number of iterations for which the exit condition is true. */
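/* For example, an 8-bit candidate with step 1 yields a period value of
   2^8 - 1 == 255; it cannot replace the exit test of a loop known to run
   300 iterations, since it would wrap around before reaching the bound.  */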
4073 period = iv_period (cand->iv);
4074
4075 /* If the number of iterations is constant, compare against it directly. */
4076 if (TREE_CODE (nit) == INTEGER_CST)
4077 {
4078 if (!tree_int_cst_lt (nit, period))
4079 return false;
4080 }
4081
4082 /* If not, and if this is the only possible exit of the loop, see whether
4083 we can get a conservative estimate on the number of iterations of the
4084 entire loop and compare against that instead. */
4085 else if (loop_only_exit_p (loop, exit))
4086 {
4087 double_int period_value, max_niter;
4088 if (!estimated_loop_iterations (loop, true, &max_niter))
4089 return false;
4090 period_value = tree_to_double_int (period);
4091 if (double_int_ucmp (max_niter, period_value) >= 0)
4092 return false;
4093 }
4094
4095 /* Otherwise, punt. */
4096 else
4097 return false;
4098
4099 cand_value_at (loop, cand, use->stmt, nit, &bnd);
4100
4101 *bound = aff_combination_to_tree (&bnd);
4102 /* It is unlikely that computing the number of iterations using division
4103 would be more profitable than keeping the original induction variable. */
4104 if (expression_expensive_p (*bound))
4105 return false;
4106 return true;
4107 }
4108
4109 /* Determines cost of basing replacement of USE on CAND in a condition. */
4110
4111 static bool
4112 determine_use_iv_cost_condition (struct ivopts_data *data,
4113 struct iv_use *use, struct iv_cand *cand)
4114 {
4115 tree bound = NULL_TREE;
4116 struct iv *cmp_iv;
4117 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4118 comp_cost elim_cost, express_cost, cost;
4119 bool ok;
4120 tree *control_var, *bound_cst;
4121
4122 /* Only consider real candidates. */
4123 if (!cand->iv)
4124 {
4125 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE);
4126 return false;
4127 }
4128
4129 /* Try iv elimination. */
4130 if (may_eliminate_iv (data, use, cand, &bound))
4131 {
4132 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4133 /* The bound is a loop invariant, so it will be computed
4134 only once. */
4135 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4136 }
4137 else
4138 elim_cost = infinite_cost;
4139
4140 /* Try expressing the original giv. If it is compared with an invariant,
4141 note that we cannot get rid of it. */
4142 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4143 NULL, &cmp_iv);
4144 gcc_assert (ok);
4145
4146 /* When the condition is a comparison of the candidate IV against
4147 zero, prefer this IV.
4148
4149 TODO: The constant that we're subtracting from the cost should
4150 be target-dependent. This information should be added to the
4151 target costs for each backend. */
4152 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4153 && integer_zerop (*bound_cst)
4154 && (operand_equal_p (*control_var, cand->var_after, 0)
4155 || operand_equal_p (*control_var, cand->var_before, 0)))
4156 elim_cost.cost -= 1;
4157
4158 express_cost = get_computation_cost (data, use, cand, false,
4159 &depends_on_express, NULL);
4160 fd_ivopts_data = data;
4161 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4162
4163 /* Choose the better approach, preferring the eliminated IV. */
4164 if (compare_costs (elim_cost, express_cost) <= 0)
4165 {
4166 cost = elim_cost;
4167 depends_on = depends_on_elim;
4168 depends_on_elim = NULL;
4169 }
4170 else
4171 {
4172 cost = express_cost;
4173 depends_on = depends_on_express;
4174 depends_on_express = NULL;
4175 bound = NULL_TREE;
4176 }
4177
4178 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
4179
4180 if (depends_on_elim)
4181 BITMAP_FREE (depends_on_elim);
4182 if (depends_on_express)
4183 BITMAP_FREE (depends_on_express);
4184
4185 return !infinite_cost_p (cost);
4186 }
4187
4188 /* Determines cost of basing replacement of USE on CAND. Returns false
4189 if USE cannot be based on CAND. */
4190
4191 static bool
4192 determine_use_iv_cost (struct ivopts_data *data,
4193 struct iv_use *use, struct iv_cand *cand)
4194 {
4195 switch (use->type)
4196 {
4197 case USE_NONLINEAR_EXPR:
4198 return determine_use_iv_cost_generic (data, use, cand);
4199
4200 case USE_ADDRESS:
4201 return determine_use_iv_cost_address (data, use, cand);
4202
4203 case USE_COMPARE:
4204 return determine_use_iv_cost_condition (data, use, cand);
4205
4206 default:
4207 gcc_unreachable ();
4208 }
4209 }
4210
4211 /* Return true if get_computation_cost indicates that autoincrement is
4212 a possibility for the pair of USE and CAND, false otherwise. */
4213
4214 static bool
4215 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4216 struct iv_cand *cand)
4217 {
4218 bitmap depends_on;
4219 bool can_autoinc;
4220 comp_cost cost;
4221
4222 if (use->type != USE_ADDRESS)
4223 return false;
4224
4225 cost = get_computation_cost (data, use, cand, true, &depends_on,
4226 &can_autoinc);
4227
4228 BITMAP_FREE (depends_on);
4229
4230 return !infinite_cost_p (cost) && can_autoinc;
4231 }
4232
4233 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4234 use that allows autoincrement, and set their AINC_USE if possible. */
4235
4236 static void
4237 set_autoinc_for_original_candidates (struct ivopts_data *data)
4238 {
4239 unsigned i, j;
4240
4241 for (i = 0; i < n_iv_cands (data); i++)
4242 {
4243 struct iv_cand *cand = iv_cand (data, i);
4244 struct iv_use *closest = NULL;
4245 if (cand->pos != IP_ORIGINAL)
4246 continue;
4247 for (j = 0; j < n_iv_uses (data); j++)
4248 {
4249 struct iv_use *use = iv_use (data, j);
4250 unsigned uid = gimple_uid (use->stmt);
4251 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at)
4252 || uid > gimple_uid (cand->incremented_at))
4253 continue;
4254 if (closest == NULL || uid > gimple_uid (closest->stmt))
4255 closest = use;
4256 }
4257 if (closest == NULL || !autoinc_possible_for_pair (data, closest, cand))
4258 continue;
4259 cand->ainc_use = closest;
4260 }
4261 }
4262
4263 /* Finds the candidates for the induction variables. */
4264
4265 static void
4266 find_iv_candidates (struct ivopts_data *data)
4267 {
4268 /* Add commonly used ivs. */
4269 add_standard_iv_candidates (data);
4270
4271 /* Add old induction variables. */
4272 add_old_ivs_candidates (data);
4273
4274 /* Add induction variables derived from uses. */
4275 add_derived_ivs_candidates (data);
4276
4277 set_autoinc_for_original_candidates (data);
4278
4279 /* Record the important candidates. */
4280 record_important_candidates (data);
4281 }
4282
4283 /* Determines costs of basing the use of the iv on an iv candidate. */
4284
4285 static void
4286 determine_use_iv_costs (struct ivopts_data *data)
4287 {
4288 unsigned i, j;
4289 struct iv_use *use;
4290 struct iv_cand *cand;
4291 bitmap to_clear = BITMAP_ALLOC (NULL);
4292
4293 alloc_use_cost_map (data);
4294
4295 for (i = 0; i < n_iv_uses (data); i++)
4296 {
4297 use = iv_use (data, i);
4298
4299 if (data->consider_all_candidates)
4300 {
4301 for (j = 0; j < n_iv_cands (data); j++)
4302 {
4303 cand = iv_cand (data, j);
4304 determine_use_iv_cost (data, use, cand);
4305 }
4306 }
4307 else
4308 {
4309 bitmap_iterator bi;
4310
4311 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
4312 {
4313 cand = iv_cand (data, j);
4314 if (!determine_use_iv_cost (data, use, cand))
4315 bitmap_set_bit (to_clear, j);
4316 }
4317
4318 /* Remove the candidates for which the cost is infinite from
4319 the list of related candidates. */
4320 bitmap_and_compl_into (use->related_cands, to_clear);
4321 bitmap_clear (to_clear);
4322 }
4323 }
4324
4325 BITMAP_FREE (to_clear);
4326
4327 if (dump_file && (dump_flags & TDF_DETAILS))
4328 {
4329 fprintf (dump_file, "Use-candidate costs:\n");
4330
4331 for (i = 0; i < n_iv_uses (data); i++)
4332 {
4333 use = iv_use (data, i);
4334
4335 fprintf (dump_file, "Use %d:\n", i);
4336 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
4337 for (j = 0; j < use->n_map_members; j++)
4338 {
4339 if (!use->cost_map[j].cand
4340 || infinite_cost_p (use->cost_map[j].cost))
4341 continue;
4342
4343 fprintf (dump_file, " %d\t%d\t%d\t",
4344 use->cost_map[j].cand->id,
4345 use->cost_map[j].cost.cost,
4346 use->cost_map[j].cost.complexity);
4347 if (use->cost_map[j].depends_on)
4348 bitmap_print (dump_file,
4349 use->cost_map[j].depends_on, "","");
4350 fprintf (dump_file, "\n");
4351 }
4352
4353 fprintf (dump_file, "\n");
4354 }
4355 fprintf (dump_file, "\n");
4356 }
4357 }
4358
4359 /* Determines cost of the candidate CAND. */
4360
4361 static void
4362 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
4363 {
4364 comp_cost cost_base;
4365 unsigned cost, cost_step;
4366 tree base;
4367
4368 if (!cand->iv)
4369 {
4370 cand->cost = 0;
4371 return;
4372 }
4373
4374 /* There are two costs associated with the candidate -- its increment
4375 and its initialization. The second is almost negligible for any loop
4376 that rolls enough, so we take it into account only a little. */
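/* Concretely, in the code below the increment is costed as one addition in
   the mode of the base type, while the cost of computing the base is
   discounted by adjust_setup_cost, since it is paid once rather than on
   every iteration.  */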
4377
4378 base = cand->iv->base;
4379 cost_base = force_var_cost (data, base, NULL);
4380 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)), data->speed);
4381
4382 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
4383
4384 /* Prefer the original ivs unless we may gain something by replacing them.
4385 The reason is to make debugging simpler, so this is not relevant for
4386 artificial ivs created by other optimization passes. */
4387 if (cand->pos != IP_ORIGINAL
4388 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
4389 cost++;
4390
4391 /* Prefer not to insert statements into the latch unless there are some
4392 already (so that we do not create unnecessary jumps). */
4393 if (cand->pos == IP_END
4394 && empty_block_p (ip_end_pos (data->current_loop)))
4395 cost++;
4396
4397 cand->cost = cost;
4398 cand->cost_step = cost_step;
4399 }
4400
4401 /* Determines costs of computation of the candidates. */
4402
4403 static void
4404 determine_iv_costs (struct ivopts_data *data)
4405 {
4406 unsigned i;
4407
4408 if (dump_file && (dump_flags & TDF_DETAILS))
4409 {
4410 fprintf (dump_file, "Candidate costs:\n");
4411 fprintf (dump_file, " cand\tcost\n");
4412 }
4413
4414 for (i = 0; i < n_iv_cands (data); i++)
4415 {
4416 struct iv_cand *cand = iv_cand (data, i);
4417
4418 determine_iv_cost (data, cand);
4419
4420 if (dump_file && (dump_flags & TDF_DETAILS))
4421 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
4422 }
4423
4424 if (dump_file && (dump_flags & TDF_DETAILS))
4425 fprintf (dump_file, "\n");
4426 }
4427
4428 /* Calculates cost for having SIZE induction variables. */
4429
4430 static unsigned
4431 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
4432 {
4433 /* We add size to the cost, so that we prefer eliminating ivs
4434 if possible. */
4435 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed);
4436 }
4437
4438 /* For each size of the induction variable set determine the penalty. */
4439
4440 static void
4441 determine_set_costs (struct ivopts_data *data)
4442 {
4443 unsigned j, n;
4444 gimple phi;
4445 gimple_stmt_iterator psi;
4446 tree op;
4447 struct loop *loop = data->current_loop;
4448 bitmap_iterator bi;
4449
4450 /* We use the following model (definitely improvable, especially the
4451 cost function -- TODO):
4452
4453 We estimate the number of registers available (using MD data), name it A.
4454
4455 We estimate the number of registers used by the loop, name it U. This
4456 number is obtained as the number of loop phi nodes (not counting virtual
4457 registers and bivs) + the number of variables from outside of the loop.
4458
4459 We set a reserve R (free regs that are used for temporary computations,
4460 etc.). For now the reserve is a constant 3.
4461
4462 Let I be the number of induction variables.
4463
4464 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
4465 encourage making a lot of ivs without reason).
4466 -- if A - R < U + I <= A, the cost is I * PRES_COST
4467 -- if U + I > A, the cost is I * PRES_COST and
4468 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
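/* A worked example with made-up parameters: for A = 16, R = 3 and U = 10,
   up to I = 3 ivs the cost is I * SMALL_COST; for I = 4 .. 6 it is
   I * PRES_COST; from I = 7 on, the spill term is added as well.  */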
4469
4470 if (dump_file && (dump_flags & TDF_DETAILS))
4471 {
4472 fprintf (dump_file, "Global costs:\n");
4473 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
4474 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
4475 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
4476 }
4477
4478 n = 0;
4479 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
4480 {
4481 phi = gsi_stmt (psi);
4482 op = PHI_RESULT (phi);
4483
4484 if (!is_gimple_reg (op))
4485 continue;
4486
4487 if (get_iv (data, op))
4488 continue;
4489
4490 n++;
4491 }
4492
4493 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
4494 {
4495 struct version_info *info = ver_info (data, j);
4496
4497 if (info->inv_id && info->has_nonlin_use)
4498 n++;
4499 }
4500
4501 data->regs_used = n;
4502 if (dump_file && (dump_flags & TDF_DETAILS))
4503 fprintf (dump_file, " regs_used %d\n", n);
4504
4505 if (dump_file && (dump_flags & TDF_DETAILS))
4506 {
4507 fprintf (dump_file, " cost for size:\n");
4508 fprintf (dump_file, " ivs\tcost\n");
4509 for (j = 0; j <= 2 * target_avail_regs; j++)
4510 fprintf (dump_file, " %d\t%d\n", j,
4511 ivopts_global_cost_for_size (data, j));
4512 fprintf (dump_file, "\n");
4513 }
4514 }
4515
4516 /* Returns true if A is a cheaper cost pair than B. */
4517
4518 static bool
4519 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
4520 {
4521 int cmp;
4522
4523 if (!a)
4524 return false;
4525
4526 if (!b)
4527 return true;
4528
4529 cmp = compare_costs (a->cost, b->cost);
4530 if (cmp < 0)
4531 return true;
4532
4533 if (cmp > 0)
4534 return false;
4535
4536 /* In case the costs are the same, prefer the cheaper candidate. */
4537 if (a->cand->cost < b->cand->cost)
4538 return true;
4539
4540 return false;
4541 }
4542
4543 /* Computes the cost field of IVS structure. */
4544
4545 static void
4546 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4547 {
4548 comp_cost cost = ivs->cand_use_cost;
4549 cost.cost += ivs->cand_cost;
4550 cost.cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4551
4552 ivs->cost = cost;
4553 }
4554
4555 /* Remove the invariants in set INVS from set IVS. */
4556
4557 static void
4558 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4559 {
4560 bitmap_iterator bi;
4561 unsigned iid;
4562
4563 if (!invs)
4564 return;
4565
4566 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4567 {
4568 ivs->n_invariant_uses[iid]--;
4569 if (ivs->n_invariant_uses[iid] == 0)
4570 ivs->n_regs--;
4571 }
4572 }
4573
4574 /* Set USE not to be expressed by any candidate in IVS. */
4575
4576 static void
4577 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4578 struct iv_use *use)
4579 {
4580 unsigned uid = use->id, cid;
4581 struct cost_pair *cp;
4582
4583 cp = ivs->cand_for_use[uid];
4584 if (!cp)
4585 return;
4586 cid = cp->cand->id;
4587
4588 ivs->bad_uses++;
4589 ivs->cand_for_use[uid] = NULL;
4590 ivs->n_cand_uses[cid]--;
4591
4592 if (ivs->n_cand_uses[cid] == 0)
4593 {
4594 bitmap_clear_bit (ivs->cands, cid);
4595 /* Do not count the pseudocandidates. */
4596 if (cp->cand->iv)
4597 ivs->n_regs--;
4598 ivs->n_cands--;
4599 ivs->cand_cost -= cp->cand->cost;
4600
4601 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4602 }
4603
4604 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
4605
4606 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4607 iv_ca_recount_cost (data, ivs);
4608 }
4609
4610 /* Add the invariants in set INVS to set IVS. */
4611
4612 static void
4613 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4614 {
4615 bitmap_iterator bi;
4616 unsigned iid;
4617
4618 if (!invs)
4619 return;
4620
4621 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4622 {
4623 ivs->n_invariant_uses[iid]++;
4624 if (ivs->n_invariant_uses[iid] == 1)
4625 ivs->n_regs++;
4626 }
4627 }
4628
4629 /* Set cost pair for USE in set IVS to CP. */
4630
4631 static void
4632 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
4633 struct iv_use *use, struct cost_pair *cp)
4634 {
4635 unsigned uid = use->id, cid;
4636
4637 if (ivs->cand_for_use[uid] == cp)
4638 return;
4639
4640 if (ivs->cand_for_use[uid])
4641 iv_ca_set_no_cp (data, ivs, use);
4642
4643 if (cp)
4644 {
4645 cid = cp->cand->id;
4646
4647 ivs->bad_uses--;
4648 ivs->cand_for_use[uid] = cp;
4649 ivs->n_cand_uses[cid]++;
4650 if (ivs->n_cand_uses[cid] == 1)
4651 {
4652 bitmap_set_bit (ivs->cands, cid);
4653 /* Do not count the pseudocandidates. */
4654 if (cp->cand->iv)
4655 ivs->n_regs++;
4656 ivs->n_cands++;
4657 ivs->cand_cost += cp->cand->cost;
4658
4659 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
4660 }
4661
4662 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
4663 iv_ca_set_add_invariants (ivs, cp->depends_on);
4664 iv_ca_recount_cost (data, ivs);
4665 }
4666 }
4667
4668 /* Extend set IVS by expressing USE by some of the candidates in it
4669 if possible. */
4670
4671 static void
4672 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
4673 struct iv_use *use)
4674 {
4675 struct cost_pair *best_cp = NULL, *cp;
4676 bitmap_iterator bi;
4677 unsigned i;
4678
4679 gcc_assert (ivs->upto >= use->id);
4680
4681 if (ivs->upto == use->id)
4682 {
4683 ivs->upto++;
4684 ivs->bad_uses++;
4685 }
4686
4687 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
4688 {
4689 cp = get_use_iv_cost (data, use, iv_cand (data, i));
4690
4691 if (cheaper_cost_pair (cp, best_cp))
4692 best_cp = cp;
4693 }
4694
4695 iv_ca_set_cp (data, ivs, use, best_cp);
4696 }
4697
4698 /* Get cost for assignment IVS. */
4699
4700 static comp_cost
4701 iv_ca_cost (struct iv_ca *ivs)
4702 {
4703 /* This was a conditional expression but it triggered a bug in
4704 Sun C 5.5. */
4705 if (ivs->bad_uses)
4706 return infinite_cost;
4707 else
4708 return ivs->cost;
4709 }
4710
4711 /* Returns true if all dependences of CP are among invariants in IVS. */
4712
4713 static bool
4714 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
4715 {
4716 unsigned i;
4717 bitmap_iterator bi;
4718
4719 if (!cp->depends_on)
4720 return true;
4721
4722 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
4723 {
4724 if (ivs->n_invariant_uses[i] == 0)
4725 return false;
4726 }
4727
4728 return true;
4729 }
4730
4731 /* Creates a change record for expressing USE by NEW_CP instead of OLD_CP
4732 and chains it before NEXT_CHANGE. */
4733
4734 static struct iv_ca_delta *
4735 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
4736 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
4737 {
4738 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
4739
4740 change->use = use;
4741 change->old_cp = old_cp;
4742 change->new_cp = new_cp;
4743 change->next_change = next_change;
4744
4745 return change;
4746 }
4747
4748 /* Joins two lists of changes L1 and L2. Destructive -- old lists
4749 are rewritten. */
4750
4751 static struct iv_ca_delta *
4752 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
4753 {
4754 struct iv_ca_delta *last;
4755
4756 if (!l2)
4757 return l1;
4758
4759 if (!l1)
4760 return l2;
4761
4762 for (last = l1; last->next_change; last = last->next_change)
4763 continue;
4764 last->next_change = l2;
4765
4766 return l1;
4767 }
4768
4769 /* Returns the cost pair through which USE is expressed in IVS. */
4770
4771 static struct cost_pair *
4772 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
4773 {
4774 return ivs->cand_for_use[use->id];
4775 }
4776
4777 /* Reverse the list of changes DELTA, forming its inverse. */
4778
4779 static struct iv_ca_delta *
4780 iv_ca_delta_reverse (struct iv_ca_delta *delta)
4781 {
4782 struct iv_ca_delta *act, *next, *prev = NULL;
4783 struct cost_pair *tmp;
4784
4785 for (act = delta; act; act = next)
4786 {
4787 next = act->next_change;
4788 act->next_change = prev;
4789 prev = act;
4790
4791 tmp = act->old_cp;
4792 act->old_cp = act->new_cp;
4793 act->new_cp = tmp;
4794 }
4795
4796 return prev;
4797 }
4798
4799 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
4800 reverted instead. */
4801
4802 static void
4803 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
4804 struct iv_ca_delta *delta, bool forward)
4805 {
4806 struct cost_pair *from, *to;
4807 struct iv_ca_delta *act;
4808
4809 if (!forward)
4810 delta = iv_ca_delta_reverse (delta);
4811
4812 for (act = delta; act; act = act->next_change)
4813 {
4814 from = act->old_cp;
4815 to = act->new_cp;
4816 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
4817 iv_ca_set_cp (data, ivs, act->use, to);
4818 }
4819
4820 if (!forward)
4821 iv_ca_delta_reverse (delta);
4822 }
4823
4824 /* Returns true if CAND is used in IVS. */
4825
4826 static bool
4827 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
4828 {
4829 return ivs->n_cand_uses[cand->id] > 0;
4830 }
4831
4832 /* Returns number of induction variable candidates in the set IVS. */
4833
4834 static unsigned
4835 iv_ca_n_cands (struct iv_ca *ivs)
4836 {
4837 return ivs->n_cands;
4838 }
4839
4840 /* Free the list of changes DELTA. */
4841
4842 static void
4843 iv_ca_delta_free (struct iv_ca_delta **delta)
4844 {
4845 struct iv_ca_delta *act, *next;
4846
4847 for (act = *delta; act; act = next)
4848 {
4849 next = act->next_change;
4850 free (act);
4851 }
4852
4853 *delta = NULL;
4854 }
4855
4856 /* Allocates a new iv candidate assignment. */
4857
4858 static struct iv_ca *
4859 iv_ca_new (struct ivopts_data *data)
4860 {
4861 struct iv_ca *nw = XNEW (struct iv_ca);
4862
4863 nw->upto = 0;
4864 nw->bad_uses = 0;
4865 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
4866 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
4867 nw->cands = BITMAP_ALLOC (NULL);
4868 nw->n_cands = 0;
4869 nw->n_regs = 0;
4870 nw->cand_use_cost = zero_cost;
4871 nw->cand_cost = 0;
4872 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
4873 nw->cost = zero_cost;
4874
4875 return nw;
4876 }
4877
4878 /* Free memory occupied by the set IVS. */
4879
4880 static void
4881 iv_ca_free (struct iv_ca **ivs)
4882 {
4883 free ((*ivs)->cand_for_use);
4884 free ((*ivs)->n_cand_uses);
4885 BITMAP_FREE ((*ivs)->cands);
4886 free ((*ivs)->n_invariant_uses);
4887 free (*ivs);
4888 *ivs = NULL;
4889 }
4890
4891 /* Dumps IVS to FILE. */
4892
4893 static void
4894 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
4895 {
4896 const char *pref = " invariants ";
4897 unsigned i;
4898 comp_cost cost = iv_ca_cost (ivs);
4899
4900 fprintf (file, " cost %d (complexity %d)\n", cost.cost, cost.complexity);
4901 bitmap_print (file, ivs->cands, " candidates ","\n");
4902
4903 for (i = 1; i <= data->max_inv_id; i++)
4904 if (ivs->n_invariant_uses[i])
4905 {
4906 fprintf (file, "%s%d", pref, i);
4907 pref = ", ";
4908 }
4909 fprintf (file, "\n");
4910 }
4911
4912 /* Try changing the candidate in IVS to CAND for each use. Return the cost
4913 of the new set, and store the differences in DELTA. The number of
4914 induction variables in the new set is stored in N_IVS. */
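/* Note that the delta is only committed temporarily below: the cost is
   measured and the changes are immediately reverted, so IVS itself is left
   unchanged and the caller decides whether to apply DELTA.  */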
4915
4916 static comp_cost
4917 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
4918 struct iv_cand *cand, struct iv_ca_delta **delta,
4919 unsigned *n_ivs)
4920 {
4921 unsigned i;
4922 comp_cost cost;
4923 struct iv_use *use;
4924 struct cost_pair *old_cp, *new_cp;
4925
4926 *delta = NULL;
4927 for (i = 0; i < ivs->upto; i++)
4928 {
4929 use = iv_use (data, i);
4930 old_cp = iv_ca_cand_for_use (ivs, use);
4931
4932 if (old_cp
4933 && old_cp->cand == cand)
4934 continue;
4935
4936 new_cp = get_use_iv_cost (data, use, cand);
4937 if (!new_cp)
4938 continue;
4939
4940 if (!iv_ca_has_deps (ivs, new_cp))
4941 continue;
4942
4943 if (!cheaper_cost_pair (new_cp, old_cp))
4944 continue;
4945
4946 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
4947 }
4948
4949 iv_ca_delta_commit (data, ivs, *delta, true);
4950 cost = iv_ca_cost (ivs);
4951 if (n_ivs)
4952 *n_ivs = iv_ca_n_cands (ivs);
4953 iv_ca_delta_commit (data, ivs, *delta, false);
4954
4955 return cost;
4956 }
4957
4958 /* Try narrowing set IVS by removing CAND. Return the cost of
4959 the new set and store the differences in DELTA. */
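/* Each use currently expressed by CAND must be re-expressed by the cheapest
   remaining candidate whose dependences are satisfied; if some use has no
   replacement, the narrowing fails with infinite_cost.  */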
4960
4961 static comp_cost
4962 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
4963 struct iv_cand *cand, struct iv_ca_delta **delta)
4964 {
4965 unsigned i, ci;
4966 struct iv_use *use;
4967 struct cost_pair *old_cp, *new_cp, *cp;
4968 bitmap_iterator bi;
4969 struct iv_cand *cnd;
4970 comp_cost cost;
4971
4972 *delta = NULL;
4973 for (i = 0; i < n_iv_uses (data); i++)
4974 {
4975 use = iv_use (data, i);
4976
4977 old_cp = iv_ca_cand_for_use (ivs, use);
4978 if (old_cp->cand != cand)
4979 continue;
4980
4981 new_cp = NULL;
4982
4983 if (data->consider_all_candidates)
4984 {
4985 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
4986 {
4987 if (ci == cand->id)
4988 continue;
4989
4990 cnd = iv_cand (data, ci);
4991
4992 cp = get_use_iv_cost (data, use, cnd);
4993 if (!cp)
4994 continue;
4995 if (!iv_ca_has_deps (ivs, cp))
4996 continue;
4997
4998 if (!cheaper_cost_pair (cp, new_cp))
4999 continue;
5000
5001 new_cp = cp;
5002 }
5003 }
5004 else
5005 {
5006 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5007 {
5008 if (ci == cand->id)
5009 continue;
5010
5011 cnd = iv_cand (data, ci);
5012
5013 cp = get_use_iv_cost (data, use, cnd);
5014 if (!cp)
5015 continue;
5016 if (!iv_ca_has_deps (ivs, cp))
5017 continue;
5018
5019 if (!cheaper_cost_pair (cp, new_cp))
5020 continue;
5021
5022 new_cp = cp;
5023 }
5024 }
5025
5026 if (!new_cp)
5027 {
5028 iv_ca_delta_free (delta);
5029 return infinite_cost;
5030 }
5031
5032 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5033 }
5034
5035 iv_ca_delta_commit (data, ivs, *delta, true);
5036 cost = iv_ca_cost (ivs);
5037 iv_ca_delta_commit (data, ivs, *delta, false);
5038
5039 return cost;
5040 }
5041
5042 /* Try optimizing the set of candidates IVS by removing candidates other
5043 than EXCEPT_CAND from it. Return the cost of the new set, and store
5044 the differences in DELTA. */
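/* Only one candidate is removed per narrowing attempt; after committing the
   best single removal, the function recurses so that several candidates may
   end up removed, and the combined changes are joined into DELTA.  */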
5045
5046 static comp_cost
5047 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5048 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5049 {
5050 bitmap_iterator bi;
5051 struct iv_ca_delta *act_delta, *best_delta;
5052 unsigned i;
5053 comp_cost best_cost, acost;
5054 struct iv_cand *cand;
5055
5056 best_delta = NULL;
5057 best_cost = iv_ca_cost (ivs);
5058
5059 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5060 {
5061 cand = iv_cand (data, i);
5062
5063 if (cand == except_cand)
5064 continue;
5065
5066 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5067
5068 if (compare_costs (acost, best_cost) < 0)
5069 {
5070 best_cost = acost;
5071 iv_ca_delta_free (&best_delta);
5072 best_delta = act_delta;
5073 }
5074 else
5075 iv_ca_delta_free (&act_delta);
5076 }
5077
5078 if (!best_delta)
5079 {
5080 *delta = NULL;
5081 return best_cost;
5082 }
5083
5084 /* Recurse to possibly remove other unnecessary ivs. */
5085 iv_ca_delta_commit (data, ivs, best_delta, true);
5086 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5087 iv_ca_delta_commit (data, ivs, best_delta, false);
5088 *delta = iv_ca_delta_join (best_delta, *delta);
5089 return best_cost;
5090 }
5091
5092 /* Tries to extend the set IVS in the best possible way in order
5093 to express USE. */
5094
5095 static bool
5096 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5097 struct iv_use *use)
5098 {
5099 comp_cost best_cost, act_cost;
5100 unsigned i;
5101 bitmap_iterator bi;
5102 struct iv_cand *cand;
5103 struct iv_ca_delta *best_delta = NULL, *act_delta;
5104 struct cost_pair *cp;
5105
5106 iv_ca_add_use (data, ivs, use);
5107 best_cost = iv_ca_cost (ivs);
5108
5109 cp = iv_ca_cand_for_use (ivs, use);
5110 if (cp)
5111 {
5112 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5113 iv_ca_set_no_cp (data, ivs, use);
5114 }
5115
5116 /* First try important candidates not based on any memory object. Only if
5117 this fails, try the specific ones. Rationale -- in loops with many
5118 variables the best choice often is to use just one generic biv. If we
5119 added many ivs specific to the uses here, the optimization algorithm
5120 would later be likely to get stuck in a local minimum, thus causing us
5121 to create too many ivs. Going from few ivs to more seems more likely
5122 to succeed -- starting from few ivs, replacing an expensive use by a
5123 specific iv should always be a win. */
5124 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5125 {
5126 cand = iv_cand (data, i);
5127
5128 if (cand->iv->base_object != NULL_TREE)
5129 continue;
5130
5131 if (iv_ca_cand_used_p (ivs, cand))
5132 continue;
5133
5134 cp = get_use_iv_cost (data, use, cand);
5135 if (!cp)
5136 continue;
5137
5138 iv_ca_set_cp (data, ivs, use, cp);
5139 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5140 iv_ca_set_no_cp (data, ivs, use);
5141 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5142
5143 if (compare_costs (act_cost, best_cost) < 0)
5144 {
5145 best_cost = act_cost;
5146
5147 iv_ca_delta_free (&best_delta);
5148 best_delta = act_delta;
5149 }
5150 else
5151 iv_ca_delta_free (&act_delta);
5152 }
5153
5154 if (infinite_cost_p (best_cost))
5155 {
5156 for (i = 0; i < use->n_map_members; i++)
5157 {
5158 cp = use->cost_map + i;
5159 cand = cp->cand;
5160 if (!cand)
5161 continue;
5162
5163 /* Already tried this. */
5164 if (cand->important && cand->iv->base_object == NULL_TREE)
5165 continue;
5166
5167 if (iv_ca_cand_used_p (ivs, cand))
5168 continue;
5169
5170 act_delta = NULL;
5171 iv_ca_set_cp (data, ivs, use, cp);
5172 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
5173 iv_ca_set_no_cp (data, ivs, use);
5174 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5175 cp, act_delta);
5176
5177 if (compare_costs (act_cost, best_cost) < 0)
5178 {
5179 best_cost = act_cost;
5180
5181 if (best_delta)
5182 iv_ca_delta_free (&best_delta);
5183 best_delta = act_delta;
5184 }
5185 else
5186 iv_ca_delta_free (&act_delta);
5187 }
5188 }
5189
5190 iv_ca_delta_commit (data, ivs, best_delta, true);
5191 iv_ca_delta_free (&best_delta);
5192
5193 return !infinite_cost_p (best_cost);
5194 }
5195
5196 /* Finds an initial assignment of candidates to uses. */
5197
5198 static struct iv_ca *
5199 get_initial_solution (struct ivopts_data *data)
5200 {
5201 struct iv_ca *ivs = iv_ca_new (data);
5202 unsigned i;
5203
5204 for (i = 0; i < n_iv_uses (data); i++)
5205 if (!try_add_cand_for (data, ivs, iv_use (data, i)))
5206 {
5207 iv_ca_free (&ivs);
5208 return NULL;
5209 }
5210
5211 return ivs;
5212 }
5213
5214 /* Tries to improve set of induction variables IVS. */
5215
5216 static bool
5217 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
5218 {
5219 unsigned i, n_ivs;
5220 comp_cost acost, best_cost = iv_ca_cost (ivs);
5221 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
5222 struct iv_cand *cand;
5223
5224 /* Try extending the set of induction variables by one. */
5225 for (i = 0; i < n_iv_cands (data); i++)
5226 {
5227 cand = iv_cand (data, i);
5228
5229 if (iv_ca_cand_used_p (ivs, cand))
5230 continue;
5231
5232 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
5233 if (!act_delta)
5234 continue;
5235
5236 /* If we successfully added the candidate and the set is small enough,
5237 try optimizing it by removing other candidates. */
5238 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
5239 {
5240 iv_ca_delta_commit (data, ivs, act_delta, true);
5241 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
5242 iv_ca_delta_commit (data, ivs, act_delta, false);
5243 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
5244 }
5245
5246 if (compare_costs (acost, best_cost) < 0)
5247 {
5248 best_cost = acost;
5249 iv_ca_delta_free (&best_delta);
5250 best_delta = act_delta;
5251 }
5252 else
5253 iv_ca_delta_free (&act_delta);
5254 }
5255
5256 if (!best_delta)
5257 {
5258 /* Try removing the candidates from the set instead. */
5259 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
5260
5261 /* Nothing more we can do. */
5262 if (!best_delta)
5263 return false;
5264 }
5265
5266 iv_ca_delta_commit (data, ivs, best_delta, true);
5267 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
5268 iv_ca_delta_free (&best_delta);
5269 return true;
5270 }
5271
5272 /* Attempts to find the optimal set of induction variables. We use a simple
5273 greedy heuristic -- we try to replace at most one candidate in the selected
5274 solution and remove the unused ivs as long as this improves the cost. */
5275
5276 static struct iv_ca *
5277 find_optimal_iv_set (struct ivopts_data *data)
5278 {
5279 unsigned i;
5280 struct iv_ca *set;
5281 struct iv_use *use;
5282
5283 /* Get the initial solution. */
5284 set = get_initial_solution (data);
5285 if (!set)
5286 {
5287 if (dump_file && (dump_flags & TDF_DETAILS))
5288 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
5289 return NULL;
5290 }
5291
5292 if (dump_file && (dump_flags & TDF_DETAILS))
5293 {
5294 fprintf (dump_file, "Initial set of candidates:\n");
5295 iv_ca_dump (data, dump_file, set);
5296 }
5297
5298 while (try_improve_iv_set (data, set))
5299 {
5300 if (dump_file && (dump_flags & TDF_DETAILS))
5301 {
5302 fprintf (dump_file, "Improved to:\n");
5303 iv_ca_dump (data, dump_file, set);
5304 }
5305 }
5306
5307 if (dump_file && (dump_flags & TDF_DETAILS))
5308 {
5309 comp_cost cost = iv_ca_cost (set);
5310 fprintf (dump_file, "Final cost %d (complexity %d)\n\n", cost.cost, cost.complexity);
5311 }
5312
5313 for (i = 0; i < n_iv_uses (data); i++)
5314 {
5315 use = iv_use (data, i);
5316 use->selected = iv_ca_cand_for_use (set, use)->cand;
5317 }
5318
5319 return set;
5320 }
5321
5322 /* Creates a new induction variable corresponding to CAND. */
5323
5324 static void
5325 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
5326 {
5327 gimple_stmt_iterator incr_pos;
5328 tree base;
5329 bool after = false;
5330
5331 if (!cand->iv)
5332 return;
5333
5334 switch (cand->pos)
5335 {
5336 case IP_NORMAL:
5337 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
5338 break;
5339
5340 case IP_END:
5341 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
5342 after = true;
5343 break;
5344
5345 case IP_AFTER_USE:
5346 after = true;
5347 /* fall through */
5348 case IP_BEFORE_USE:
5349 incr_pos = gsi_for_stmt (cand->incremented_at);
5350 break;
5351
5352 case IP_ORIGINAL:
5353 /* Mark that the iv is preserved. */
5354 name_info (data, cand->var_before)->preserve_biv = true;
5355 name_info (data, cand->var_after)->preserve_biv = true;
5356
5357 /* Rewrite the increment so that it uses var_before directly. */
5358 find_interesting_uses_op (data, cand->var_after)->selected = cand;
5359
5360 return;
5361 }
5362
5363 gimple_add_tmp_var (cand->var_before);
5364 add_referenced_var (cand->var_before);
5365
5366 base = unshare_expr (cand->iv->base);
5367
5368 create_iv (base, unshare_expr (cand->iv->step),
5369 cand->var_before, data->current_loop,
5370 &incr_pos, after, &cand->var_before, &cand->var_after);
5371 }
5372
5373 /* Creates new induction variables described in SET. */
5374
5375 static void
5376 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
5377 {
5378 unsigned i;
5379 struct iv_cand *cand;
5380 bitmap_iterator bi;
5381
5382 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
5383 {
5384 cand = iv_cand (data, i);
5385 create_new_iv (data, cand);
5386 }
5387 }
5388
5389
5390 /* Rewrites USE (definition of iv used in a nonlinear expression)
5391 using candidate CAND. */
5392
5393 static void
5394 rewrite_use_nonlinear_expr (struct ivopts_data *data,
5395 struct iv_use *use, struct iv_cand *cand)
5396 {
5397 tree comp;
5398 tree op, tgt;
5399 gimple ass;
5400 gimple_stmt_iterator bsi;
5401
5402 /* An important special case -- if we are asked to express the value of
5403 the original iv by itself, just exit; there is no need to
5404 introduce a new computation (that might also need casting the
5405 variable to unsigned and back). */
5406 if (cand->pos == IP_ORIGINAL
5407 && cand->incremented_at == use->stmt)
5408 {
5409 tree step, ctype, utype;
5410 enum tree_code incr_code = PLUS_EXPR, old_code;
5411
5412 gcc_assert (is_gimple_assign (use->stmt));
5413 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
5414
5415 step = cand->iv->step;
5416 ctype = TREE_TYPE (step);
5417 utype = TREE_TYPE (cand->var_after);
5418 if (TREE_CODE (step) == NEGATE_EXPR)
5419 {
5420 incr_code = MINUS_EXPR;
5421 step = TREE_OPERAND (step, 0);
5422 }
5423
5424 /* Check whether we may leave the computation unchanged.
5425 This is the case only if it does not rely on other
5426 computations in the loop -- otherwise, the computation
5427 we rely upon may be removed in remove_unused_ivs,
5428 thus leading to an ICE. */
5429 old_code = gimple_assign_rhs_code (use->stmt);
5430 if (old_code == PLUS_EXPR
5431 || old_code == MINUS_EXPR
5432 || old_code == POINTER_PLUS_EXPR)
5433 {
5434 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
5435 op = gimple_assign_rhs2 (use->stmt);
5436 else if (old_code != MINUS_EXPR
5437 && gimple_assign_rhs2 (use->stmt) == cand->var_before)
5438 op = gimple_assign_rhs1 (use->stmt);
5439 else
5440 op = NULL_TREE;
5441 }
5442 else
5443 op = NULL_TREE;
5444
5445 if (op
5446 && (TREE_CODE (op) == INTEGER_CST
5447 || operand_equal_p (op, step, 0)))
5448 return;
5449
5450 /* Otherwise, add the necessary computations to express
5451 the iv. */
5452 op = fold_convert (ctype, cand->var_before);
5453 comp = fold_convert (utype,
5454 build2 (incr_code, ctype, op,
5455 unshare_expr (step)));
5456 }
5457 else
5458 {
5459 comp = get_computation (data->current_loop, use, cand);
5460 gcc_assert (comp != NULL_TREE);
5461 }
5462
5463 switch (gimple_code (use->stmt))
5464 {
5465 case GIMPLE_PHI:
5466 tgt = PHI_RESULT (use->stmt);
5467
5468 /* If we should keep the biv, do not replace it. */
5469 if (name_info (data, tgt)->preserve_biv)
5470 return;
5471
5472 bsi = gsi_after_labels (gimple_bb (use->stmt));
5473 break;
5474
5475 case GIMPLE_ASSIGN:
5476 tgt = gimple_assign_lhs (use->stmt);
5477 bsi = gsi_for_stmt (use->stmt);
5478 break;
5479
5480 default:
5481 gcc_unreachable ();
5482 }
5483
5484 op = force_gimple_operand_gsi (&bsi, comp, false, SSA_NAME_VAR (tgt),
5485 true, GSI_SAME_STMT);
5486
5487 if (gimple_code (use->stmt) == GIMPLE_PHI)
5488 {
5489 ass = gimple_build_assign (tgt, op);
5490 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
5491
5492 bsi = gsi_for_stmt (use->stmt);
5493 remove_phi_node (&bsi, false);
5494 }
5495 else
5496 {
5497 gimple_assign_set_rhs_from_tree (&bsi, op);
5498 use->stmt = gsi_stmt (bsi);
5499 }
5500 }
5501
5502 /* Replaces the ssa name in index IDX with its base variable. Callback for
5503 for_each_index. */
5504
5505 static bool
5506 idx_remove_ssa_names (tree base, tree *idx,
5507 void *data ATTRIBUTE_UNUSED)
5508 {
5509 tree *op;
5510
5511 if (TREE_CODE (*idx) == SSA_NAME)
5512 *idx = SSA_NAME_VAR (*idx);
5513
5514 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
5515 {
5516 op = &TREE_OPERAND (base, 2);
5517 if (*op
5518 && TREE_CODE (*op) == SSA_NAME)
5519 *op = SSA_NAME_VAR (*op);
5520 op = &TREE_OPERAND (base, 3);
5521 if (*op
5522 && TREE_CODE (*op) == SSA_NAME)
5523 *op = SSA_NAME_VAR (*op);
5524 }
5525
5526 return true;
5527 }
5528
5529 /* Unshares REF and replaces the ssa names inside it with their base variables. */
5530
5531 static tree
5532 unshare_and_remove_ssa_names (tree ref)
5533 {
5534 ref = unshare_expr (ref);
5535 for_each_index (&ref, idx_remove_ssa_names, NULL);
5536
5537 return ref;
5538 }
5539
5540 /* Copies the reference information from OLD_REF to NEW_REF. */
5541
5542 static void
5543 copy_ref_info (tree new_ref, tree old_ref)
5544 {
5545 if (TREE_CODE (old_ref) == TARGET_MEM_REF)
5546 copy_mem_ref_info (new_ref, old_ref);
5547 else
5548 {
5549 TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
5550 TREE_SIDE_EFFECTS (new_ref) = TREE_SIDE_EFFECTS (old_ref);
5551 TREE_THIS_VOLATILE (new_ref) = TREE_THIS_VOLATILE (old_ref);
5552 }
5553 }
5554
5555 /* Rewrites USE (address that is an iv) using candidate CAND. */
5556
5557 static void
5558 rewrite_use_address (struct ivopts_data *data,
5559 struct iv_use *use, struct iv_cand *cand)
5560 {
5561 aff_tree aff;
5562 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5563 tree base_hint = NULL_TREE;
5564 tree ref;
5565 bool ok;
5566
5567 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
5568 gcc_assert (ok);
5569 unshare_aff_combination (&aff);
5570
5571 /* To avoid undefined overflow problems, all IV candidates use unsigned
5572 integer types. The drawback is that this makes it impossible for
5573 create_mem_ref to distinguish an IV that is based on a memory object
5574 from one that represents simply an offset.
5575
5576 To work around this problem, we pass a hint to create_mem_ref that
5577 indicates which variable (if any) in aff is an IV based on a memory
5578 object. Note that we consider only the candidate; if it is not
5579 based on an object, the base of the reference is in some subexpression
5580 of the use -- but these will use pointer types, so they are recognized
5581 by the create_mem_ref heuristics anyway. */
5582 if (cand->iv->base_object)
5583 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
5584
5585 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff, base_hint,
5586 data->speed);
5587 copy_ref_info (ref, *use->op_p);
5588 *use->op_p = ref;
5589 }
5590
5591 /* Rewrites USE (a condition one of whose arguments is an iv) using
5592 candidate CAND. */
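/* For illustration (a hypothetical example): an exit test i < n based on a
   pointer candidate p stepping by 4 may become p != BOUND, where BOUND,
   computed once in the preheader, holds the value of p after n
   iterations.  */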
5593
5594 static void
5595 rewrite_use_compare (struct ivopts_data *data,
5596 struct iv_use *use, struct iv_cand *cand)
5597 {
5598 tree comp, *var_p, op, bound;
5599 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
5600 enum tree_code compare;
5601 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
5602 bool ok;
5603
5604 bound = cp->value;
5605 if (bound)
5606 {
5607 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
5608 tree var_type = TREE_TYPE (var);
5609 gimple_seq stmts;
5610
5611 compare = iv_elimination_compare (data, use);
5612 bound = unshare_expr (fold_convert (var_type, bound));
5613 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
5614 if (stmts)
5615 gsi_insert_seq_on_edge_immediate (
5616 loop_preheader_edge (data->current_loop),
5617 stmts);
5618
5619 gimple_cond_set_lhs (use->stmt, var);
5620 gimple_cond_set_code (use->stmt, compare);
5621 gimple_cond_set_rhs (use->stmt, op);
5622 return;
5623 }
5624
5625 /* The induction variable elimination failed; just express the original
5626 giv. */
5627 comp = get_computation (data->current_loop, use, cand);
5628 gcc_assert (comp != NULL_TREE);
5629
5630 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
5631 gcc_assert (ok);
5632
5633 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
5634 true, GSI_SAME_STMT);
5635 }
5636
5637 /* Rewrites USE using candidate CAND. */
5638
5639 static void
5640 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
5641 {
5642 switch (use->type)
5643 {
5644 case USE_NONLINEAR_EXPR:
5645 rewrite_use_nonlinear_expr (data, use, cand);
5646 break;
5647
5648 case USE_ADDRESS:
5649 rewrite_use_address (data, use, cand);
5650 break;
5651
5652 case USE_COMPARE:
5653 rewrite_use_compare (data, use, cand);
5654 break;
5655
5656 default:
5657 gcc_unreachable ();
5658 }
5659
5660 update_stmt (use->stmt);
5661 }
5662
5663 /* Rewrite the uses using the selected induction variables. */
5664
5665 static void
5666 rewrite_uses (struct ivopts_data *data)
5667 {
5668 unsigned i;
5669 struct iv_cand *cand;
5670 struct iv_use *use;
5671
5672 for (i = 0; i < n_iv_uses (data); i++)
5673 {
5674 use = iv_use (data, i);
5675 cand = use->selected;
5676 gcc_assert (cand);
5677
5678 rewrite_use (data, use, cand);
5679 }
5680 }
5681
5682 /* Removes the ivs that are not used after rewriting. */
5683
5684 static void
5685 remove_unused_ivs (struct ivopts_data *data)
5686 {
5687 unsigned j;
5688 bitmap_iterator bi;
5689 bitmap toremove = BITMAP_ALLOC (NULL);
5690
5691 /* Figure out an order in which to release SSA DEFs so that we don't
5692 release something that we'd have to propagate into a debug stmt
5693 afterwards. */
5694 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5695 {
5696 struct version_info *info;
5697
5698 info = ver_info (data, j);
5699 if (info->iv
5700 && !integer_zerop (info->iv->step)
5701 && !info->inv_id
5702 && !info->iv->have_use_for
5703 && !info->preserve_biv)
5704 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
5705 }
5706
5707 release_defs_bitset (toremove);
5708
5709 BITMAP_FREE (toremove);
5710 }
5711
5712 /* Frees data allocated by the optimization of a single loop. */
5713
5714 static void
5715 free_loop_data (struct ivopts_data *data)
5716 {
5717 unsigned i, j;
5718 bitmap_iterator bi;
5719 tree obj;
5720
5721 if (data->niters)
5722 {
5723 pointer_map_destroy (data->niters);
5724 data->niters = NULL;
5725 }
5726
5727 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
5728 {
5729 struct version_info *info;
5730
5731 info = ver_info (data, i);
5732 if (info->iv)
5733 free (info->iv);
5734 info->iv = NULL;
5735 info->has_nonlin_use = false;
5736 info->preserve_biv = false;
5737 info->inv_id = 0;
5738 }
5739 bitmap_clear (data->relevant);
5740 bitmap_clear (data->important_candidates);
5741
5742 for (i = 0; i < n_iv_uses (data); i++)
5743 {
5744 struct iv_use *use = iv_use (data, i);
5745
5746 free (use->iv);
5747 BITMAP_FREE (use->related_cands);
5748 for (j = 0; j < use->n_map_members; j++)
5749 if (use->cost_map[j].depends_on)
5750 BITMAP_FREE (use->cost_map[j].depends_on);
5751 free (use->cost_map);
5752 free (use);
5753 }
5754 VEC_truncate (iv_use_p, data->iv_uses, 0);
5755
5756 for (i = 0; i < n_iv_cands (data); i++)
5757 {
5758 struct iv_cand *cand = iv_cand (data, i);
5759
5760 if (cand->iv)
5761 free (cand->iv);
5762 if (cand->depends_on)
5763 BITMAP_FREE (cand->depends_on);
5764 free (cand);
5765 }
5766 VEC_truncate (iv_cand_p, data->iv_candidates, 0);
5767
5768 if (data->version_info_size < num_ssa_names)
5769 {
5770 data->version_info_size = 2 * num_ssa_names;
5771 free (data->version_info);
5772 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
5773 }
5774
5775 data->max_inv_id = 0;
5776
5777 for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
5778 SET_DECL_RTL (obj, NULL_RTX);
5779
5780 VEC_truncate (tree, decl_rtl_to_reset, 0);
5781 }
5782
5783 /* Finalizes the data structures used by the iv optimization pass,
5784 stored in DATA. */
5785
5786 static void
5787 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
5788 {
5789 free_loop_data (data);
5790 free (data->version_info);
5791 BITMAP_FREE (data->relevant);
5792 BITMAP_FREE (data->important_candidates);
5793
5794 VEC_free (tree, heap, decl_rtl_to_reset);
5795 VEC_free (iv_use_p, heap, data->iv_uses);
5796 VEC_free (iv_cand_p, heap, data->iv_candidates);
5797 }
5798
5799 /* Optimizes the LOOP. Returns true if anything changed. */
5800
5801 static bool
5802 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
5803 {
5804 bool changed = false;
5805 struct iv_ca *iv_ca;
5806 edge exit;
5807 basic_block *body;
5808
5809 gcc_assert (!data->niters);
5810 data->current_loop = loop;
5811 data->speed = optimize_loop_for_speed_p (loop);
5812
5813 if (dump_file && (dump_flags & TDF_DETAILS))
5814 {
5815 fprintf (dump_file, "Processing loop %d\n", loop->num);
5816
5817 exit = single_dom_exit (loop);
5818 if (exit)
5819 {
5820 fprintf (dump_file, " single exit %d -> %d, exit condition ",
5821 exit->src->index, exit->dest->index);
5822 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
5823 fprintf (dump_file, "\n");
5824 }
5825
5826 fprintf (dump_file, "\n");
5827 }
5828
5829 body = get_loop_body (loop);
5830 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
5831 free (body);
5832
5833 /* For each ssa name determines whether it behaves as an induction variable
5834 in some loop. */
5835 if (!find_induction_variables (data))
5836 goto finish;
5837
5838 /* Finds interesting uses (item 1). */
5839 find_interesting_uses (data);
5840 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
5841 goto finish;
5842
5843 /* Finds candidates for the induction variables (item 2). */
5844 find_iv_candidates (data);
5845
5846 /* Calculates the costs (item 3, part 1). */
5847 determine_iv_costs (data);
5848 determine_use_iv_costs (data);
5849 determine_set_costs (data);
5850
5851 /* Find the optimal set of induction variables (item 3, part 2). */
5852 iv_ca = find_optimal_iv_set (data);
5853 if (!iv_ca)
5854 goto finish;
5855 changed = true;
5856
5857 /* Create the new induction variables (item 4, part 1). */
5858 create_new_ivs (data, iv_ca);
5859 iv_ca_free (&iv_ca);
5860
5861 /* Rewrite the uses (item 4, part 2). */
5862 rewrite_uses (data);
5863
5864 /* Remove the ivs that are unused after rewriting. */
5865 remove_unused_ivs (data);
5866
5867 /* We have changed the structure of induction variables; it might happen
5868 that definitions in the scev database refer to some of them that were
5869 eliminated. */
5870 scev_reset ();
5871
5872 finish:
5873 free_loop_data (data);
5874
5875 return changed;
5876 }
5877
5878 /* Main entry point. Optimizes induction variables in loops. */
5879
5880 void
5881 tree_ssa_iv_optimize (void)
5882 {
5883 struct loop *loop;
5884 struct ivopts_data data;
5885 loop_iterator li;
5886
5887 tree_ssa_iv_optimize_init (&data);
5888
5889 /* Optimize the loops starting with the innermost ones. */
5890 FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
5891 {
5892 if (dump_file && (dump_flags & TDF_DETAILS))
5893 flow_loop_dump (loop, dump_file, NULL, 1);
5894
5895 tree_ssa_iv_optimize_loop (&data, loop);
5896 }
5897
5898 tree_ssa_iv_optimize_finalize (&data);
5899 }