/* Induction variable optimizations.
   Copyright (C) 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING.  If not, write to the Free
Software Foundation, 51 Franklin Street, Fifth Floor, Boston, MA
02110-1301, USA.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (with respect to a cost function) set of variables is
      chosen.  The cost function assigns a cost to sets of induction
      variables and consists of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The
         cost reflects the time spent on modifying the induction variable's
         value to be usable for the given purpose (adding base and offset
         for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and enable us to decide costs
   more precisely, but getting all the interactions right would be
   complicated.  */
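
/* For illustration only (a sketch of the intended effect, not literal
   output of this pass): given a source loop like

     for (i = 0; i < n; i++)
       a[i] = b[i] + 1;

   the addresses a[i] and b[i] and the compare i < n are the interesting
   uses; the counter i and pointer ivs stepping by the element size are
   among the candidates.  If the pointer ivs win on cost, the loop is
   rewritten roughly as

     for (p = &a[0], q = &b[0]; p != &a[n]; p++, q++)
       *p = *q + 1;

   and the original counter is eliminated as dead code.  */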

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "rtl.h"
#include "tm_p.h"
#include "hard-reg-set.h"
#include "basic-block.h"
#include "output.h"
#include "diagnostic.h"
#include "tree-flow.h"
#include "tree-dump.h"
#include "timevar.h"
#include "cfgloop.h"
#include "varray.h"
#include "expr.h"
#include "tree-pass.h"
#include "ggc.h"
#include "insn-config.h"
#include "recog.h"
#include "pointer-set.h"
#include "hashtab.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "cfgloop.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"

/* The infinite cost.  */
#define INFTY 10000000

/* The expected number of loop iterations.  TODO -- use profiling instead of
   this.  */
#define AVG_LOOP_NITER(LOOP) 5

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable
                           points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier of that use, if we do.  */
};
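
/* Example (hypothetical values): in a loop walking an array of 4-byte
   ints through a pointer p that starts at &a[0] and advances by one
   element per iteration, BASE is &a[0], STEP is 4 and BASE_OBJECT is
   &a.  For a plain counter starting at 0 and incremented by 1, BASE is
   0, STEP is 1 and BASE_OBJECT is NULL, as the counter is not of
   pointer type.  */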

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  unsigned inv_id;      /* Id of an invariant.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  unsigned cost;        /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  tree stmt;            /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  enum iv_position pos; /* Where it is computed.  */
  tree incremented_at;  /* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;
DEF_VEC_P(iv_use_p);
DEF_VEC_ALLOC_P(iv_use_p,heap);

typedef struct iv_cand *iv_cand_p;
DEF_VEC_P(iv_cand_p);
DEF_VEC_ALLOC_P(iv_cand_p,heap);

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* The uses of induction variables.  */
  VEC(iv_use_p,heap) *iv_uses;

  /* The candidates.  */
  VEC(iv_cand_p,heap) *iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  unsigned cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* Total cost of the assignment.  */
  unsigned cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this many ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static VEC(tree,heap) *decl_rtl_to_reset;

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return VEC_length (iv_use_p, data->iv_uses);
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_use_p, data->iv_uses, i);
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return VEC_length (iv_cand_p, data->iv_candidates);
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return VEC_index (iv_cand_p, data->iv_candidates, i);
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

extern void dump_iv (FILE *, struct iv *);
void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

extern void dump_use (FILE *, struct iv_use *);
void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_generic_expr (file, use->stmt, TDF_SLIM);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

extern void dump_uses (FILE *, struct ivopts_data *);
void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

extern void dump_cand (FILE *, struct iv_cand *);
void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, tree stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = bb_for_stmt (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  */

static bool
stmt_after_ip_original_pos (struct iv_cand *cand, tree stmt)
{
  basic_block cand_bb = bb_for_stmt (cand->incremented_at);
  basic_block stmt_bb = bb_for_stmt (stmt);
  block_stmt_iterator bsi;

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  /* Scan the block from the end, since the original ivs are usually
     incremented at the end of the loop body.  */
  for (bsi = bsi_last (stmt_bb); ; bsi_prev (&bsi))
    {
      if (bsi_stmt (bsi) == cand->incremented_at)
        return false;
      if (bsi_stmt (bsi) == stmt)
        return true;
    }
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, tree stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
      return stmt_after_ip_original_pos (cand, stmt);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns tree describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static tree
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc desc;
  tree niter;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We must know it
         unconditionally (i.e., without possibility of # of iterations
         being zero).  Also, we cannot safely work with ssa names that
         appear in phi nodes on abnormal edges, so that we do not create
         overlapping life ranges for them (PR 27283).  */
      if (number_of_iterations_exit (data->current_loop,
                                     exit, &desc, true)
          && integer_zerop (desc.may_be_zero)
          && !contains_abnormal_ssa_name_p (desc.niter))
        niter = desc.niter;
      else
        niter = NULL_TREE;

      *pointer_map_insert (data->niters, exit) = niter;
    }
  else
    niter = (tree) *slot;

  return niter;
}

/* Returns tree describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static tree
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses = VEC_alloc (iv_use_p, heap, 20);
  data->iv_candidates = VEC_alloc (iv_cand_p, heap, 20);
  decl_rtl_to_reset = VEC_alloc (tree, heap, 20);
}

/* Returns the memory object to which EXPR points, or NULL_TREE if we are
   able to determine that it does not point to any such object.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj, op0, op1;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (TREE_CODE (expr) == NOP_EXPR
      || TREE_CODE (expr) == CONVERT_EXPR)
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == INDIRECT_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = determine_base_object (TREE_OPERAND (expr, 0));
      op1 = determine_base_object (TREE_OPERAND (expr, 1));

      if (!op1)
        return op0;

      if (!op0)
        return (code == PLUS_EXPR
                ? op1
                : fold_build1 (NEGATE_EXPR, ptr_type_node, op1));

      return fold_build2 (code, ptr_type_node, op0, op1);

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
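
/* E.g. (an illustrative sketch): for the pointer expression
   "&a[0] + 4 * i_3", the PLUS_EXPR case recurses into both operands;
   "4 * i_3" is not of pointer type and contributes NULL_TREE, while
   the ADDR_EXPR operand yields &a via get_base_address, so &a is
   returned as the base object.  */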

/* Allocates an induction variable with given initial value BASE and step
   STEP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (tree phi)
{
  struct loop *loop = bb_for_stmt (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (!is_gimple_reg (name))
    return NULL_TREE;

  if (!simple_iv (loop, phi, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  tree phi, step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        step = fold_convert (type, step);

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}
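
/* E.g. (illustrative gimple, ssa names hypothetical): for the header phi

     i_1 = PHI <0 (preheader), i_2 (latch)>

   where the latch increment is "i_2 = i_1 + 1", determine_biv_step
   yields the step 1, the preheader argument 0 becomes the base, and
   the biv {base 0, step 1} is recorded for i_1.  */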

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  tree phi, var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;

  for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
    {
      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment occurs in a subloop, ignore it.  */
      incr_bb = bb_for_stmt (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, tree stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (TREE_CODE (stmt) != GIMPLE_MODIFY_STMT)
    return false;

  lhs = GIMPLE_STMT_OPERAND (stmt, 0);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, stmt, GIMPLE_STMT_OPERAND (stmt, 1), iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, tree stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, GIMPLE_STMT_OPERAND (stmt, 0), iv.base, iv.step);
}
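
/* E.g. (illustrative): if i_4 is a biv with base 0 and step 1, the
   statement "off_7 = i_4 * 4" is recognized by simple_iv as the affine
   iv {base 0, step 4}, which is recorded as a giv for off_7.  */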

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  block_stmt_iterator bsi;

  for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
    find_givs_in_stmt (data, bsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      tree niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter, TDF_SLIM);
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            tree stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name, so that it is not shown in the dumps, in case
     the caller has not done so already.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  VEC_safe_push (iv_use_p, heap, data->iv_uses, use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || !is_gimple_reg (op))
    return;

  bb = bb_for_stmt (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  tree stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (TREE_CODE (stmt) == PHI_NODE
              || TREE_CODE (stmt) == GIMPLE_MODIFY_STMT);

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition *COND_P, checks whether it is a compare of an induction
   variable and an invariant.  If this is the case, CONTROL_VAR is set
   to the location of the iv, BOUND to the location of the invariant,
   IV_VAR and IV_BOUND are set to the corresponding induction variable
   descriptions, and true is returned.  If this is not the case,
   CONTROL_VAR and BOUND are set to the arguments of the condition and
   false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, tree *cond_p,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The nodes returned when COND has just one operand.  Note that you should
     not modify anything in BOUND or IV_BOUND because of this.  */
  static struct iv const_iv;
  static tree zero;
  tree cond = *cond_p;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (cond) == SSA_NAME)
    {
      op0 = cond_p;
      iv0 = get_iv (data, cond);
      ret = (iv0 && !integer_zerop (iv0->step));
      goto end;
    }

  if (!COMPARISON_CLASS_P (cond))
    {
      op0 = cond_p;
      goto end;
    }

  op0 = &TREE_OPERAND (cond, 0);
  op1 = &TREE_OPERAND (cond, 1);
  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* The control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
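
/* E.g. (illustrative): for the exit test "i_4 < n_7", where i_4 is an iv
   with step 1 and n_7 is defined outside the loop (so its iv has step 0),
   *CONTROL_VAR is set to the location of i_4, *BOUND to that of n_7, and
   true is returned.  For "n_7 < i_4", the operands are swapped first,
   since the control variable may be on either side of the compare.  */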

/* Checks whether the condition *COND_P in STMT is interesting
   and if so, records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, tree stmt, tree *cond_p)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, cond_p, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, cond_p, civ, stmt, USE_COMPARE);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = bb_for_stmt (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr) && !GIMPLE_STMT_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (!expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  tree stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  if (TREE_CODE (base) == MISALIGNED_INDIRECT_REF
      || TREE_CODE (base) == ALIGN_INDIRECT_REF)
    return false;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX: for a base of *D42 with iv->base being &x[0], we produce
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = build_int_cst (sizetype, 1);

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}

/* Records use in index IDX.  Callback for for_each_index.  The ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* Returns true if memory reference REF may be unaligned.  */

static bool
may_be_unaligned_p (tree ref)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = TYPE_ALIGN (base_type);

  if (mode != BLKmode
      && (base_align < GET_MODE_ALIGNMENT (mode)
          || bitpos % GET_MODE_ALIGNMENT (mode) != 0
          || bitpos % BITS_PER_UNIT != 0))
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

static bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      return AGGREGATE_TYPE_P (TREE_TYPE (expr))
             && !AGGREGATE_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 0)));

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, tree stmt, tree *op_p)
{
  tree base = *op_p, step = build_int_cst (sizetype, 0);
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (stmt_ann (stmt)->has_volatile_ops)
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  if (may_be_nonaddressable_p (base))
    goto fail;

  if (STRICT_ALIGNMENT
      && may_be_unaligned_p (base))
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = build_int_cst (sizetype, 0);
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      gcc_assert (TREE_CODE (base) != ALIGN_INDIRECT_REF);
      gcc_assert (TREE_CODE (base) != MISALIGNED_INDIRECT_REF);

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == INDIRECT_REF)
            *ref = fold_indirect_ref (*ref);
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}
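
/* E.g. (illustrative, assuming 4-byte array elements): for the store
   "a[i_4] = 1" with i_4 an iv {base 0, step 1}, idx_find_step replaces
   the index by its initial value and accumulates the scaled step, so
   the use is recorded with the address iv {base &a[0], step 4}.  */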

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, tree stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, tree stmt)
{
  struct iv *iv;
  tree op, lhs, rhs;
  ssa_op_iter iter;
  use_operand_p use_p;

  find_invariants_stmt (data, stmt);

  if (TREE_CODE (stmt) == COND_EXPR)
    {
      find_interesting_uses_cond (data, stmt, &COND_EXPR_COND (stmt));
      return;
    }

  if (TREE_CODE (stmt) == GIMPLE_MODIFY_STMT)
    {
      lhs = GIMPLE_STMT_OPERAND (stmt, 0);
      rhs = GIMPLE_STMT_OPERAND (stmt, 1);

      if (TREE_CODE (lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      switch (TREE_CODE_CLASS (TREE_CODE (rhs)))
        {
        case tcc_comparison:
          find_interesting_uses_cond (data, stmt,
                                      &GIMPLE_STMT_OPERAND (stmt, 1));
          return;

        case tcc_reference:
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 1));
          if (REFERENCE_CLASS_P (lhs))
            find_interesting_uses_address (data, stmt,
                                           &GIMPLE_STMT_OPERAND (stmt, 0));
          return;

        default: ;
        }

      if (REFERENCE_CLASS_P (lhs)
          && is_gimple_val (rhs))
        {
          find_interesting_uses_address (data, stmt,
                                         &GIMPLE_STMT_OPERAND (stmt, 0));
          find_interesting_uses_op (data, rhs);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (TREE_CODE (stmt) == PHI_NODE
      && bb_for_stmt (stmt) == data->current_loop->header)
    {
      lhs = PHI_RESULT (stmt);
      iv = get_iv (data, lhs);

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  tree phi, def;

  for (phi = phi_nodes (exit->dest); phi; phi = PHI_CHAIN (phi))
    {
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (is_gimple_reg (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  block_stmt_iterator bsi;
  tree phi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (phi = phi_nodes (bb); phi; phi = PHI_CHAIN (phi))
        find_interesting_uses_stmt (data, phi);
      for (bsi = bsi_start (bb); !bsi_end_p (bsi); bsi_next (&bsi))
        find_interesting_uses_stmt (data, bsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  If INSIDE_ADDR
   is true, assume we are inside an address.  If TOP_COMPREF is true, assume
   we are at the top-level of the processed address.  */

static tree
strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
                unsigned HOST_WIDE_INT *offset)
{
  tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
  enum tree_code code;
  tree type, orig_type = TREE_TYPE (expr);
  unsigned HOST_WIDE_INT off0, off1, st;
  tree orig_expr = expr;

  STRIP_NOPS (expr);

  type = TREE_TYPE (expr);
  code = TREE_CODE (expr);
  *offset = 0;

  switch (code)
    {
    case INTEGER_CST:
      if (!cst_and_fits_in_hwi (expr)
          || integer_zerop (expr))
        return orig_expr;

      *offset = int_cst_value (expr);
      return build_int_cst (orig_type, 0);

    case PLUS_EXPR:
    case MINUS_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op1 = TREE_OPERAND (expr, 1);

      op0 = strip_offset_1 (op0, false, false, &off0);
      op1 = strip_offset_1 (op1, false, false, &off1);

      *offset = (code == PLUS_EXPR ? off0 + off1 : off0 - off1);
      if (op0 == TREE_OPERAND (expr, 0)
          && op1 == TREE_OPERAND (expr, 1))
        return orig_expr;

      if (integer_zerop (op1))
        expr = op0;
      else if (integer_zerop (op0))
        {
          if (code == PLUS_EXPR)
            expr = op1;
          else
            expr = fold_build1 (NEGATE_EXPR, type, op1);
        }
      else
        expr = fold_build2 (code, type, op0, op1);

      return fold_convert (orig_type, expr);

    case ARRAY_REF:
      if (!inside_addr)
        return orig_expr;

      step = array_ref_element_size (expr);
      if (!cst_and_fits_in_hwi (step))
        break;

      st = int_cst_value (step);
      op1 = TREE_OPERAND (expr, 1);
      op1 = strip_offset_1 (op1, false, false, &off1);
      *offset = off1 * st;

      if (top_compref
          && integer_zerop (op1))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset += off0;
          return op0;
        }
      break;

    case COMPONENT_REF:
      if (!inside_addr)
        return orig_expr;

      tmp = component_ref_field_offset (expr);
      if (top_compref
          && cst_and_fits_in_hwi (tmp))
        {
          /* Strip the component reference completely.  */
          op0 = TREE_OPERAND (expr, 0);
          op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
          *offset = off0 + int_cst_value (tmp);
          return op0;
        }
      break;

    case ADDR_EXPR:
      op0 = TREE_OPERAND (expr, 0);
      op0 = strip_offset_1 (op0, true, true, &off0);
      *offset += off0;

      if (op0 == TREE_OPERAND (expr, 0))
        return orig_expr;

      expr = build_fold_addr_expr (op0);
      return fold_convert (orig_type, expr);

    case INDIRECT_REF:
      inside_addr = false;
      break;

    default:
      return orig_expr;
    }

  /* Default handling of expressions for which we want to recurse into
     the first operand.  */
  op0 = TREE_OPERAND (expr, 0);
  op0 = strip_offset_1 (op0, inside_addr, false, &off0);
  *offset += off0;

  if (op0 == TREE_OPERAND (expr, 0)
      && (!op1 || op1 == TREE_OPERAND (expr, 1)))
    return orig_expr;

  expr = copy_node (expr);
  TREE_OPERAND (expr, 0) = op0;
  if (op1)
    TREE_OPERAND (expr, 1) = op1;

  /* Inside an address, we might strip the top-level component references,
     thus changing the type of the expression.  Handling of ADDR_EXPR
     will fix that.  */
  expr = fold_convert (orig_type, expr);

  return expr;
}

/* Strips constant offsets from EXPR and stores them to OFFSET.  */

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
{
  return strip_offset_1 (expr, false, false, offset);
}
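
/* E.g. (illustrative, assuming 4-byte array elements): strip_offset
   applied to "&a[i_4 + 3]" returns "&a[i_4]" and stores 12 to *OFFSET;
   the constant part of the index is peeled off and scaled by the
   element size.  */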

/* Returns a variant of TYPE that can be used as a base for different uses.
   We return an unsigned type with the same precision, which avoids problems
   with overflows.  */

static tree
generic_type_for (tree type)
{
  if (POINTER_TYPE_P (type))
    return unsigned_type_for (type);

  if (TYPE_UNSIGNED (type))
    return type;

  return unsigned_type_for (type);
}

/* Records invariants in *EXPR_P.  Callback for walk_tree.  DATA contains
   the bitmap in which we should store them.  */

static struct ivopts_data *fd_ivopts_data;
static tree
find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
{
  bitmap *depends_on = (bitmap *) data;
  struct version_info *info;

  if (TREE_CODE (*expr_p) != SSA_NAME)
    return NULL_TREE;
  info = name_info (fd_ivopts_data, *expr_p);

  if (!info->inv_id || info->has_nonlin_use)
    return NULL_TREE;

  if (!*depends_on)
    *depends_on = BITMAP_ALLOC (NULL);
  bitmap_set_bit (*depends_on, info->inv_id);

  return NULL_TREE;
}

/* Adds a candidate BASE + STEP * i.  The candidate's important flag is set
   to IMPORTANT and its position to POS.  If USE is not NULL, the candidate
   is set as related to it.  If both BASE and STEP are NULL, we add a
   pseudocandidate for the replacement of the final value of the iv by a
   direct computation.  */

static struct iv_cand *
add_candidate_1 (struct ivopts_data *data,
                 tree base, tree step, bool important, enum iv_position pos,
                 struct iv_use *use, tree incremented_at)
{
  unsigned i;
  struct iv_cand *cand = NULL;
  tree type, orig_type;

  if (base)
    {
      orig_type = TREE_TYPE (base);
      type = generic_type_for (orig_type);
      if (type != orig_type)
        {
          base = fold_convert (type, base);
          step = fold_convert (type, step);
        }
    }

  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (cand->pos != pos)
        continue;

      if (cand->incremented_at != incremented_at)
        continue;

      if (!cand->iv)
        {
          if (!base && !step)
            break;

          continue;
        }

      if (!base && !step)
        continue;

      if (operand_equal_p (base, cand->iv->base, 0)
          && operand_equal_p (step, cand->iv->step, 0))
        break;
    }

  if (i == n_iv_cands (data))
    {
      cand = XCNEW (struct iv_cand);
      cand->id = i;

      if (!base && !step)
        cand->iv = NULL;
      else
        cand->iv = alloc_iv (base, step);

      cand->pos = pos;
      if (pos != IP_ORIGINAL && cand->iv)
        {
          cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
          cand->var_after = cand->var_before;
        }
      cand->important = important;
      cand->incremented_at = incremented_at;
      VEC_safe_push (iv_cand_p, heap, data->iv_candidates, cand);

      if (step
          && TREE_CODE (step) != INTEGER_CST)
        {
          fd_ivopts_data = data;
          walk_tree (&step, find_depends, &cand->depends_on, NULL);
        }

      if (dump_file && (dump_flags & TDF_DETAILS))
        dump_cand (dump_file, cand);
    }

  if (important && !cand->important)
    {
      cand->important = true;
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is important\n", cand->id);
    }

  if (use)
    {
      bitmap_set_bit (use->related_cands, i);
      if (dump_file && (dump_flags & TDF_DETAILS))
        fprintf (dump_file, "Candidate %d is related to use %d\n",
                 cand->id, use->id);
    }

  return cand;
}

/* Returns true if incrementing the induction variable at the end of the LOOP
   is allowed.

   The purpose is to avoid splitting the latch edge with a biv increment, thus
   creating a jump that might confuse other optimization passes and leave
   less freedom to the scheduler.  So we allow IP_END_POS only if IP_NORMAL_POS
   is not available (so we do not have a better alternative), or if the latch
   edge is already nonempty.  */

static bool
allow_ip_end_pos_p (struct loop *loop)
{
  if (!ip_normal_pos (loop))
    return true;

  if (!empty_block_p (ip_end_pos (loop)))
    return true;

  return false;
}

/* Adds a candidate BASE + STEP * i.  The candidate's important flag is set
   to IMPORTANT.  If USE is not NULL, the candidate is set as related to it.
   The candidate computation is scheduled on all available positions.  */

static void
add_candidate (struct ivopts_data *data,
               tree base, tree step, bool important, struct iv_use *use)
{
  if (ip_normal_pos (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL_TREE);
  if (ip_end_pos (data->current_loop)
      && allow_ip_end_pos_p (data->current_loop))
    add_candidate_1 (data, base, step, important, IP_END, use, NULL_TREE);
}

/* Adds a standard "0 + 1 * iteration" iv candidate for a
   type with SIZE bits.  */

static void
add_standard_iv_candidates_for_size (struct ivopts_data *data,
                                     unsigned int size)
{
  tree type = lang_hooks.types.type_for_size (size, true);
  add_candidate (data, build_int_cst (type, 0), build_int_cst (type, 1),
                 true, NULL);
}

/* Adds standard iv candidates.  */

static void
add_standard_iv_candidates (struct ivopts_data *data)
{
  add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE);

  /* The same for a double-integer type, if it is still fast enough.  */
  if (BITS_PER_WORD >= INT_TYPE_SIZE * 2)
    add_standard_iv_candidates_for_size (data, INT_TYPE_SIZE * 2);
}
2085
2086
2087 /* Adds candidates based on the old induction variable IV. */
2088
2089 static void
2090 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2091 {
2092 tree phi, def;
2093 struct iv_cand *cand;
2094
2095 add_candidate (data, iv->base, iv->step, true, NULL);
2096
2097 /* The same, but with initial value zero. */
2098 add_candidate (data,
2099 build_int_cst (TREE_TYPE (iv->base), 0),
2100 iv->step, true, NULL);
2101
2102 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2103 if (TREE_CODE (phi) == PHI_NODE)
2104 {
2105 /* Additionally record the possibility of leaving the original iv
2106 untouched. */
2107 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2108 cand = add_candidate_1 (data,
2109 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2110 SSA_NAME_DEF_STMT (def));
2111 cand->var_before = iv->ssa_name;
2112 cand->var_after = def;
2113 }
2114 }
2115
2116 /* Adds candidates based on the old induction variables. */
2117
2118 static void
2119 add_old_ivs_candidates (struct ivopts_data *data)
2120 {
2121 unsigned i;
2122 struct iv *iv;
2123 bitmap_iterator bi;
2124
2125 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2126 {
2127 iv = ver_info (data, i)->iv;
2128 if (iv && iv->biv_p && !integer_zerop (iv->step))
2129 add_old_iv_candidates (data, iv);
2130 }
2131 }
2132
2133 /* Adds candidates based on the value of the induction variable IV and USE. */
2134
2135 static void
2136 add_iv_value_candidates (struct ivopts_data *data,
2137 struct iv *iv, struct iv_use *use)
2138 {
2139 unsigned HOST_WIDE_INT offset;
2140 tree base;
2141
2142 add_candidate (data, iv->base, iv->step, false, use);
2143
2144 /* The same, but with initial value zero. Make such a variable important,
2145 since it is generic enough that possibly many uses may be based
2146 on it. */
2147 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2148 iv->step, true, use);
2149
2150 /* Third, try removing the constant offset. */
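/* Illustrative example: for iv->base == &a[0] + 16, this adds a
candidate with base &a[0], the constant 16 having moved to OFFSET. */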
2151 base = strip_offset (iv->base, &offset);
2152 if (offset)
2153 add_candidate (data, base, iv->step, false, use);
2154 }
2155
2156 /* Adds candidates based on the uses. */
2157
2158 static void
2159 add_derived_ivs_candidates (struct ivopts_data *data)
2160 {
2161 unsigned i;
2162
2163 for (i = 0; i < n_iv_uses (data); i++)
2164 {
2165 struct iv_use *use = iv_use (data, i);
2166
2167 if (!use)
2168 continue;
2169
2170 switch (use->type)
2171 {
2172 case USE_NONLINEAR_EXPR:
2173 case USE_COMPARE:
2174 case USE_ADDRESS:
2175 /* Just add the ivs based on the value of the iv used here. */
2176 add_iv_value_candidates (data, use->iv, use);
2177 break;
2178
2179 default:
2180 gcc_unreachable ();
2181 }
2182 }
2183 }
2184
2185 /* Record important candidates and add them to related_cands bitmaps
2186 if needed. */
2187
2188 static void
2189 record_important_candidates (struct ivopts_data *data)
2190 {
2191 unsigned i;
2192 struct iv_use *use;
2193
2194 for (i = 0; i < n_iv_cands (data); i++)
2195 {
2196 struct iv_cand *cand = iv_cand (data, i);
2197
2198 if (cand->important)
2199 bitmap_set_bit (data->important_candidates, i);
2200 }
2201
2202 data->consider_all_candidates = (n_iv_cands (data)
2203 <= CONSIDER_ALL_CANDIDATES_BOUND);
2204
2205 if (data->consider_all_candidates)
2206 {
2207 /* We will not need "related_cands" bitmaps in this case,
2208 so release them to decrease peak memory consumption. */
2209 for (i = 0; i < n_iv_uses (data); i++)
2210 {
2211 use = iv_use (data, i);
2212 BITMAP_FREE (use->related_cands);
2213 }
2214 }
2215 else
2216 {
2217 /* Add important candidates to the related_cands bitmaps. */
2218 for (i = 0; i < n_iv_uses (data); i++)
2219 bitmap_ior_into (iv_use (data, i)->related_cands,
2220 data->important_candidates);
2221 }
2222 }
2223
2224 /* Finds the candidates for the induction variables. */
2225
2226 static void
2227 find_iv_candidates (struct ivopts_data *data)
2228 {
2229 /* Add commonly used ivs. */
2230 add_standard_iv_candidates (data);
2231
2232 /* Add old induction variables. */
2233 add_old_ivs_candidates (data);
2234
2235 /* Add induction variables derived from uses. */
2236 add_derived_ivs_candidates (data);
2237
2238 /* Record the important candidates. */
2239 record_important_candidates (data);
2240 }
2241
2242 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2243 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2244 we allocate a small hash table for each use. */
2245
2246 static void
2247 alloc_use_cost_map (struct ivopts_data *data)
2248 {
2249 unsigned i, size, s, j;
2250
2251 for (i = 0; i < n_iv_uses (data); i++)
2252 {
2253 struct iv_use *use = iv_use (data, i);
2254 bitmap_iterator bi;
2255
2256 if (data->consider_all_candidates)
2257 size = n_iv_cands (data);
2258 else
2259 {
2260 s = 0;
2261 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
2262 {
2263 s++;
2264 }
2265
2266 /* Round up to a power of two, so that taking the remainder modulo it is fast. */
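/* Illustrative example: s == 5 related candidates yield size == 8,
and s == 8 yields size == 8 as well. */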
2267 for (size = 1; size < s; size <<= 1)
2268 continue;
2269 }
2270
2271 use->n_map_members = size;
2272 use->cost_map = XCNEWVEC (struct cost_pair, size);
2273 }
2274 }
2275
2276 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
2277 depends on invariants DEPENDS_ON and that the value used in expressing it
2278 is VALUE. */
2279
2280 static void
2281 set_use_iv_cost (struct ivopts_data *data,
2282 struct iv_use *use, struct iv_cand *cand, unsigned cost,
2283 bitmap depends_on, tree value)
2284 {
2285 unsigned i, s;
2286
2287 if (cost == INFTY)
2288 {
2289 BITMAP_FREE (depends_on);
2290 return;
2291 }
2292
2293 if (data->consider_all_candidates)
2294 {
2295 use->cost_map[cand->id].cand = cand;
2296 use->cost_map[cand->id].cost = cost;
2297 use->cost_map[cand->id].depends_on = depends_on;
2298 use->cost_map[cand->id].value = value;
2299 return;
2300 }
2301
2302 /* n_map_members is a power of two, so this computes modulo. */
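/* Illustrative example: with n_map_members == 8 and cand->id == 13,
s == (13 & 7) == 5 == 13 % 8, with no division needed. */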
2303 s = cand->id & (use->n_map_members - 1);
2304 for (i = s; i < use->n_map_members; i++)
2305 if (!use->cost_map[i].cand)
2306 goto found;
2307 for (i = 0; i < s; i++)
2308 if (!use->cost_map[i].cand)
2309 goto found;
2310
2311 gcc_unreachable ();
2312
2313 found:
2314 use->cost_map[i].cand = cand;
2315 use->cost_map[i].cost = cost;
2316 use->cost_map[i].depends_on = depends_on;
2317 use->cost_map[i].value = value;
2318 }
2319
2320 /* Gets cost of (USE, CANDIDATE) pair. */
2321
2322 static struct cost_pair *
2323 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2324 struct iv_cand *cand)
2325 {
2326 unsigned i, s;
2327 struct cost_pair *ret;
2328
2329 if (!cand)
2330 return NULL;
2331
2332 if (data->consider_all_candidates)
2333 {
2334 ret = use->cost_map + cand->id;
2335 if (!ret->cand)
2336 return NULL;
2337
2338 return ret;
2339 }
2340
2341 /* n_map_members is a power of two, so this computes modulo. */
2342 s = cand->id & (use->n_map_members - 1);
2343 for (i = s; i < use->n_map_members; i++)
2344 if (use->cost_map[i].cand == cand)
2345 return use->cost_map + i;
2346
2347 for (i = 0; i < s; i++)
2348 if (use->cost_map[i].cand == cand)
2349 return use->cost_map + i;
2350
2351 return NULL;
2352 }
2353
2354 /* Returns an estimate of the cost of computing SEQ. */
2355
2356 static unsigned
2357 seq_cost (rtx seq)
2358 {
2359 unsigned cost = 0;
2360 rtx set;
2361
2362 for (; seq; seq = NEXT_INSN (seq))
2363 {
2364 set = single_set (seq);
2365 if (set)
2366 cost += rtx_cost (set, SET);
2367 else
2368 cost++;
2369 }
2370
2371 return cost;
2372 }
2373
2374 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2375 static rtx
2376 produce_memory_decl_rtl (tree obj, int *regno)
2377 {
2378 rtx x;
2379
2380 gcc_assert (obj);
2381 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2382 {
2383 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2384 x = gen_rtx_SYMBOL_REF (Pmode, name);
2385 SET_SYMBOL_REF_DECL (x, obj);
2386 x = gen_rtx_MEM (DECL_MODE (obj), x);
2387 targetm.encode_section_info (obj, x, true);
2388 }
2389 else
2390 {
2391 x = gen_raw_REG (Pmode, (*regno)++);
2392 x = gen_rtx_MEM (DECL_MODE (obj), x);
2393 }
2394
2395 return x;
2396 }
2397
2398 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2399 walk_tree. DATA contains the actual fake register number. */
2400
2401 static tree
2402 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2403 {
2404 tree obj = NULL_TREE;
2405 rtx x = NULL_RTX;
2406 int *regno = (int *) data;
2407
2408 switch (TREE_CODE (*expr_p))
2409 {
2410 case ADDR_EXPR:
2411 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2412 handled_component_p (*expr_p);
2413 expr_p = &TREE_OPERAND (*expr_p, 0))
2414 continue;
2415 obj = *expr_p;
2416 if (DECL_P (obj) && !DECL_RTL_SET_P (obj))
2417 x = produce_memory_decl_rtl (obj, regno);
2418 break;
2419
2420 case SSA_NAME:
2421 *ws = 0;
2422 obj = SSA_NAME_VAR (*expr_p);
2423 if (!DECL_RTL_SET_P (obj))
2424 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2425 break;
2426
2427 case VAR_DECL:
2428 case PARM_DECL:
2429 case RESULT_DECL:
2430 *ws = 0;
2431 obj = *expr_p;
2432
2433 if (DECL_RTL_SET_P (obj))
2434 break;
2435
2436 if (DECL_MODE (obj) == BLKmode)
2437 x = produce_memory_decl_rtl (obj, regno);
2438 else
2439 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2440
2441 break;
2442
2443 default:
2444 break;
2445 }
2446
2447 if (x)
2448 {
2449 VEC_safe_push (tree, heap, decl_rtl_to_reset, obj);
2450 SET_DECL_RTL (obj, x);
2451 }
2452
2453 return NULL_TREE;
2454 }
2455
2456 /* Determines cost of the computation of EXPR. */
2457
2458 static unsigned
2459 computation_cost (tree expr)
2460 {
2461 rtx seq, rslt;
2462 tree type = TREE_TYPE (expr);
2463 unsigned cost;
2464 /* Avoid using hard regs in ways which may be unsupported. */
2465 int regno = LAST_VIRTUAL_REGISTER + 1;
2466
2467 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2468 start_sequence ();
2469 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2470 seq = get_insns ();
2471 end_sequence ();
2472
2473 cost = seq_cost (seq);
2474 if (MEM_P (rslt))
2475 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type));
2476
2477 return cost;
2478 }
2479
2480 /* Returns variable containing the value of candidate CAND at statement AT. */
2481
2482 static tree
2483 var_at_stmt (struct loop *loop, struct iv_cand *cand, tree stmt)
2484 {
2485 if (stmt_after_increment (loop, cand, stmt))
2486 return cand->var_after;
2487 else
2488 return cand->var_before;
2489 }
2490
2491 /* Return the most significant (sign) bit of T. Similar to tree_int_cst_msb,
2492 but the bit is determined from TYPE_PRECISION, not MODE_BITSIZE. */
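/* Illustrative example: for an 8-bit type, the value 0x80 has its
sign bit (bit 7) set, so this returns 1. */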
2493
2494 int
2495 tree_int_cst_sign_bit (tree t)
2496 {
2497 unsigned bitno = TYPE_PRECISION (TREE_TYPE (t)) - 1;
2498 unsigned HOST_WIDE_INT w;
2499
2500 if (bitno < HOST_BITS_PER_WIDE_INT)
2501 w = TREE_INT_CST_LOW (t);
2502 else
2503 {
2504 w = TREE_INT_CST_HIGH (t);
2505 bitno -= HOST_BITS_PER_WIDE_INT;
2506 }
2507
2508 return (w >> bitno) & 1;
2509 }
2510
2511 /* If we can prove that TOP = cst * BOT for some constant cst,
2512 store cst to MUL and return true. Otherwise return false.
2513 The returned value is always sign-extended, regardless of the
2514 signedness of TOP and BOT. */
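/* Illustrative examples: TOP == x * 6 with BOT == x gives *MUL == 6;
TOP == 12 with BOT == 4 gives *MUL == 3; TOP == 13 with BOT == 4
fails, since the division leaves a remainder. */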
2515
2516 static bool
2517 constant_multiple_of (tree top, tree bot, double_int *mul)
2518 {
2519 tree mby;
2520 enum tree_code code;
2521 double_int res, p0, p1;
2522 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
2523
2524 STRIP_NOPS (top);
2525 STRIP_NOPS (bot);
2526
2527 if (operand_equal_p (top, bot, 0))
2528 {
2529 *mul = double_int_one;
2530 return true;
2531 }
2532
2533 code = TREE_CODE (top);
2534 switch (code)
2535 {
2536 case MULT_EXPR:
2537 mby = TREE_OPERAND (top, 1);
2538 if (TREE_CODE (mby) != INTEGER_CST)
2539 return false;
2540
2541 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
2542 return false;
2543
2544 *mul = double_int_sext (double_int_mul (res, tree_to_double_int (mby)),
2545 precision);
2546 return true;
2547
2548 case PLUS_EXPR:
2549 case MINUS_EXPR:
2550 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
2551 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
2552 return false;
2553
2554 if (code == MINUS_EXPR)
2555 p1 = double_int_neg (p1);
2556 *mul = double_int_sext (double_int_add (p0, p1), precision);
2557 return true;
2558
2559 case INTEGER_CST:
2560 if (TREE_CODE (bot) != INTEGER_CST)
2561 return false;
2562
2563 p0 = double_int_sext (tree_to_double_int (top), precision);
2564 p1 = double_int_sext (tree_to_double_int (bot), precision);
2565 if (double_int_zero_p (p1))
2566 return false;
2567 *mul = double_int_sext (double_int_sdivmod (p0, p1, FLOOR_DIV_EXPR, &res),
2568 precision);
2569 return double_int_zero_p (res);
2570
2571 default:
2572 return false;
2573 }
2574 }
2575
2576 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have
2577 the same precision that is at least as wide as the precision of TYPE,
2578 stores BA to A and BB to B, and returns the type of BA. Otherwise,
2579 returns the type of A. */
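/* Illustrative example: for A == (unsigned int) al and
B == (unsigned int) bl, with al and bl of type long, both casts are
stripped and the type long is returned. */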
2580
2581 static tree
2582 determine_common_wider_type (tree *a, tree *b)
2583 {
2584 tree wider_type = NULL;
2585 tree suba, subb;
2586 tree atype = TREE_TYPE (*a);
2587
2588 if ((TREE_CODE (*a) == NOP_EXPR
2589 || TREE_CODE (*a) == CONVERT_EXPR))
2590 {
2591 suba = TREE_OPERAND (*a, 0);
2592 wider_type = TREE_TYPE (suba);
2593 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2594 return atype;
2595 }
2596 else
2597 return atype;
2598
2599 if ((TREE_CODE (*b) == NOP_EXPR
2600 || TREE_CODE (*b) == CONVERT_EXPR))
2601 {
2602 subb = TREE_OPERAND (*b, 0);
2603 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2604 return atype;
2605 }
2606 else
2607 return atype;
2608
2609 *a = suba;
2610 *b = subb;
2611 return wider_type;
2612 }
2613
2614 /* Determines the expression by which USE is expressed from induction variable
2615 CAND at statement AT in LOOP. The expression is stored in a decomposed
2616 form into AFF. Returns false if USE cannot be expressed using CAND. */
2617
2618 static bool
2619 get_computation_aff (struct loop *loop,
2620 struct iv_use *use, struct iv_cand *cand, tree at,
2621 struct affine_tree_combination *aff)
2622 {
2623 tree ubase = use->iv->base;
2624 tree ustep = use->iv->step;
2625 tree cbase = cand->iv->base;
2626 tree cstep = cand->iv->step, cstep_common;
2627 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
2628 tree common_type, var;
2629 tree uutype;
2630 aff_tree cbase_aff, var_aff;
2631 double_int rat;
2632
2633 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
2634 {
2635 /* We do not have enough precision to express the values of USE. */
2636 return false;
2637 }
2638
2639 var = var_at_stmt (loop, cand, at);
2640 uutype = unsigned_type_for (utype);
2641
2642 /* If the conversion is not a noop, perform it. */
2643 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
2644 {
2645 cstep = fold_convert (uutype, cstep);
2646 cbase = fold_convert (uutype, cbase);
2647 var = fold_convert (uutype, var);
2648 }
2649
2650 if (!constant_multiple_of (ustep, cstep, &rat))
2651 return false;
2652
2653 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
2654 type, we achieve better folding by computing their difference in this
2655 wider type, and casting the result to UUTYPE. We do not need to worry about
2656 overflow, as all the arithmetic will in the end be performed in UUTYPE
2657 anyway. */
2658 common_type = determine_common_wider_type (&ubase, &cbase);
2659
2660 /* use = ubase - ratio * cbase + ratio * var. */
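/* Illustrative example: for use->iv == {base 4, step 2} and
cand->iv == {base 0, step 1}, rat == 2 and the combination is
4 - 2 * 0 + 2 * var, i.e. 2 * var + 4. */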
2661 tree_to_aff_combination (ubase, common_type, aff);
2662 tree_to_aff_combination (cbase, common_type, &cbase_aff);
2663 tree_to_aff_combination (var, uutype, &var_aff);
2664
2665 /* We need to shift the value if we are after the increment. */
2666 if (stmt_after_increment (loop, cand, at))
2667 {
2668 aff_tree cstep_aff;
2669
2670 if (common_type != uutype)
2671 cstep_common = fold_convert (common_type, cstep);
2672 else
2673 cstep_common = cstep;
2674
2675 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
2676 aff_combination_add (&cbase_aff, &cstep_aff);
2677 }
2678
2679 aff_combination_scale (&cbase_aff, double_int_neg (rat));
2680 aff_combination_add (aff, &cbase_aff);
2681 if (common_type != uutype)
2682 aff_combination_convert (aff, uutype);
2683
2684 aff_combination_scale (&var_aff, rat);
2685 aff_combination_add (aff, &var_aff);
2686
2687 return true;
2688 }
2689
2690 /* Determines the expression by which USE is expressed from induction variable
2691 CAND at statement AT in LOOP. The computation is unshared. */
2692
2693 static tree
2694 get_computation_at (struct loop *loop,
2695 struct iv_use *use, struct iv_cand *cand, tree at)
2696 {
2697 aff_tree aff;
2698 tree type = TREE_TYPE (use->iv->base);
2699
2700 if (!get_computation_aff (loop, use, cand, at, &aff))
2701 return NULL_TREE;
2702 unshare_aff_combination (&aff);
2703 return fold_convert (type, aff_combination_to_tree (&aff));
2704 }
2705
2706 /* Determines the expression by which USE is expressed from induction variable
2707 CAND in LOOP. The computation is unshared. */
2708
2709 static tree
2710 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
2711 {
2712 return get_computation_at (loop, use, cand, use->stmt);
2713 }
2714
2715 /* Returns cost of addition in MODE. */
2716
2717 static unsigned
2718 add_cost (enum machine_mode mode)
2719 {
2720 static unsigned costs[NUM_MACHINE_MODES];
2721 rtx seq;
2722 unsigned cost;
2723
2724 if (costs[mode])
2725 return costs[mode];
2726
2727 start_sequence ();
2728 force_operand (gen_rtx_fmt_ee (PLUS, mode,
2729 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2730 gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 2)),
2731 NULL_RTX);
2732 seq = get_insns ();
2733 end_sequence ();
2734
2735 cost = seq_cost (seq);
2736 if (!cost)
2737 cost = 1;
2738
2739 costs[mode] = cost;
2740
2741 if (dump_file && (dump_flags & TDF_DETAILS))
2742 fprintf (dump_file, "Addition in %s costs %d\n",
2743 GET_MODE_NAME (mode), cost);
2744 return cost;
2745 }
2746
2747 /* Entry in a hashtable of already known costs for multiplication. */
2748 struct mbc_entry
2749 {
2750 HOST_WIDE_INT cst; /* The constant to multiply by. */
2751 enum machine_mode mode; /* In mode. */
2752 unsigned cost; /* The cost. */
2753 };
2754
2755 /* Computes the hash value for ENTRY. */
2756
2757 static hashval_t
2758 mbc_entry_hash (const void *entry)
2759 {
2760 const struct mbc_entry *e = (const struct mbc_entry *) entry;
2761
2762 return 57 * (hashval_t) e->mode + (hashval_t) (e->cst % 877);
2763 }
2764
2765 /* Compares the hash table entries ENTRY1 and ENTRY2. */
2766
2767 static int
2768 mbc_entry_eq (const void *entry1, const void *entry2)
2769 {
2770 const struct mbc_entry *e1 = (const struct mbc_entry *) entry1;
2771 const struct mbc_entry *e2 = (const struct mbc_entry *) entry2;
2772
2773 return (e1->mode == e2->mode
2774 && e1->cst == e2->cst);
2775 }
2776
2777 /* Returns cost of multiplication by constant CST in MODE. */
2778
2779 unsigned
2780 multiply_by_cost (HOST_WIDE_INT cst, enum machine_mode mode)
2781 {
2782 static htab_t costs;
2783 struct mbc_entry **cached, act;
2784 rtx seq;
2785 unsigned cost;
2786
2787 if (!costs)
2788 costs = htab_create (100, mbc_entry_hash, mbc_entry_eq, free);
2789
2790 act.mode = mode;
2791 act.cst = cst;
2792 cached = (struct mbc_entry **) htab_find_slot (costs, &act, INSERT);
2793 if (*cached)
2794 return (*cached)->cost;
2795
2796 *cached = XNEW (struct mbc_entry);
2797 (*cached)->mode = mode;
2798 (*cached)->cst = cst;
2799
2800 start_sequence ();
2801 expand_mult (mode, gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1),
2802 gen_int_mode (cst, mode), NULL_RTX, 0);
2803 seq = get_insns ();
2804 end_sequence ();
2805
2806 cost = seq_cost (seq);
2807
2808 if (dump_file && (dump_flags & TDF_DETAILS))
2809 fprintf (dump_file, "Multiplication by %d in %s costs %d\n",
2810 (int) cst, GET_MODE_NAME (mode), cost);
2811
2812 (*cached)->cost = cost;
2813
2814 return cost;
2815 }
2816
2817 /* Returns true if multiplying by RATIO is allowed in an address. Test the
2818 validity for a memory reference accessing memory of mode MODE. */
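/* Illustrative note: on targets with x86-like addressing, only the
scales 1, 2, 4 and 8 would be reported as valid multipliers. */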
2819
2820 bool
2821 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode)
2822 {
2823 #define MAX_RATIO 128
2824 static sbitmap valid_mult[MAX_MACHINE_MODE];
2825
2826 if (!valid_mult[mode])
2827 {
2828 rtx reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2829 rtx addr;
2830 HOST_WIDE_INT i;
2831
2832 valid_mult[mode] = sbitmap_alloc (2 * MAX_RATIO + 1);
2833 sbitmap_zero (valid_mult[mode]);
2834 addr = gen_rtx_fmt_ee (MULT, Pmode, reg1, NULL_RTX);
2835 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2836 {
2837 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2838 if (memory_address_p (mode, addr))
2839 SET_BIT (valid_mult[mode], i + MAX_RATIO);
2840 }
2841
2842 if (dump_file && (dump_flags & TDF_DETAILS))
2843 {
2844 fprintf (dump_file, " allowed multipliers:");
2845 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
2846 if (TEST_BIT (valid_mult[mode], i + MAX_RATIO))
2847 fprintf (dump_file, " %d", (int) i);
2848 fprintf (dump_file, "\n");
2849 fprintf (dump_file, "\n");
2850 }
2851 }
2852
2853 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
2854 return false;
2855
2856 return TEST_BIT (valid_mult[mode], ratio + MAX_RATIO);
2857 }
2858
2859 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
2860 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
2861 variable is omitted. Compute the cost for a memory reference that accesses
2862 a memory location of mode MEM_MODE.
2863
2864 TODO -- there must be some better way. This all is quite crude. */
2865
2866 static unsigned
2867 get_address_cost (bool symbol_present, bool var_present,
2868 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
2869 enum machine_mode mem_mode)
2870 {
2871 static bool initialized[MAX_MACHINE_MODE];
2872 static HOST_WIDE_INT rat[MAX_MACHINE_MODE], off[MAX_MACHINE_MODE];
2873 static HOST_WIDE_INT min_offset[MAX_MACHINE_MODE], max_offset[MAX_MACHINE_MODE];
2874 static unsigned costs[MAX_MACHINE_MODE][2][2][2][2];
2875 unsigned cost, acost;
2876 bool offset_p, ratio_p;
2877 HOST_WIDE_INT s_offset;
2878 unsigned HOST_WIDE_INT mask;
2879 unsigned bits;
2880
2881 if (!initialized[mem_mode])
2882 {
2883 HOST_WIDE_INT i;
2884 HOST_WIDE_INT start = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
2885 int old_cse_not_expected;
2886 unsigned sym_p, var_p, off_p, rat_p, add_c;
2887 rtx seq, addr, base;
2888 rtx reg0, reg1;
2889
2890 initialized[mem_mode] = true;
2891
2892 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2893
2894 addr = gen_rtx_fmt_ee (PLUS, Pmode, reg1, NULL_RTX);
2895 for (i = start; i <= 1 << 20; i <<= 1)
2896 {
2897 XEXP (addr, 1) = gen_int_mode (i, Pmode);
2898 if (!memory_address_p (mem_mode, addr))
2899 break;
2900 }
2901 max_offset[mem_mode] = i == start ? 0 : i >> 1;
2902 off[mem_mode] = max_offset[mem_mode];
2903
2904 for (i = start; i <= 1 << 20; i <<= 1)
2905 {
2906 XEXP (addr, 1) = gen_int_mode (-i, Pmode);
2907 if (!memory_address_p (mem_mode, addr))
2908 break;
2909 }
2910 min_offset[mem_mode] = i == start ? 0 : -(i >> 1);
2911
2912 if (dump_file && (dump_flags & TDF_DETAILS))
2913 {
2914 fprintf (dump_file, "get_address_cost:\n");
2915 fprintf (dump_file, " min offset %s %d\n",
2916 GET_MODE_NAME (mem_mode),
2917 (int) min_offset[mem_mode]);
2918 fprintf (dump_file, " max offset %s %d\n",
2919 GET_MODE_NAME (mem_mode),
2920 (int) max_offset[mem_mode]);
2921 }
2922
2923 rat[mem_mode] = 1;
2924 for (i = 2; i <= MAX_RATIO; i++)
2925 if (multiplier_allowed_in_address_p (i, mem_mode))
2926 {
2927 rat[mem_mode] = i;
2928 break;
2929 }
2930
2931 /* Compute the cost of various addressing modes. */
2932 acost = 0;
2933 reg0 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 1);
2934 reg1 = gen_raw_REG (Pmode, LAST_VIRTUAL_REGISTER + 2);
2935
2936 for (i = 0; i < 16; i++)
2937 {
2938 sym_p = i & 1;
2939 var_p = (i >> 1) & 1;
2940 off_p = (i >> 2) & 1;
2941 rat_p = (i >> 3) & 1;
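/* Each bit of I enables one address component; e.g. (illustrative)
i == 13 == 0b1101 stands for sym + cst + rat * index. */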
2942
2943 addr = reg0;
2944 if (rat_p)
2945 addr = gen_rtx_fmt_ee (MULT, Pmode, addr,
2946 gen_int_mode (rat[mem_mode], Pmode));
2947
2948 if (var_p)
2949 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, reg1);
2950
2951 if (sym_p)
2952 {
2953 base = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (""));
2954 /* ??? We can run into trouble with some backends by presenting
2955 it with symbols which haven't been properly passed through
2956 targetm.encode_section_info. By setting the local bit, we
2957 enhance the probability of things working. */
2958 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
2959
2960 if (off_p)
2961 base = gen_rtx_fmt_e (CONST, Pmode,
2962 gen_rtx_fmt_ee (PLUS, Pmode,
2963 base,
2964 gen_int_mode (off[mem_mode],
2965 Pmode)));
2966 }
2967 else if (off_p)
2968 base = gen_int_mode (off[mem_mode], Pmode);
2969 else
2970 base = NULL_RTX;
2971
2972 if (base)
2973 addr = gen_rtx_fmt_ee (PLUS, Pmode, addr, base);
2974
2975 start_sequence ();
2976 /* To avoid splitting addressing modes, pretend that no cse will
2977 follow. */
2978 old_cse_not_expected = cse_not_expected;
2979 cse_not_expected = true;
2980 addr = memory_address (mem_mode, addr);
2981 cse_not_expected = old_cse_not_expected;
2982 seq = get_insns ();
2983 end_sequence ();
2984
2985 acost = seq_cost (seq);
2986 acost += address_cost (addr, mem_mode);
2987
2988 if (!acost)
2989 acost = 1;
2990 costs[mem_mode][sym_p][var_p][off_p][rat_p] = acost;
2991 }
2992
2993 /* On some targets, it is quite expensive to load a symbol into a register,
2994 which makes addresses that contain symbols look much more expensive.
2995 However, the symbol will have to be loaded in any case before the
2996 loop (and quite likely we have it in a register already), so it does not
2997 make much sense to penalize them too heavily. So make some final
2998 tweaks for the SYMBOL_PRESENT modes:
2999
3000 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3001 var is cheaper, use this mode with a small penalty.
3002 If VAR_PRESENT is true, try whether the mode with
3003 SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
3004 and if this is the case, use it. */
3005 add_c = add_cost (Pmode);
3006 for (i = 0; i < 8; i++)
3007 {
3008 var_p = i & 1;
3009 off_p = (i >> 1) & 1;
3010 rat_p = (i >> 2) & 1;
3011
3012 acost = costs[mem_mode][0][1][off_p][rat_p] + 1;
3013 if (var_p)
3014 acost += add_c;
3015
3016 if (acost < costs[mem_mode][1][var_p][off_p][rat_p])
3017 costs[mem_mode][1][var_p][off_p][rat_p] = acost;
3018 }
3019
3020 if (dump_file && (dump_flags & TDF_DETAILS))
3021 {
3022 fprintf (dump_file, "Address costs:\n");
3023
3024 for (i = 0; i < 16; i++)
3025 {
3026 sym_p = i & 1;
3027 var_p = (i >> 1) & 1;
3028 off_p = (i >> 2) & 1;
3029 rat_p = (i >> 3) & 1;
3030
3031 fprintf (dump_file, " ");
3032 if (sym_p)
3033 fprintf (dump_file, "sym + ");
3034 if (var_p)
3035 fprintf (dump_file, "var + ");
3036 if (off_p)
3037 fprintf (dump_file, "cst + ");
3038 if (rat_p)
3039 fprintf (dump_file, "rat * ");
3040
3041 acost = costs[mem_mode][sym_p][var_p][off_p][rat_p];
3042 fprintf (dump_file, "index costs %d\n", acost);
3043 }
3044 fprintf (dump_file, "\n");
3045 }
3046 }
3047
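/* Sign-extend OFFSET from the width of Pmode to HOST_WIDE_INT; e.g.
(illustrative) for a 32-bit Pmode, the offset 0xfffffffc becomes
s_offset == -4. */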
3048 bits = GET_MODE_BITSIZE (Pmode);
3049 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3050 offset &= mask;
3051 if ((offset >> (bits - 1) & 1))
3052 offset |= ~mask;
3053 s_offset = offset;
3054
3055 cost = 0;
3056 offset_p = (s_offset != 0
3057 && min_offset[mem_mode] <= s_offset
3058 && s_offset <= max_offset[mem_mode]);
3059 ratio_p = (ratio != 1
3060 && multiplier_allowed_in_address_p (ratio, mem_mode));
3061
3062 if (ratio != 1 && !ratio_p)
3063 cost += multiply_by_cost (ratio, Pmode);
3064
3065 if (s_offset && !offset_p && !symbol_present)
3066 cost += add_cost (Pmode);
3067
3068 acost = costs[mem_mode][symbol_present][var_present][offset_p][ratio_p];
3069 return cost + acost;
3070 }
3071
3072 /* Estimates cost of forcing expression EXPR into a variable. */
3073
3074 unsigned
3075 force_expr_to_var_cost (tree expr)
3076 {
3077 static bool costs_initialized = false;
3078 static unsigned integer_cost;
3079 static unsigned symbol_cost;
3080 static unsigned address_cost;
3081 tree op0, op1;
3082 unsigned cost0, cost1, cost;
3083 enum machine_mode mode;
3084
3085 if (!costs_initialized)
3086 {
3087 tree type = build_pointer_type (integer_type_node);
3088 tree var, addr;
3089 rtx x;
3090
3091 var = create_tmp_var_raw (integer_type_node, "test_var");
3092 TREE_STATIC (var) = 1;
3093 x = produce_memory_decl_rtl (var, NULL);
3094 SET_DECL_RTL (var, x);
3095
3096 integer_cost = computation_cost (build_int_cst (integer_type_node,
3097 2000));
3098
3099 addr = build1 (ADDR_EXPR, type, var);
3100 symbol_cost = computation_cost (addr) + 1;
3101
3102 address_cost
3103 = computation_cost (build2 (PLUS_EXPR, type,
3104 addr,
3105 build_int_cst (type, 2000))) + 1;
3106 if (dump_file && (dump_flags & TDF_DETAILS))
3107 {
3108 fprintf (dump_file, "force_expr_to_var_cost:\n");
3109 fprintf (dump_file, " integer %d\n", (int) integer_cost);
3110 fprintf (dump_file, " symbol %d\n", (int) symbol_cost);
3111 fprintf (dump_file, " address %d\n", (int) address_cost);
3112 fprintf (dump_file, " other %d\n", (int) target_spill_cost);
3113 fprintf (dump_file, "\n");
3114 }
3115
3116 costs_initialized = true;
3117 }
3118
3119 STRIP_NOPS (expr);
3120
3121 if (SSA_VAR_P (expr))
3122 return 0;
3123
3124 if (TREE_INVARIANT (expr))
3125 {
3126 if (TREE_CODE (expr) == INTEGER_CST)
3127 return integer_cost;
3128
3129 if (TREE_CODE (expr) == ADDR_EXPR)
3130 {
3131 tree obj = TREE_OPERAND (expr, 0);
3132
3133 if (TREE_CODE (obj) == VAR_DECL
3134 || TREE_CODE (obj) == PARM_DECL
3135 || TREE_CODE (obj) == RESULT_DECL)
3136 return symbol_cost;
3137 }
3138
3139 return address_cost;
3140 }
3141
3142 switch (TREE_CODE (expr))
3143 {
3144 case PLUS_EXPR:
3145 case MINUS_EXPR:
3146 case MULT_EXPR:
3147 op0 = TREE_OPERAND (expr, 0);
3148 op1 = TREE_OPERAND (expr, 1);
3149 STRIP_NOPS (op0);
3150 STRIP_NOPS (op1);
3151
3152 if (is_gimple_val (op0))
3153 cost0 = 0;
3154 else
3155 cost0 = force_expr_to_var_cost (op0);
3156
3157 if (is_gimple_val (op1))
3158 cost1 = 0;
3159 else
3160 cost1 = force_expr_to_var_cost (op1);
3161
3162 break;
3163
3164 default:
3165 /* Just an arbitrary value, FIXME. */
3166 return target_spill_cost;
3167 }
3168
3169 mode = TYPE_MODE (TREE_TYPE (expr));
3170 switch (TREE_CODE (expr))
3171 {
3172 case PLUS_EXPR:
3173 case MINUS_EXPR:
3174 cost = add_cost (mode);
3175 break;
3176
3177 case MULT_EXPR:
3178 if (cst_and_fits_in_hwi (op0))
3179 cost = multiply_by_cost (int_cst_value (op0), mode);
3180 else if (cst_and_fits_in_hwi (op1))
3181 cost = multiply_by_cost (int_cst_value (op1), mode);
3182 else
3183 return target_spill_cost;
3184 break;
3185
3186 default:
3187 gcc_unreachable ();
3188 }
3189
3190 cost += cost0;
3191 cost += cost1;
3192
3193 /* Bound the cost by target_spill_cost. The parts of complicated
3194 computations often are either loop invariant or at least can
3195 be shared between several iv uses, so letting this grow without
3196 limits would not give reasonable results. */
3197 return cost < target_spill_cost ? cost : target_spill_cost;
3198 }
3199
3200 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3201 invariants the computation depends on. */
3202
3203 static unsigned
3204 force_var_cost (struct ivopts_data *data,
3205 tree expr, bitmap *depends_on)
3206 {
3207 if (depends_on)
3208 {
3209 fd_ivopts_data = data;
3210 walk_tree (&expr, find_depends, depends_on, NULL);
3211 }
3212
3213 return force_expr_to_var_cost (expr);
3214 }
3215
3216 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3217 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3218 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3219 invariants the computation depends on. */
3220
3221 static unsigned
3222 split_address_cost (struct ivopts_data *data,
3223 tree addr, bool *symbol_present, bool *var_present,
3224 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3225 {
3226 tree core;
3227 HOST_WIDE_INT bitsize;
3228 HOST_WIDE_INT bitpos;
3229 tree toffset;
3230 enum machine_mode mode;
3231 int unsignedp, volatilep;
3232
3233 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3234 &unsignedp, &volatilep, false);
3235
3236 if (toffset != 0
3237 || bitpos % BITS_PER_UNIT != 0
3238 || TREE_CODE (core) != VAR_DECL)
3239 {
3240 *symbol_present = false;
3241 *var_present = true;
3242 fd_ivopts_data = data;
3243 walk_tree (&addr, find_depends, depends_on, NULL);
3244 return target_spill_cost;
3245 }
3246
3247 *offset += bitpos / BITS_PER_UNIT;
3248 if (TREE_STATIC (core)
3249 || DECL_EXTERNAL (core))
3250 {
3251 *symbol_present = true;
3252 *var_present = false;
3253 return 0;
3254 }
3255
3256 *symbol_present = false;
3257 *var_present = true;
3258 return 0;
3259 }
3260
3261 /* Estimates cost of expressing difference of addresses E1 - E2 as
3262 var + symbol + offset. The value of offset is added to OFFSET,
3263 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3264 part is missing. DEPENDS_ON is a set of the invariants the computation
3265 depends on. */
3266
3267 static unsigned
3268 ptr_difference_cost (struct ivopts_data *data,
3269 tree e1, tree e2, bool *symbol_present, bool *var_present,
3270 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3271 {
3272 HOST_WIDE_INT diff = 0;
3273 unsigned cost;
3274
3275 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3276
3277 if (ptr_difference_const (e1, e2, &diff))
3278 {
3279 *offset += diff;
3280 *symbol_present = false;
3281 *var_present = false;
3282 return 0;
3283 }
3284
3285 if (e2 == integer_zero_node)
3286 return split_address_cost (data, TREE_OPERAND (e1, 0),
3287 symbol_present, var_present, offset, depends_on);
3288
3289 *symbol_present = false;
3290 *var_present = true;
3291
3292 cost = force_var_cost (data, e1, depends_on);
3293 cost += force_var_cost (data, e2, depends_on);
3294 cost += add_cost (Pmode);
3295
3296 return cost;
3297 }
3298
3299 /* Estimates cost of expressing difference E1 - E2 as
3300 var + symbol + offset. The value of offset is added to OFFSET,
3301 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3302 part is missing. DEPENDS_ON is a set of the invariants the computation
3303 depends on. */
3304
3305 static unsigned
3306 difference_cost (struct ivopts_data *data,
3307 tree e1, tree e2, bool *symbol_present, bool *var_present,
3308 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3309 {
3310 unsigned cost;
3311 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3312 unsigned HOST_WIDE_INT off1, off2;
3313
3314 e1 = strip_offset (e1, &off1);
3315 e2 = strip_offset (e2, &off2);
3316 *offset += off1 - off2;
3317
3318 STRIP_NOPS (e1);
3319 STRIP_NOPS (e2);
3320
3321 if (TREE_CODE (e1) == ADDR_EXPR)
3322 return ptr_difference_cost (data, e1, e2, symbol_present, var_present, offset,
3323 depends_on);
3324 *symbol_present = false;
3325
3326 if (operand_equal_p (e1, e2, 0))
3327 {
3328 *var_present = false;
3329 return 0;
3330 }
3331 *var_present = true;
3332 if (integer_zerop (e2))
3333 return force_var_cost (data, e1, depends_on);
3334
3335 if (integer_zerop (e1))
3336 {
3337 cost = force_var_cost (data, e2, depends_on);
3338 cost += multiply_by_cost (-1, mode);
3339
3340 return cost;
3341 }
3342
3343 cost = force_var_cost (data, e1, depends_on);
3344 cost += force_var_cost (data, e2, depends_on);
3345 cost += add_cost (mode);
3346
3347 return cost;
3348 }
3349
3350 /* Determines the cost of the computation by which USE is expressed
3351 from induction variable CAND. If ADDRESS_P is true, we just need
3352 to create an address from it, otherwise we want to get it into a
3353 register. A set of invariants we depend on is stored in
3354 DEPENDS_ON. AT is the statement at which the value is computed. */
3355
3356 static unsigned
3357 get_computation_cost_at (struct ivopts_data *data,
3358 struct iv_use *use, struct iv_cand *cand,
3359 bool address_p, bitmap *depends_on, tree at)
3360 {
3361 tree ubase = use->iv->base, ustep = use->iv->step;
3362 tree cbase, cstep;
3363 tree utype = TREE_TYPE (ubase), ctype;
3364 unsigned HOST_WIDE_INT cstepi, offset = 0;
3365 HOST_WIDE_INT ratio, aratio;
3366 bool var_present, symbol_present;
3367 unsigned cost = 0, n_sums;
3368 double_int rat;
3369
3370 *depends_on = NULL;
3371
3372 /* Only consider real candidates. */
3373 if (!cand->iv)
3374 return INFTY;
3375
3376 cbase = cand->iv->base;
3377 cstep = cand->iv->step;
3378 ctype = TREE_TYPE (cbase);
3379
3380 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3381 {
3382 /* We do not have enough precision to express the values of USE. */
3383 return INFTY;
3384 }
3385
3386 if (address_p)
3387 {
3388 /* Do not try to express the address of an object with a computation
3389 based on the address of a different object. This may cause problems
3390 in rtl-level alias analysis (which does not expect this to happen,
3391 as this is illegal in C), and would be unlikely to be useful
3392 anyway. */
3393 if (use->iv->base_object
3394 && cand->iv->base_object
3395 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
3396 return INFTY;
3397 }
3398
3399 if (TYPE_PRECISION (utype) != TYPE_PRECISION (ctype))
3400 {
3401 /* TODO -- add direct handling of this case. */
3402 goto fallback;
3403 }
3404
3405 /* CSTEPI is removed from the offset in case the statement is after the
3406 increment. If the step is not constant, we use zero instead.
3407 This is a bit imprecise (there is the extra addition), but
3408 redundancy elimination is likely to transform the code so that
3409 it uses the value of the variable before the increment anyway,
3410 so this is not all that unrealistic. */
3411 if (cst_and_fits_in_hwi (cstep))
3412 cstepi = int_cst_value (cstep);
3413 else
3414 cstepi = 0;
3415
3416 if (!constant_multiple_of (ustep, cstep, &rat))
3417 return INFTY;
3418
3419 if (double_int_fits_in_shwi_p (rat))
3420 ratio = double_int_to_shwi (rat);
3421 else
3422 return INFTY;
3423
3424 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
3425 or ratio == 1, it is better to handle this like
3426
3427 ubase - ratio * cbase + ratio * var
3428
3429 (this also holds in the case ratio == -1, TODO). */
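/* Illustrative example: with ubase == b + 4, cbase == 8 and ratio == 2,
OFFSET starts at -16, difference_cost then contributes the variable
part b and adds 4 to OFFSET, giving the shape b + (-12) + 2 * var. */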
3430
3431 if (cst_and_fits_in_hwi (cbase))
3432 {
3433 offset = - ratio * int_cst_value (cbase);
3434 cost += difference_cost (data,
3435 ubase, integer_zero_node,
3436 &symbol_present, &var_present, &offset,
3437 depends_on);
3438 }
3439 else if (ratio == 1)
3440 {
3441 cost += difference_cost (data,
3442 ubase, cbase,
3443 &symbol_present, &var_present, &offset,
3444 depends_on);
3445 }
3446 else
3447 {
3448 cost += force_var_cost (data, cbase, depends_on);
3449 cost += add_cost (TYPE_MODE (ctype));
3450 cost += difference_cost (data,
3451 ubase, integer_zero_node,
3452 &symbol_present, &var_present, &offset,
3453 depends_on);
3454 }
3455
3456 /* If we are after the increment, the value of the candidate is higher by
3457 one step. */
3458 if (stmt_after_increment (data->current_loop, cand, at))
3459 offset -= ratio * cstepi;
3460
3461 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
3462 (symbol/var/const parts may be omitted). If we are looking for an address,
3463 find the cost of addressing this. */
3464 if (address_p)
3465 return cost + get_address_cost (symbol_present, var_present, offset, ratio,
3466 TYPE_MODE (TREE_TYPE (*use->op_p)));
3467
3468 /* Otherwise estimate the costs for computing the expression. */
3469 aratio = ratio > 0 ? ratio : -ratio;
3470 if (!symbol_present && !var_present && !offset)
3471 {
3472 if (ratio != 1)
3473 cost += multiply_by_cost (ratio, TYPE_MODE (ctype));
3474
3475 return cost;
3476 }
3477
3478 if (aratio != 1)
3479 cost += multiply_by_cost (aratio, TYPE_MODE (ctype));
3480
3481 n_sums = 1;
3482 if (var_present
3483 /* Symbol + offset should be compile-time computable. */
3484 && (symbol_present || offset))
3485 n_sums++;
3486
3487 return cost + n_sums * add_cost (TYPE_MODE (ctype));
3488
3489 fallback:
3490 {
3491 /* Just get the expression, expand it and measure the cost. */
3492 tree comp = get_computation_at (data->current_loop, use, cand, at);
3493
3494 if (!comp)
3495 return INFTY;
3496
3497 if (address_p)
3498 comp = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (comp)), comp);
3499
3500 return computation_cost (comp);
3501 }
3502 }
3503
3504 /* Determines the cost of the computation by which USE is expressed
3505 from induction variable CAND. If ADDRESS_P is true, we just need
3506 to create an address from it, otherwise we want to get it into a
3507 register. A set of invariants we depend on is stored in
3508 DEPENDS_ON. */
3509
3510 static unsigned
3511 get_computation_cost (struct ivopts_data *data,
3512 struct iv_use *use, struct iv_cand *cand,
3513 bool address_p, bitmap *depends_on)
3514 {
3515 return get_computation_cost_at (data,
3516 use, cand, address_p, depends_on, use->stmt);
3517 }
3518
3519 /* Determines cost of basing replacement of USE on CAND in a generic
3520 expression. */
3521
3522 static bool
3523 determine_use_iv_cost_generic (struct ivopts_data *data,
3524 struct iv_use *use, struct iv_cand *cand)
3525 {
3526 bitmap depends_on;
3527 unsigned cost;
3528
3529 /* The simple case first -- if we need to express the value of the preserved
3530 original biv, the cost is 0. This also prevents us from counting the
3531 cost of the increment twice -- once at this use and once in the cost of
3532 the candidate. */
3533 if (cand->pos == IP_ORIGINAL
3534 && cand->incremented_at == use->stmt)
3535 {
3536 set_use_iv_cost (data, use, cand, 0, NULL, NULL_TREE);
3537 return true;
3538 }
3539
3540 cost = get_computation_cost (data, use, cand, false, &depends_on);
3541 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3542
3543 return cost != INFTY;
3544 }
3545
3546 /* Determines cost of basing replacement of USE on CAND in an address. */
3547
3548 static bool
3549 determine_use_iv_cost_address (struct ivopts_data *data,
3550 struct iv_use *use, struct iv_cand *cand)
3551 {
3552 bitmap depends_on;
3553 unsigned cost = get_computation_cost (data, use, cand, true, &depends_on);
3554
3555 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE);
3556
3557 return cost != INFTY;
3558 }
3559
3560 /* Computes value of candidate CAND at position AT in iteration NITER, and
3561 stores it to VAL. */
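/* In other words, VAL == BASE + NITER * STEP, plus one extra STEP if AT
is already past the increment of CAND. */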
3562
3563 static void
3564 cand_value_at (struct loop *loop, struct iv_cand *cand, tree at, tree niter,
3565 aff_tree *val)
3566 {
3567 aff_tree step, delta, nit;
3568 struct iv *iv = cand->iv;
3569 tree type = TREE_TYPE (iv->base);
3570
3571 tree_to_aff_combination (iv->step, type, &step);
3572 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
3573 aff_combination_convert (&nit, type);
3574 aff_combination_mult (&nit, &step, &delta);
3575 if (stmt_after_increment (loop, cand, at))
3576 aff_combination_add (&delta, &step);
3577
3578 tree_to_aff_combination (iv->base, type, val);
3579 aff_combination_add (val, &delta);
3580 }
3581
3582 /* Returns period of induction variable iv. */
3583
3584 static tree
3585 iv_period (struct iv *iv)
3586 {
3587 tree step = iv->step, period, type;
3588 tree pow2div;
3589
3590 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
3591
3592 /* The period of the iv is lcm (step, type range) / step, i.e.
3593 2^(TYPE_PRECISION - num_ending_zeros (step)); we return this value
3594 minus one, built as a mask of that many low bits. */
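/* Illustrative example: a 32-bit iv with step 12 has two ending zeros,
so the returned value is 2^30 - 1. */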
3595 pow2div = num_ending_zeros (step);
3596 type = unsigned_type_for (TREE_TYPE (step));
3597
3598 period = build_low_bits_mask (type,
3599 (TYPE_PRECISION (type)
3600 - tree_low_cst (pow2div, 1)));
3601
3602 return period;
3603 }
3604
3605 /* Returns the comparison operator used when eliminating the iv USE. */
3606
3607 static enum tree_code
3608 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
3609 {
3610 struct loop *loop = data->current_loop;
3611 basic_block ex_bb;
3612 edge exit;
3613
3614 ex_bb = bb_for_stmt (use->stmt);
3615 exit = EDGE_SUCC (ex_bb, 0);
3616 if (flow_bb_inside_loop_p (loop, exit->dest))
3617 exit = EDGE_SUCC (ex_bb, 1);
3618
3619 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
3620 }
3621
3622 /* Check whether it is possible to express the condition in USE by comparison
3623 of candidate CAND. If so, store the value compared with to BOUND. */
3624
3625 static bool
3626 may_eliminate_iv (struct ivopts_data *data,
3627 struct iv_use *use, struct iv_cand *cand, tree *bound)
3628 {
3629 basic_block ex_bb;
3630 edge exit;
3631 tree nit, nit_type;
3632 tree wider_type, period, per_type;
3633 struct loop *loop = data->current_loop;
3634 aff_tree bnd;
3635
3636 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
3637 return false;
3638
3639 /* For now this works only for exits that dominate the loop latch. TODO --
3640 extend it to other conditions inside the loop body. */
3641 ex_bb = bb_for_stmt (use->stmt);
3642 if (use->stmt != last_stmt (ex_bb)
3643 || TREE_CODE (use->stmt) != COND_EXPR)
3644 return false;
3645 if (!dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
3646 return false;
3647
3648 exit = EDGE_SUCC (ex_bb, 0);
3649 if (flow_bb_inside_loop_p (loop, exit->dest))
3650 exit = EDGE_SUCC (ex_bb, 1);
3651 if (flow_bb_inside_loop_p (loop, exit->dest))
3652 return false;
3653
3654 nit = niter_for_exit (data, exit);
3655 if (!nit)
3656 return false;
3657
3658 nit_type = TREE_TYPE (nit);
3659
3660 /* Determine whether we may use the variable to test whether niter iterations
3661 elapsed. This is the case iff the period of the induction variable is
3662 greater than the number of iterations. */
3663 period = iv_period (cand->iv);
3664 if (!period)
3665 return false;
3666 per_type = TREE_TYPE (period);
3667
3669 if (TYPE_PRECISION (nit_type) < TYPE_PRECISION (per_type))
3670 wider_type = per_type;
3671 else
3672 wider_type = nit_type;
3673
3674 if (!integer_nonzerop (fold_build2 (GE_EXPR, boolean_type_node,
3675 fold_convert (wider_type, period),
3676 fold_convert (wider_type, nit))))
3677 return false;
3678
3679 cand_value_at (loop, cand, use->stmt, nit, &bnd);
3680 *bound = aff_combination_to_tree (&bnd);
3681 return true;
3682 }
3683
3684 /* Determines cost of basing replacement of USE on CAND in a condition. */
3685
3686 static bool
3687 determine_use_iv_cost_condition (struct ivopts_data *data,
3688 struct iv_use *use, struct iv_cand *cand)
3689 {
3690 tree bound = NULL_TREE;
3691 struct iv *cmp_iv;
3692 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
3693 unsigned elim_cost, express_cost, cost;
3694 bool ok;
3695
3696 /* Only consider real candidates. */
3697 if (!cand->iv)
3698 {
3699 set_use_iv_cost (data, use, cand, INFTY, NULL, NULL_TREE);
3700 return false;
3701 }
3702
3703 /* Try iv elimination. */
3704 if (may_eliminate_iv (data, use, cand, &bound))
3705 elim_cost = force_var_cost (data, bound, &depends_on_elim);
3706 else
3707 elim_cost = INFTY;
3708
3709 /* Try expressing the original giv. If it is compared with an invariant,
3710 note that we cannot get rid of it. */
3711 ok = extract_cond_operands (data, use->op_p, NULL, NULL, NULL, &cmp_iv);
3712 gcc_assert (ok);
3713
3714 express_cost = get_computation_cost (data, use, cand, false,
3715 &depends_on_express);
3716 fd_ivopts_data = data;
3717 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
3718
3719 /* Choose the better approach. */
3720 if (elim_cost < express_cost)
3721 {
3722 cost = elim_cost;
3723 depends_on = depends_on_elim;
3724 depends_on_elim = NULL;
3725 }
3726 else
3727 {
3728 cost = express_cost;
3729 depends_on = depends_on_express;
3730 depends_on_express = NULL;
3731 bound = NULL_TREE;
3732 }
3733
3734 set_use_iv_cost (data, use, cand, cost, depends_on, bound);
3735
3736 if (depends_on_elim)
3737 BITMAP_FREE (depends_on_elim);
3738 if (depends_on_express)
3739 BITMAP_FREE (depends_on_express);
3740
3741 return cost != INFTY;
3742 }
3743
3744 /* Determines cost of basing replacement of USE on CAND. Returns false
3745 if USE cannot be based on CAND. */
3746
3747 static bool
3748 determine_use_iv_cost (struct ivopts_data *data,
3749 struct iv_use *use, struct iv_cand *cand)
3750 {
3751 switch (use->type)
3752 {
3753 case USE_NONLINEAR_EXPR:
3754 return determine_use_iv_cost_generic (data, use, cand);
3755
3756 case USE_ADDRESS:
3757 return determine_use_iv_cost_address (data, use, cand);
3758
3759 case USE_COMPARE:
3760 return determine_use_iv_cost_condition (data, use, cand);
3761
3762 default:
3763 gcc_unreachable ();
3764 }
3765 }
3766
3767 /* Determines the costs of basing each use on each iv candidate. */
3768
3769 static void
3770 determine_use_iv_costs (struct ivopts_data *data)
3771 {
3772 unsigned i, j;
3773 struct iv_use *use;
3774 struct iv_cand *cand;
3775 bitmap to_clear = BITMAP_ALLOC (NULL);
3776
3777 alloc_use_cost_map (data);
3778
3779 for (i = 0; i < n_iv_uses (data); i++)
3780 {
3781 use = iv_use (data, i);
3782
3783 if (data->consider_all_candidates)
3784 {
3785 for (j = 0; j < n_iv_cands (data); j++)
3786 {
3787 cand = iv_cand (data, j);
3788 determine_use_iv_cost (data, use, cand);
3789 }
3790 }
3791 else
3792 {
3793 bitmap_iterator bi;
3794
3795 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
3796 {
3797 cand = iv_cand (data, j);
3798 if (!determine_use_iv_cost (data, use, cand))
3799 bitmap_set_bit (to_clear, j);
3800 }
3801
3802 /* Remove the candidates for which the cost is infinite from
3803 the list of related candidates. */
3804 bitmap_and_compl_into (use->related_cands, to_clear);
3805 bitmap_clear (to_clear);
3806 }
3807 }
3808
3809 BITMAP_FREE (to_clear);
3810
3811 if (dump_file && (dump_flags & TDF_DETAILS))
3812 {
3813 fprintf (dump_file, "Use-candidate costs:\n");
3814
3815 for (i = 0; i < n_iv_uses (data); i++)
3816 {
3817 use = iv_use (data, i);
3818
3819 fprintf (dump_file, "Use %d:\n", i);
3820 fprintf (dump_file, " cand\tcost\tdepends on\n");
3821 for (j = 0; j < use->n_map_members; j++)
3822 {
3823 if (!use->cost_map[j].cand
3824 || use->cost_map[j].cost == INFTY)
3825 continue;
3826
3827 fprintf (dump_file, " %d\t%d\t",
3828 use->cost_map[j].cand->id,
3829 use->cost_map[j].cost);
3830 if (use->cost_map[j].depends_on)
3831 bitmap_print (dump_file,
3832 use->cost_map[j].depends_on, "","");
3833 fprintf (dump_file, "\n");
3834 }
3835
3836 fprintf (dump_file, "\n");
3837 }
3838 fprintf (dump_file, "\n");
3839 }
3840 }
3841
3842 /* Determines cost of the candidate CAND. */
3843
3844 static void
3845 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
3846 {
3847 unsigned cost_base, cost_step;
3848 tree base;
3849
3850 if (!cand->iv)
3851 {
3852 cand->cost = 0;
3853 return;
3854 }
3855
3856 /* There are two costs associated with the candidate -- its increment
3857 and its initialization. The second is almost negligible for any loop
3858 that rolls enough, so we only take it into account a little. */
3859
3860 base = cand->iv->base;
3861 cost_base = force_var_cost (data, base, NULL);
3862 cost_step = add_cost (TYPE_MODE (TREE_TYPE (base)));
3863
3864 cand->cost = cost_step + cost_base / AVG_LOOP_NITER (current_loop);
3865
3866 /* Prefer the original iv unless we may gain something by replacing it;
3867 this is not really relevant for artificial ivs created by other
3868 passes. */
3869 if (cand->pos == IP_ORIGINAL
3870 && !DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
3871 cand->cost--;
3872
3873 /* Prefer not to insert statements into the latch unless there are some
3874 already (so that we do not create unnecessary jumps). */
3875 if (cand->pos == IP_END
3876 && empty_block_p (ip_end_pos (data->current_loop)))
3877 cand->cost++;
3878 }
3879
3880 /* Determines costs of computation of the candidates. */
3881
3882 static void
3883 determine_iv_costs (struct ivopts_data *data)
3884 {
3885 unsigned i;
3886
3887 if (dump_file && (dump_flags & TDF_DETAILS))
3888 {
3889 fprintf (dump_file, "Candidate costs:\n");
3890 fprintf (dump_file, " cand\tcost\n");
3891 }
3892
3893 for (i = 0; i < n_iv_cands (data); i++)
3894 {
3895 struct iv_cand *cand = iv_cand (data, i);
3896
3897 determine_iv_cost (data, cand);
3898
3899 if (dump_file && (dump_flags & TDF_DETAILS))
3900 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
3901 }
3902
3903 if (dump_file && (dump_flags & TDF_DETAILS))
3904 fprintf (dump_file, "\n");
3905 }
3906
3907 /* Calculates cost for having SIZE induction variables. */
3908
3909 static unsigned
3910 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
3911 {
3912 /* We add size to the cost, so that we prefer eliminating ivs
3913 if possible. */
3914 return size + estimate_reg_pressure_cost (size, data->regs_used);
3915 }
3916
3917 /* For each size of the induction variable set determine the penalty. */
3918
3919 static void
3920 determine_set_costs (struct ivopts_data *data)
3921 {
3922 unsigned j, n;
3923 tree phi, op;
3924 struct loop *loop = data->current_loop;
3925 bitmap_iterator bi;
3926
3927 /* We use the following model (definitely improvable, especially the
3928 cost function -- TODO):
3929
3930 We estimate the number of registers available (using MD data), name it A.
3931
3932 We estimate the number of registers used by the loop, name it U. This
3933 number is obtained as the number of loop phi nodes (not counting virtual
3934 registers and bivs) + the number of variables from outside of the loop.
3935
3936 We set a reserve R (free regs that are used for temporary computations,
3937 etc.). For now the reserve is a constant 3.
3938
3939 Let I be the number of induction variables.
3940
3941 -- if U + I + R <= A, the cost is I * SMALL_COST (just so as not to
3942 encourage creating a lot of ivs without a reason).
3943 -- if A - R < U + I <= A, the cost is I * PRES_COST
3944 -- if U + I > A, the cost is I * PRES_COST and
3945 number of uses * SPILL_COST * (U + I - A) / (U + I) is added. */
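/* A small illustrative instance: with A == 6, R == 3 and U == 2, a
single iv (I == 1) satisfies U + I + R <= A and is charged only
SMALL_COST, whereas I == 4 already falls into the I * PRES_COST
range. */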
3946
3947 if (dump_file && (dump_flags & TDF_DETAILS))
3948 {
3949 fprintf (dump_file, "Global costs:\n");
3950 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
3951 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost);
3952 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost);
3953 }
3954
3955 n = 0;
3956 for (phi = phi_nodes (loop->header); phi; phi = PHI_CHAIN (phi))
3957 {
3958 op = PHI_RESULT (phi);
3959
3960 if (!is_gimple_reg (op))
3961 continue;
3962
3963 if (get_iv (data, op))
3964 continue;
3965
3966 n++;
3967 }
3968
3969 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
3970 {
3971 struct version_info *info = ver_info (data, j);
3972
3973 if (info->inv_id && info->has_nonlin_use)
3974 n++;
3975 }
3976
3977 data->regs_used = n;
3978 if (dump_file && (dump_flags & TDF_DETAILS))
3979 fprintf (dump_file, " regs_used %d\n", n);
3980
3981 if (dump_file && (dump_flags & TDF_DETAILS))
3982 {
3983 fprintf (dump_file, " cost for size:\n");
3984 fprintf (dump_file, " ivs\tcost\n");
3985 for (j = 0; j <= 2 * target_avail_regs; j++)
3986 fprintf (dump_file, " %d\t%d\n", j,
3987 ivopts_global_cost_for_size (data, j));
3988 fprintf (dump_file, "\n");
3989 }
3990 }
3991
3992 /* Returns true if A is a cheaper cost pair than B. */
3993
3994 static bool
3995 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
3996 {
3997 if (!a)
3998 return false;
3999
4000 if (!b)
4001 return true;
4002
4003 if (a->cost < b->cost)
4004 return true;
4005
4006 if (a->cost > b->cost)
4007 return false;
4008
4009 /* In case the costs are the same, prefer the cheaper candidate. */
4010 if (a->cand->cost < b->cand->cost)
4011 return true;
4012
4013 return false;
4014 }
4015
4016 /* Computes the cost field of IVS structure. */
4017
4018 static void
4019 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
4020 {
4021 unsigned cost = 0;
4022
4023 cost += ivs->cand_use_cost;
4024 cost += ivs->cand_cost;
4025 cost += ivopts_global_cost_for_size (data, ivs->n_regs);
4026
4027 ivs->cost = cost;
4028 }
4029
4030 /* Remove invariants in set INVS from set IVS. */
4031
4032 static void
4033 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
4034 {
4035 bitmap_iterator bi;
4036 unsigned iid;
4037
4038 if (!invs)
4039 return;
4040
4041 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4042 {
4043 ivs->n_invariant_uses[iid]--;
4044 if (ivs->n_invariant_uses[iid] == 0)
4045 ivs->n_regs--;
4046 }
4047 }
4048
4049 /* Set USE not to be expressed by any candidate in IVS. */
4050
4051 static void
4052 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
4053 struct iv_use *use)
4054 {
4055 unsigned uid = use->id, cid;
4056 struct cost_pair *cp;
4057
4058 cp = ivs->cand_for_use[uid];
4059 if (!cp)
4060 return;
4061 cid = cp->cand->id;
4062
4063 ivs->bad_uses++;
4064 ivs->cand_for_use[uid] = NULL;
4065 ivs->n_cand_uses[cid]--;
4066
4067 if (ivs->n_cand_uses[cid] == 0)
4068 {
4069 bitmap_clear_bit (ivs->cands, cid);
4070 /* Do not count the pseudocandidates. */
4071 if (cp->cand->iv)
4072 ivs->n_regs--;
4073 ivs->n_cands--;
4074 ivs->cand_cost -= cp->cand->cost;
4075
4076 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
4077 }
4078
4079 ivs->cand_use_cost -= cp->cost;
4080
4081 iv_ca_set_remove_invariants (ivs, cp->depends_on);
4082 iv_ca_recount_cost (data, ivs);
4083 }
4084
4085 /* Add invariants in set INVS to set IVS. */
4086
4087 static void
4088 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
4089 {
4090 bitmap_iterator bi;
4091 unsigned iid;
4092
4093 if (!invs)
4094 return;
4095
4096 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
4097 {
4098 ivs->n_invariant_uses[iid]++;
4099 if (ivs->n_invariant_uses[iid] == 1)
4100 ivs->n_regs++;
4101 }
4102 }
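
/* The reference counting used by the two functions above, as a
   freestanding sketch; a hypothetical helper, not part of the pass.
   An invariant occupies a register for as long as its use count is
   nonzero, so the register count is adjusted exactly on the 0 -> 1
   and 1 -> 0 transitions. */

static void
example_count_invariant_use (unsigned *n_uses, unsigned *n_regs,
			     unsigned iid, bool add)
{
  if (add)
    {
      if (++n_uses[iid] == 1)
	(*n_regs)++;	/* First use: the invariant now needs a register. */
    }
  else
    {
      if (--n_uses[iid] == 0)
	(*n_regs)--;	/* Last use removed: the register is released. */
    }
}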

/* Set cost pair for USE in set IVS to CP. */

static void
iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_use *use, struct cost_pair *cp)
{
  unsigned uid = use->id, cid;

  if (ivs->cand_for_use[uid] == cp)
    return;

  if (ivs->cand_for_use[uid])
    iv_ca_set_no_cp (data, ivs, use);

  if (cp)
    {
      cid = cp->cand->id;

      ivs->bad_uses--;
      ivs->cand_for_use[uid] = cp;
      ivs->n_cand_uses[cid]++;
      if (ivs->n_cand_uses[cid] == 1)
	{
	  bitmap_set_bit (ivs->cands, cid);
	  /* Do not count the pseudocandidates. */
	  if (cp->cand->iv)
	    ivs->n_regs++;
	  ivs->n_cands++;
	  ivs->cand_cost += cp->cand->cost;

	  iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
	}

      ivs->cand_use_cost += cp->cost;
      iv_ca_set_add_invariants (ivs, cp->depends_on);
      iv_ca_recount_cost (data, ivs);
    }
}

/* Extend set IVS by expressing USE by some of the candidates in it
   if possible. */

static void
iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
	       struct iv_use *use)
{
  struct cost_pair *best_cp = NULL, *cp;
  bitmap_iterator bi;
  unsigned i;

  gcc_assert (ivs->upto >= use->id);

  if (ivs->upto == use->id)
    {
      ivs->upto++;
      ivs->bad_uses++;
    }

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cp = get_use_iv_cost (data, use, iv_cand (data, i));

      if (cheaper_cost_pair (cp, best_cp))
	best_cp = cp;
    }

  iv_ca_set_cp (data, ivs, use, best_cp);
}

/* Get cost for assignment IVS. */

static unsigned
iv_ca_cost (struct iv_ca *ivs)
{
  return (ivs->bad_uses ? INFTY : ivs->cost);
}

/* Returns true if all dependences of CP are among invariants in IVS. */

static bool
iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
{
  unsigned i;
  bitmap_iterator bi;

  if (!cp->depends_on)
    return true;

  EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
    {
      if (ivs->n_invariant_uses[i] == 0)
	return false;
    }

  return true;
}

/* Creates a change that expresses USE by NEW_CP instead of OLD_CP, and
   chains it before NEXT_CHANGE. */

static struct iv_ca_delta *
iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
		 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
{
  struct iv_ca_delta *change = XNEW (struct iv_ca_delta);

  change->use = use;
  change->old_cp = old_cp;
  change->new_cp = new_cp;
  change->next_change = next_change;

  return change;
}

/* Joins two lists of changes L1 and L2. Destructive -- the old lists
   are rewritten. */

static struct iv_ca_delta *
iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
{
  struct iv_ca_delta *last;

  if (!l2)
    return l1;

  if (!l1)
    return l2;

  for (last = l1; last->next_change; last = last->next_change)
    continue;
  last->next_change = l2;

  return l1;
}

/* Returns the candidate by which USE is expressed in IVS. */

static struct cost_pair *
iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
{
  return ivs->cand_for_use[use->id];
}

/* Reverses the list of changes DELTA, forming its inverse. */

static struct iv_ca_delta *
iv_ca_delta_reverse (struct iv_ca_delta *delta)
{
  struct iv_ca_delta *act, *next, *prev = NULL;
  struct cost_pair *tmp;

  for (act = delta; act; act = next)
    {
      next = act->next_change;
      act->next_change = prev;
      prev = act;

      tmp = act->old_cp;
      act->old_cp = act->new_cp;
      act->new_cp = tmp;
    }

  return prev;
}

/* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
   reverted instead. */

static void
iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
		    struct iv_ca_delta *delta, bool forward)
{
  struct cost_pair *from, *to;
  struct iv_ca_delta *act;

  if (!forward)
    delta = iv_ca_delta_reverse (delta);

  for (act = delta; act; act = act->next_change)
    {
      from = act->old_cp;
      to = act->new_cp;
      gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
      iv_ca_set_cp (data, ivs, act->use, to);
    }

  if (!forward)
    iv_ca_delta_reverse (delta);
}

/* Returns true if CAND is used in IVS. */

static bool
iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
{
  return ivs->n_cand_uses[cand->id] > 0;
}

/* Returns the number of induction variable candidates in the set IVS. */

static unsigned
iv_ca_n_cands (struct iv_ca *ivs)
{
  return ivs->n_cands;
}

/* Frees the list of changes DELTA. */

static void
iv_ca_delta_free (struct iv_ca_delta **delta)
{
  struct iv_ca_delta *act, *next;

  for (act = *delta; act; act = next)
    {
      next = act->next_change;
      free (act);
    }

  *delta = NULL;
}
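
/* How the delta machinery above is meant to be used -- a hypothetical
   helper, not part of the pass: apply DELTA to IVS tentatively, keep it
   only if it lowers the cost, and otherwise revert and dispose of it. */

static bool
example_try_delta (struct ivopts_data *data, struct iv_ca *ivs,
		   struct iv_ca_delta **delta)
{
  unsigned old_cost = iv_ca_cost (ivs);

  /* Apply the changes and measure the resulting cost. */
  iv_ca_delta_commit (data, ivs, *delta, true);
  if (iv_ca_cost (ivs) < old_cost)
    return true;	/* An improvement -- keep the new assignment. */

  /* No improvement: undo the changes and free the delta. */
  iv_ca_delta_commit (data, ivs, *delta, false);
  iv_ca_delta_free (delta);
  return false;
}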

/* Allocates a new iv candidate assignment. */

static struct iv_ca *
iv_ca_new (struct ivopts_data *data)
{
  struct iv_ca *nw = XNEW (struct iv_ca);

  nw->upto = 0;
  nw->bad_uses = 0;
  nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
  nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
  nw->cands = BITMAP_ALLOC (NULL);
  nw->n_cands = 0;
  nw->n_regs = 0;
  nw->cand_use_cost = 0;
  nw->cand_cost = 0;
  nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
  nw->cost = 0;

  return nw;
}

/* Free memory occupied by the set IVS. */

static void
iv_ca_free (struct iv_ca **ivs)
{
  free ((*ivs)->cand_for_use);
  free ((*ivs)->n_cand_uses);
  BITMAP_FREE ((*ivs)->cands);
  free ((*ivs)->n_invariant_uses);
  free (*ivs);
  *ivs = NULL;
}

/* Dumps IVS to FILE. */

static void
iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
{
  const char *pref = "  invariants ";
  unsigned i;

  fprintf (file, "  cost %d\n", iv_ca_cost (ivs));
  bitmap_print (file, ivs->cands, "  candidates ", "\n");

  for (i = 1; i <= data->max_inv_id; i++)
    if (ivs->n_invariant_uses[i])
      {
	fprintf (file, "%s%d", pref, i);
	pref = ", ";
      }
  fprintf (file, "\n");
}

/* Try changing the candidate to CAND for each use in IVS. Return the cost
   of the new set, and store the differences in DELTA. The number of
   induction variables in the new set is stored in N_IVS. */

static unsigned
iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta,
	      unsigned *n_ivs)
{
  unsigned i, cost;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp;

  *delta = NULL;
  for (i = 0; i < ivs->upto; i++)
    {
      use = iv_use (data, i);
      old_cp = iv_ca_cand_for_use (ivs, use);

      if (old_cp
	  && old_cp->cand == cand)
	continue;

      new_cp = get_use_iv_cost (data, use, cand);
      if (!new_cp)
	continue;

      if (!iv_ca_has_deps (ivs, new_cp))
	continue;

      if (!cheaper_cost_pair (new_cp, old_cp))
	continue;

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  if (n_ivs)
    *n_ivs = iv_ca_n_cands (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try narrowing set IVS by removing CAND. Return the cost of
   the new set and store the differences in DELTA. */

static unsigned
iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
	      struct iv_cand *cand, struct iv_ca_delta **delta)
{
  unsigned i, ci;
  struct iv_use *use;
  struct cost_pair *old_cp, *new_cp, *cp;
  bitmap_iterator bi;
  struct iv_cand *cnd;
  unsigned cost;

  *delta = NULL;
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      old_cp = iv_ca_cand_for_use (ivs, use);
      if (old_cp->cand != cand)
	continue;

      new_cp = NULL;

      if (data->consider_all_candidates)
	{
	  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}
      else
	{
	  EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
	    {
	      if (ci == cand->id)
		continue;

	      cnd = iv_cand (data, ci);

	      cp = get_use_iv_cost (data, use, cnd);
	      if (!cp)
		continue;
	      if (!iv_ca_has_deps (ivs, cp))
		continue;

	      if (!cheaper_cost_pair (cp, new_cp))
		continue;

	      new_cp = cp;
	    }
	}

      if (!new_cp)
	{
	  iv_ca_delta_free (delta);
	  return INFTY;
	}

      *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
    }

  iv_ca_delta_commit (data, ivs, *delta, true);
  cost = iv_ca_cost (ivs);
  iv_ca_delta_commit (data, ivs, *delta, false);

  return cost;
}

/* Try optimizing the set of candidates IVS by removing candidates other
   than EXCEPT_CAND from it. Return the cost of the new set, and store
   the differences in DELTA. */

static unsigned
iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
	     struct iv_cand *except_cand, struct iv_ca_delta **delta)
{
  bitmap_iterator bi;
  struct iv_ca_delta *act_delta, *best_delta;
  unsigned i, best_cost, acost;
  struct iv_cand *cand;

  best_delta = NULL;
  best_cost = iv_ca_cost (ivs);

  EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (cand == except_cand)
	continue;

      acost = iv_ca_narrow (data, ivs, cand, &act_delta);

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      *delta = NULL;
      return best_cost;
    }

  /* Recurse to possibly remove other unnecessary ivs. */
  iv_ca_delta_commit (data, ivs, best_delta, true);
  best_cost = iv_ca_prune (data, ivs, except_cand, delta);
  iv_ca_delta_commit (data, ivs, best_delta, false);
  *delta = iv_ca_delta_join (best_delta, *delta);
  return best_cost;
}

/* Tries to extend the set IVS in the best possible way in order
   to express the USE. */

static bool
try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
		  struct iv_use *use)
{
  unsigned best_cost, act_cost;
  unsigned i;
  bitmap_iterator bi;
  struct iv_cand *cand;
  struct iv_ca_delta *best_delta = NULL, *act_delta;
  struct cost_pair *cp;

  iv_ca_add_use (data, ivs, use);
  best_cost = iv_ca_cost (ivs);

  cp = iv_ca_cand_for_use (ivs, use);
  if (cp)
    {
      best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
      iv_ca_set_no_cp (data, ivs, use);
    }

  /* First try the important candidates; only if that fails, try the
     use-specific ones. Rationale -- in loops with many variables the best
     choice often is to use just one generic biv. If we added many ivs
     specific to the uses here, the optimization algorithm later would be
     likely to get stuck in a local minimum, thus causing us to create too
     many ivs. Going from few ivs towards more seems more likely to
     succeed -- starting from few ivs, replacing an expensive use by a
     specific iv should always be a win. */
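  /* For instance (an illustrative scenario, not taken from this file):
     in a loop that walks ten arrays, sharing a single counter biv among
     all ten address uses is usually cheaper overall than creating ten
     per-use pointer ivs, even though each pointer iv taken alone would
     make its own use slightly cheaper. */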
  EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      cp = get_use_iv_cost (data, use, cand);
      if (!cp)
	continue;

      iv_ca_set_cp (data, ivs, use, cp);
      act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
      iv_ca_set_no_cp (data, ivs, use);
      act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);

      if (act_cost < best_cost)
	{
	  best_cost = act_cost;

	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (best_cost == INFTY)
    {
      for (i = 0; i < use->n_map_members; i++)
	{
	  cp = use->cost_map + i;
	  cand = cp->cand;
	  if (!cand)
	    continue;

	  /* Already tried this. */
	  if (cand->important)
	    continue;

	  if (iv_ca_cand_used_p (ivs, cand))
	    continue;

	  act_delta = NULL;
	  iv_ca_set_cp (data, ivs, use, cp);
	  act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL);
	  iv_ca_set_no_cp (data, ivs, use);
	  act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
				       cp, act_delta);

	  if (act_cost < best_cost)
	    {
	      best_cost = act_cost;

	      if (best_delta)
		iv_ca_delta_free (&best_delta);
	      best_delta = act_delta;
	    }
	  else
	    iv_ca_delta_free (&act_delta);
	}
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  iv_ca_delta_free (&best_delta);

  return (best_cost != INFTY);
}

/* Finds an initial assignment of candidates to uses. */

static struct iv_ca *
get_initial_solution (struct ivopts_data *data)
{
  struct iv_ca *ivs = iv_ca_new (data);
  unsigned i;

  for (i = 0; i < n_iv_uses (data); i++)
    if (!try_add_cand_for (data, ivs, iv_use (data, i)))
      {
	iv_ca_free (&ivs);
	return NULL;
      }

  return ivs;
}

/* Tries to improve the set of induction variables IVS. */

static bool
try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
{
  unsigned i, acost, best_cost = iv_ca_cost (ivs), n_ivs;
  struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
  struct iv_cand *cand;

  /* Try extending the set of induction variables by one. */
  for (i = 0; i < n_iv_cands (data); i++)
    {
      cand = iv_cand (data, i);

      if (iv_ca_cand_used_p (ivs, cand))
	continue;

      acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs);
      if (!act_delta)
	continue;

      /* If we successfully added the candidate and the set is small enough,
	 try optimizing it by removing other candidates. */
      if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
	{
	  iv_ca_delta_commit (data, ivs, act_delta, true);
	  acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
	  iv_ca_delta_commit (data, ivs, act_delta, false);
	  act_delta = iv_ca_delta_join (act_delta, tmp_delta);
	}

      if (acost < best_cost)
	{
	  best_cost = acost;
	  iv_ca_delta_free (&best_delta);
	  best_delta = act_delta;
	}
      else
	iv_ca_delta_free (&act_delta);
    }

  if (!best_delta)
    {
      /* Try removing the candidates from the set instead. */
      best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);

      /* Nothing more we can do. */
      if (!best_delta)
	return false;
    }

  iv_ca_delta_commit (data, ivs, best_delta, true);
  gcc_assert (best_cost == iv_ca_cost (ivs));
  iv_ca_delta_free (&best_delta);
  return true;
}

/* Attempts to find the optimal set of induction variables. We use a simple
   greedy heuristic: we try to replace at most one candidate in the selected
   solution and remove the unused ivs, as long as this improves the cost. */

static struct iv_ca *
find_optimal_iv_set (struct ivopts_data *data)
{
  unsigned i;
  struct iv_ca *set;
  struct iv_use *use;

  /* Get the initial solution. */
  set = get_initial_solution (data);
  if (!set)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
      return NULL;
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Initial set of candidates:\n");
      iv_ca_dump (data, dump_file, set);
    }

  while (try_improve_iv_set (data, set))
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	{
	  fprintf (dump_file, "Improved to:\n");
	  iv_ca_dump (data, dump_file, set);
	}
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Final cost %d\n\n", iv_ca_cost (set));

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      use->selected = iv_ca_cand_for_use (set, use)->cand;
    }

  return set;
}

/* Creates a new induction variable corresponding to CAND. */

static void
create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
{
  block_stmt_iterator incr_pos;
  tree base;
  bool after = false;

  if (!cand->iv)
    return;

  switch (cand->pos)
    {
    case IP_NORMAL:
      incr_pos = bsi_last (ip_normal_pos (data->current_loop));
      break;

    case IP_END:
      incr_pos = bsi_last (ip_end_pos (data->current_loop));
      after = true;
      break;

    case IP_ORIGINAL:
      /* Mark that the iv is preserved. */
      name_info (data, cand->var_before)->preserve_biv = true;
      name_info (data, cand->var_after)->preserve_biv = true;

      /* Rewrite the increment so that it uses var_before directly. */
      find_interesting_uses_op (data, cand->var_after)->selected = cand;

      return;
    }

  gimple_add_tmp_var (cand->var_before);
  add_referenced_var (cand->var_before);

  base = unshare_expr (cand->iv->base);

  create_iv (base, unshare_expr (cand->iv->step),
	     cand->var_before, data->current_loop,
	     &incr_pos, after, &cand->var_before, &cand->var_after);
}

/* Creates the new induction variables described in SET. */

static void
create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
{
  unsigned i;
  struct iv_cand *cand;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
    {
      cand = iv_cand (data, i);
      create_new_iv (data, cand);
    }
}

/* Removes statement STMT (real or a phi node). If INCLUDING_DEFINED_NAME
   is true, also remove the ssa name defined by the statement. */

static void
remove_statement (tree stmt, bool including_defined_name)
{
  if (TREE_CODE (stmt) == PHI_NODE)
    {
      remove_phi_node (stmt, NULL_TREE, including_defined_name);
    }
  else
    {
      block_stmt_iterator bsi = bsi_for_stmt (stmt);

      bsi_remove (&bsi, true);
      release_defs (stmt);
    }
}

/* Rewrites USE (the definition of an iv used in a nonlinear expression)
   using candidate CAND. */

static void
rewrite_use_nonlinear_expr (struct ivopts_data *data,
			    struct iv_use *use, struct iv_cand *cand)
{
  tree comp;
  tree op, stmts, tgt, ass;
  block_stmt_iterator bsi;

  /* An important special case -- if we are asked to express the value of
     the original iv by itself, just exit; there is no need to
     introduce a new computation (that might also need casting the
     variable to unsigned and back). */
  if (cand->pos == IP_ORIGINAL
      && cand->incremented_at == use->stmt)
    {
      tree step, ctype, utype;
      enum tree_code incr_code = PLUS_EXPR;

      gcc_assert (TREE_CODE (use->stmt) == GIMPLE_MODIFY_STMT);
      gcc_assert (GIMPLE_STMT_OPERAND (use->stmt, 0) == cand->var_after);

      step = cand->iv->step;
      ctype = TREE_TYPE (step);
      utype = TREE_TYPE (cand->var_after);
      if (TREE_CODE (step) == NEGATE_EXPR)
	{
	  incr_code = MINUS_EXPR;
	  step = TREE_OPERAND (step, 0);
	}

      /* Check whether we may leave the computation unchanged.
	 This is the case only if it does not rely on other
	 computations in the loop -- otherwise, the computation
	 we rely upon may be removed in remove_unused_ivs,
	 thus leading to an ICE. */
      op = GIMPLE_STMT_OPERAND (use->stmt, 1);
      if (TREE_CODE (op) == PLUS_EXPR
	  || TREE_CODE (op) == MINUS_EXPR)
	{
	  if (TREE_OPERAND (op, 0) == cand->var_before)
	    op = TREE_OPERAND (op, 1);
	  else if (TREE_CODE (op) == PLUS_EXPR
		   && TREE_OPERAND (op, 1) == cand->var_before)
	    op = TREE_OPERAND (op, 0);
	  else
	    op = NULL_TREE;
	}
      else
	op = NULL_TREE;

      if (op
	  && (TREE_CODE (op) == INTEGER_CST
	      || operand_equal_p (op, step, 0)))
	return;

      /* Otherwise, add the necessary computations to express
	 the iv. */
      op = fold_convert (ctype, cand->var_before);
      comp = fold_convert (utype,
			   build2 (incr_code, ctype, op,
				   unshare_expr (step)));
    }
  else
    {
      comp = get_computation (data->current_loop, use, cand);
      gcc_assert (comp != NULL_TREE);
    }

  switch (TREE_CODE (use->stmt))
    {
    case PHI_NODE:
      tgt = PHI_RESULT (use->stmt);

      /* If we should keep the biv, do not replace it. */
      if (name_info (data, tgt)->preserve_biv)
	return;

      bsi = bsi_after_labels (bb_for_stmt (use->stmt));
      break;

    case GIMPLE_MODIFY_STMT:
      tgt = GIMPLE_STMT_OPERAND (use->stmt, 0);
      bsi = bsi_for_stmt (use->stmt);
      break;

    default:
      gcc_unreachable ();
    }

  op = force_gimple_operand (comp, &stmts, false, SSA_NAME_VAR (tgt));
  if (stmts)
    bsi_insert_before (&bsi, stmts, BSI_SAME_STMT);

  if (TREE_CODE (use->stmt) == PHI_NODE)
    {
      ass = build_gimple_modify_stmt (tgt, op);
      bsi_insert_before (&bsi, ass, BSI_SAME_STMT);
      remove_statement (use->stmt, false);
      SSA_NAME_DEF_STMT (tgt) = ass;
    }
  else
    GIMPLE_STMT_OPERAND (use->stmt, 1) = op;
}

/* Replaces ssa name in index IDX by its basic variable. Callback for
   for_each_index. */

static bool
idx_remove_ssa_names (tree base, tree *idx,
		      void *data ATTRIBUTE_UNUSED)
{
  tree *op;

  if (TREE_CODE (*idx) == SSA_NAME)
    *idx = SSA_NAME_VAR (*idx);

  if (TREE_CODE (base) == ARRAY_REF)
    {
      op = &TREE_OPERAND (base, 2);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
      op = &TREE_OPERAND (base, 3);
      if (*op
	  && TREE_CODE (*op) == SSA_NAME)
	*op = SSA_NAME_VAR (*op);
    }

  return true;
}

/* Unshares REF and replaces ssa names inside it by their basic variables. */

static tree
unshare_and_remove_ssa_names (tree ref)
{
  ref = unshare_expr (ref);
  for_each_index (&ref, idx_remove_ssa_names, NULL);

  return ref;
}

/* Extracts the alias analysis info for the memory reference REF. There are
   several ways this information may be stored, and its precise semantics
   depends on the type of the reference, but there is always one _DECL node
   hidden somewhere that is used to determine the set of virtual operands
   for the reference. The code below deciphers this jungle and extracts
   that single useful piece of information. */

static tree
get_ref_tag (tree ref, tree orig)
{
  tree var = get_base_address (ref);
  tree aref = NULL_TREE, tag, sv;
  HOST_WIDE_INT offset, size, maxsize;

  for (sv = orig; handled_component_p (sv); sv = TREE_OPERAND (sv, 0))
    {
      aref = get_ref_base_and_extent (sv, &offset, &size, &maxsize);
      if (ref)
	break;
    }

  if (aref && SSA_VAR_P (aref) && get_subvars_for_var (aref))
    return unshare_expr (sv);

  if (!var)
    return NULL_TREE;

  if (TREE_CODE (var) == INDIRECT_REF)
    {
      /* If the base is a dereference of a pointer, first check its name
	 memory tag. If it does not have one, use its symbol memory tag. */
      var = TREE_OPERAND (var, 0);
      if (TREE_CODE (var) != SSA_NAME)
	return NULL_TREE;

      if (SSA_NAME_PTR_INFO (var))
	{
	  tag = SSA_NAME_PTR_INFO (var)->name_mem_tag;
	  if (tag)
	    return tag;
	}

      var = SSA_NAME_VAR (var);
      tag = symbol_mem_tag (var);
      gcc_assert (tag != NULL_TREE);
      return tag;
    }
  else
    {
      if (!DECL_P (var))
	return NULL_TREE;

      tag = symbol_mem_tag (var);
      if (tag)
	return tag;

      return var;
    }
}

/* Copies the reference information from OLD_REF to NEW_REF. */

static void
copy_ref_info (tree new_ref, tree old_ref)
{
  if (TREE_CODE (old_ref) == TARGET_MEM_REF)
    copy_mem_ref_info (new_ref, old_ref);
  else
    {
      TMR_ORIGINAL (new_ref) = unshare_and_remove_ssa_names (old_ref);
      TMR_TAG (new_ref) = get_ref_tag (old_ref, TMR_ORIGINAL (new_ref));
    }
}

/* Rewrites USE (an address that is an iv) using candidate CAND. */

static void
rewrite_use_address (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  aff_tree aff;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  tree ref;
  bool ok;

  ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
  gcc_assert (ok);
  unshare_aff_combination (&aff);

  ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff);
  copy_ref_info (ref, *use->op_p);
  *use->op_p = ref;
}

/* Rewrites USE (a condition in which one of the arguments is an iv) using
   candidate CAND. */

static void
rewrite_use_compare (struct ivopts_data *data,
		     struct iv_use *use, struct iv_cand *cand)
{
  tree comp, *var_p, op, bound;
  block_stmt_iterator bsi = bsi_for_stmt (use->stmt);
  enum tree_code compare;
  struct cost_pair *cp = get_use_iv_cost (data, use, cand);
  bool ok;

  bound = cp->value;
  if (bound)
    {
      tree var = var_at_stmt (data->current_loop, cand, use->stmt);
      tree var_type = TREE_TYPE (var);

      compare = iv_elimination_compare (data, use);
      bound = unshare_expr (fold_convert (var_type, bound));
      op = force_gimple_operand_bsi (&bsi, bound, true, NULL_TREE);

      *use->op_p = build2 (compare, boolean_type_node, var, op);
      return;
    }

  /* The induction variable elimination failed; just express the original
     giv. */
  comp = get_computation (data->current_loop, use, cand);
  gcc_assert (comp != NULL_TREE);

  ok = extract_cond_operands (data, use->op_p, &var_p, NULL, NULL, NULL);
  gcc_assert (ok);

  *var_p = force_gimple_operand_bsi (&bsi, comp, true, SSA_NAME_VAR (*var_p));
}

/* Rewrites USE using candidate CAND. */

static void
rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
{
  push_stmt_changes (&use->stmt);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      rewrite_use_nonlinear_expr (data, use, cand);
      break;

    case USE_ADDRESS:
      rewrite_use_address (data, use, cand);
      break;

    case USE_COMPARE:
      rewrite_use_compare (data, use, cand);
      break;

    default:
      gcc_unreachable ();
    }

  pop_stmt_changes (&use->stmt);
}

/* Rewrite the uses using the selected induction variables. */

static void
rewrite_uses (struct ivopts_data *data)
{
  unsigned i;
  struct iv_cand *cand;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      cand = use->selected;
      gcc_assert (cand);

      rewrite_use (data, use, cand);
    }
}

/* Removes the ivs that are not used after rewriting. */

static void
remove_unused_ivs (struct ivopts_data *data)
{
  unsigned j;
  bitmap_iterator bi;

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
    {
      struct version_info *info;

      info = ver_info (data, j);
      if (info->iv
	  && !integer_zerop (info->iv->step)
	  && !info->inv_id
	  && !info->iv->have_use_for
	  && !info->preserve_biv)
	remove_statement (SSA_NAME_DEF_STMT (info->iv->ssa_name), true);
    }
}

/* Frees data allocated by the optimization of a single loop. */

static void
free_loop_data (struct ivopts_data *data)
{
  unsigned i, j;
  bitmap_iterator bi;
  tree obj;

  if (data->niters)
    {
      pointer_map_destroy (data->niters);
      data->niters = NULL;
    }

  EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
    {
      struct version_info *info;

      info = ver_info (data, i);
      if (info->iv)
	free (info->iv);
      info->iv = NULL;
      info->has_nonlin_use = false;
      info->preserve_biv = false;
      info->inv_id = 0;
    }
  bitmap_clear (data->relevant);
  bitmap_clear (data->important_candidates);

  for (i = 0; i < n_iv_uses (data); i++)
    {
      struct iv_use *use = iv_use (data, i);

      free (use->iv);
      BITMAP_FREE (use->related_cands);
      for (j = 0; j < use->n_map_members; j++)
	if (use->cost_map[j].depends_on)
	  BITMAP_FREE (use->cost_map[j].depends_on);
      free (use->cost_map);
      free (use);
    }
  VEC_truncate (iv_use_p, data->iv_uses, 0);

  for (i = 0; i < n_iv_cands (data); i++)
    {
      struct iv_cand *cand = iv_cand (data, i);

      if (cand->iv)
	free (cand->iv);
      if (cand->depends_on)
	BITMAP_FREE (cand->depends_on);
      free (cand);
    }
  VEC_truncate (iv_cand_p, data->iv_candidates, 0);

  if (data->version_info_size < num_ssa_names)
    {
      data->version_info_size = 2 * num_ssa_names;
      free (data->version_info);
      data->version_info = XCNEWVEC (struct version_info,
				     data->version_info_size);
    }

  data->max_inv_id = 0;

  for (i = 0; VEC_iterate (tree, decl_rtl_to_reset, i, obj); i++)
    SET_DECL_RTL (obj, NULL_RTX);

  VEC_truncate (tree, decl_rtl_to_reset, 0);
}

/* Finalizes the data structures used by the iv optimization pass. */

static void
tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
{
  free_loop_data (data);
  free (data->version_info);
  BITMAP_FREE (data->relevant);
  BITMAP_FREE (data->important_candidates);

  VEC_free (tree, heap, decl_rtl_to_reset);
  VEC_free (iv_use_p, heap, data->iv_uses);
  VEC_free (iv_cand_p, heap, data->iv_candidates);
}

/* Optimizes the LOOP. Returns true if anything changed. */

static bool
tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
{
  bool changed = false;
  struct iv_ca *iv_ca;
  edge exit;

  gcc_assert (!data->niters);
  data->current_loop = loop;

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      fprintf (dump_file, "Processing loop %d\n", loop->num);

      exit = single_dom_exit (loop);
      if (exit)
	{
	  fprintf (dump_file, "  single exit %d -> %d, exit condition ",
		   exit->src->index, exit->dest->index);
	  print_generic_expr (dump_file, last_stmt (exit->src), TDF_SLIM);
	  fprintf (dump_file, "\n");
	}

      fprintf (dump_file, "\n");
    }

  /* For each ssa name, determine whether it behaves as an induction
     variable in some loop. */
  if (!find_induction_variables (data))
    goto finish;

  /* Find the interesting uses (item 1). */
  find_interesting_uses (data);
  if (n_iv_uses (data) > MAX_CONSIDERED_USES)
    goto finish;

  /* Find the candidates for the induction variables (item 2). */
  find_iv_candidates (data);

  /* Calculate the costs (item 3, part 1). */
  determine_use_iv_costs (data);
  determine_iv_costs (data);
  determine_set_costs (data);

  /* Find the optimal set of induction variables (item 3, part 2). */
  iv_ca = find_optimal_iv_set (data);
  if (!iv_ca)
    goto finish;
  changed = true;

  /* Create the new induction variables (item 4, part 1). */
  create_new_ivs (data, iv_ca);
  iv_ca_free (&iv_ca);

  /* Rewrite the uses (item 4, part 2). */
  rewrite_uses (data);

  /* Remove the ivs that are unused after rewriting. */
  remove_unused_ivs (data);

  /* We have changed the structure of induction variables; it might happen
     that definitions in the scev database refer to some of them that were
     eliminated. */
  scev_reset ();

finish:
  free_loop_data (data);

  return changed;
}

/* Main entry point. Optimizes induction variables in loops. */

void
tree_ssa_iv_optimize (void)
{
  struct loop *loop;
  struct ivopts_data data;
  loop_iterator li;

  tree_ssa_iv_optimize_init (&data);

  /* Optimize the loops starting with the innermost ones. */
  FOR_EACH_LOOP (li, loop, LI_FROM_INNERMOST)
    {
      if (dump_file && (dump_flags & TDF_DETAILS))
	flow_loop_dump (loop, dump_file, NULL, 1);

      tree_ssa_iv_optimize_loop (&data, loop);
    }

  tree_ssa_iv_optimize_finalize (&data);
}