/* Induction variable optimizations.
   Copyright (C) 2003-2013 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give a better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */

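/* As an illustration (not tied to any particular target's costs), for a
   loop such as

     for (i = 0; i < n; i++)
       a[i] = 0;

   the pass may, for instance, strength-reduce the address computation
   "&a + i * sizeof (a[0])" into a pointer candidate that is simply
   incremented by the element size each iteration, and rewrite the exit
   test "i < n" to compare that pointer against "&a[n]", after which the
   original counter i becomes dead and is eliminated:

     for (p = &a[0]; p != &a[n]; p++)
       *p = 0;

   Which of these transformations actually pays off is decided by the
   cost model described above.  */
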
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "pointer-set.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* The identifier in the use if it is the case.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};
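
/* As a rough illustration (the precise placement depends on the CFG), for
   a loop of the shape

   loop:
     body;
     <IP_NORMAL increment goes here>
     if (i < n) goto loop;     <- exit test at the end of the body
   latch:
     <IP_END increment goes here>

   an IP_NORMAL candidate is incremented just before the exit test, so the
   test can already use the incremented value, while an IP_END candidate is
   incremented in the latch block, after the test.  */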

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  struct pointer_map_t *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table <iv_inv_expr_hasher> inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  void **slot;

  if (!data->niters)
    {
      data->niters = pointer_map_create ();
      slot = NULL;
    }
  else
    slot = pointer_map_contains (data->niters, exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with
         ssa names that appear in phi nodes on abnormal edges, so we do not
         create overlapping live ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      slot = pointer_map_insert (data->niters, exit);
      *slot = desc;
    }
  else
    desc = (struct tree_niter_desc *) *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab.create (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
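
/* For instance (an informal illustration, not an exhaustive specification),
   for a pointer iv with base "&a[4]" the base object is "&a", while for an
   iv derived from a pointer parameter p, say with base "p + 8", it is "p"
   itself; two ivs are known to point into different memory objects only if
   their base objects differ.  */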

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree base_object = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower all address expressions except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
          forms, like &a[0] and &a.  */
  STRIP_NOPS (base_object);
  if (TREE_CODE (base_object) == ADDR_EXPR
      && !DECL_P (TREE_OPERAND (base_object, 0)))
    {
      aff_tree comb;
      widest_int size;
      base_object = get_inner_reference_aff (TREE_OPERAND (base_object, 0),
                                             &comb, &size);
      gcc_assert (base_object != NULL_TREE);
      base_object = build_fold_addr_expr (base_object);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base_object);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
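
/* For example (a hypothetical GIMPLE fragment), for the loop header phi

     i_1 = PHI <0(preheader), i_2(latch)>

   with "i_2 = i_1 + 1" in the loop body, the biv i_1 has base 0 and
   step 1.  */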

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Reset the ssa name so that it is not shown in the dumps, if it was
     not already reset by the caller.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to the location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop in which EXPR is obviously invariant relative
   to the loop LOOP, i.e. the outermost loop such that all operands of EXPR
   are defined outside of it.  Returns NULL if EXPR is not even obviously
   invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX For a base of *D42 with iv->base being &x[0], we produce *&x[0],
     which is not folded and does not trigger the ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
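
/* As an informal example, for a reference "a[i]" to an array of 4-byte
   elements, where i is an iv with step 1, the callback replaces the index
   with the initial value of i and accumulates 4 * 1 = 4 bytes into
   DTA->step.  */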

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
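
/* A few informal examples: for TOP = "i * 12" and BOT = "i" this stores 12
   to *MUL; for TOP = "i * 4 + i * 8" and BOT = "i" it stores 4 + 8 = 12;
   for the constants TOP = 12 and BOT = 4 it stores 3, while TOP = 13 and
   BOT = 4 fails because the division leaves a remainder.  */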

/* Returns true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  tree base;
  tree base_type;
  HOST_WIDE_INT bitsize;
  HOST_WIDE_INT bitpos;
  tree toffset;
  enum machine_mode mode;
  int unsignedp, volatilep;
  unsigned base_align;

  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  /* The test below is basically a copy of what expr.c:normal_inner_ref
     does to check whether the object must be loaded by parts when
     STRICT_ALIGNMENT is true.  */
  base = get_inner_reference (ref, &bitsize, &bitpos, &toffset, &mode,
                              &unsignedp, &volatilep, true);
  base_type = TREE_TYPE (base);
  base_align = get_object_alignment (base);
  base_align = MAX (base_align, TYPE_ALIGN (base_type));

  if (mode != BLKmode)
    {
      unsigned mode_align = GET_MODE_ALIGNMENT (mode);

      if (base_align < mode_align
          || (bitpos % mode_align) != 0
          || (bitpos % BITS_PER_UNIT) != 0)
        return true;

      if (toffset
          && (highest_pow2_factor (toffset) * BITS_PER_UNIT) < mode_align)
        return true;

      if ((highest_pow2_factor (step) * BITS_PER_UNIT) < mode_align)
        return true;
    }

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversion may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ...  */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2048
/* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
is true, assume we are inside an address. If TOP_COMPREF is true, assume
we are at the top level of the processed address. */
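/* For example, assuming "int a[100]" and a target where sizeof (int) == 4,
stripping offsets from the address "&a[i + 3]" would return the core
"&a[i]" and set *OFFSET to 12, since the ARRAY_REF case below multiplies
the stripped index offset 3 by the element size. */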
2052
2053 static tree
2054 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2055 HOST_WIDE_INT *offset)
2056 {
2057 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2058 enum tree_code code;
2059 tree type, orig_type = TREE_TYPE (expr);
2060 HOST_WIDE_INT off0, off1, st;
2061 tree orig_expr = expr;
2062
2063 STRIP_NOPS (expr);
2064
2065 type = TREE_TYPE (expr);
2066 code = TREE_CODE (expr);
2067 *offset = 0;
2068
2069 switch (code)
2070 {
2071 case INTEGER_CST:
2072 if (!cst_and_fits_in_hwi (expr)
2073 || integer_zerop (expr))
2074 return orig_expr;
2075
2076 *offset = int_cst_value (expr);
2077 return build_int_cst (orig_type, 0);
2078
2079 case POINTER_PLUS_EXPR:
2080 case PLUS_EXPR:
2081 case MINUS_EXPR:
2082 op0 = TREE_OPERAND (expr, 0);
2083 op1 = TREE_OPERAND (expr, 1);
2084
2085 op0 = strip_offset_1 (op0, false, false, &off0);
2086 op1 = strip_offset_1 (op1, false, false, &off1);
2087
2088 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2089 if (op0 == TREE_OPERAND (expr, 0)
2090 && op1 == TREE_OPERAND (expr, 1))
2091 return orig_expr;
2092
2093 if (integer_zerop (op1))
2094 expr = op0;
2095 else if (integer_zerop (op0))
2096 {
2097 if (code == MINUS_EXPR)
2098 expr = fold_build1 (NEGATE_EXPR, type, op1);
2099 else
2100 expr = op1;
2101 }
2102 else
2103 expr = fold_build2 (code, type, op0, op1);
2104
2105 return fold_convert (orig_type, expr);
2106
2107 case MULT_EXPR:
2108 op1 = TREE_OPERAND (expr, 1);
2109 if (!cst_and_fits_in_hwi (op1))
2110 return orig_expr;
2111
2112 op0 = TREE_OPERAND (expr, 0);
2113 op0 = strip_offset_1 (op0, false, false, &off0);
2114 if (op0 == TREE_OPERAND (expr, 0))
2115 return orig_expr;
2116
2117 *offset = off0 * int_cst_value (op1);
2118 if (integer_zerop (op0))
2119 expr = op0;
2120 else
2121 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2122
2123 return fold_convert (orig_type, expr);
2124
2125 case ARRAY_REF:
2126 case ARRAY_RANGE_REF:
2127 if (!inside_addr)
2128 return orig_expr;
2129
2130 step = array_ref_element_size (expr);
2131 if (!cst_and_fits_in_hwi (step))
2132 break;
2133
2134 st = int_cst_value (step);
2135 op1 = TREE_OPERAND (expr, 1);
2136 op1 = strip_offset_1 (op1, false, false, &off1);
2137 *offset = off1 * st;
2138
2139 if (top_compref
2140 && integer_zerop (op1))
2141 {
2142 /* Strip the component reference completely. */
2143 op0 = TREE_OPERAND (expr, 0);
2144 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2145 *offset += off0;
2146 return op0;
2147 }
2148 break;
2149
2150 case COMPONENT_REF:
2151 {
2152 tree field;
2153
2154 if (!inside_addr)
2155 return orig_expr;
2156
2157 tmp = component_ref_field_offset (expr);
2158 field = TREE_OPERAND (expr, 1);
2159 if (top_compref
2160 && cst_and_fits_in_hwi (tmp)
2161 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2162 {
2163 HOST_WIDE_INT boffset, abs_off;
2164
2165 /* Strip the component reference completely. */
2166 op0 = TREE_OPERAND (expr, 0);
2167 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2168 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2169 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2170 if (boffset < 0)
2171 abs_off = -abs_off;
2172
2173 *offset = off0 + int_cst_value (tmp) + abs_off;
2174 return op0;
2175 }
2176 }
2177 break;
2178
2179 case ADDR_EXPR:
2180 op0 = TREE_OPERAND (expr, 0);
2181 op0 = strip_offset_1 (op0, true, true, &off0);
2182 *offset += off0;
2183
2184 if (op0 == TREE_OPERAND (expr, 0))
2185 return orig_expr;
2186
2187 expr = build_fold_addr_expr (op0);
2188 return fold_convert (orig_type, expr);
2189
2190 case MEM_REF:
2191 /* ??? Offset operand? */
2192 inside_addr = false;
2193 break;
2194
2195 default:
2196 return orig_expr;
2197 }
2198
/* Default handling of expressions for which we want to recurse into
the first operand. */
2201 op0 = TREE_OPERAND (expr, 0);
2202 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2203 *offset += off0;
2204
2205 if (op0 == TREE_OPERAND (expr, 0)
2206 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2207 return orig_expr;
2208
2209 expr = copy_node (expr);
2210 TREE_OPERAND (expr, 0) = op0;
2211 if (op1)
2212 TREE_OPERAND (expr, 1) = op1;
2213
/* Inside an address, we might strip the top-level component references,
thus changing the type of the expression. The handling of ADDR_EXPR
will fix that up. */
2217 expr = fold_convert (orig_type, expr);
2218
2219 return expr;
2220 }
2221
2222 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2223
2224 static tree
2225 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2226 {
2227 HOST_WIDE_INT off;
2228 tree core = strip_offset_1 (expr, false, false, &off);
2229 *offset = off;
2230 return core;
2231 }
2232
/* Returns a variant of TYPE that can be used as the base type for different
uses. We return an unsigned type with the same precision, which avoids
problems with undefined overflow. */
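/* For instance, "int" maps to "unsigned int", and a pointer type maps to an
unsigned integer type of the same width, so that stepping a candidate
wraps modulo 2^precision instead of invoking undefined behavior. */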
2236
2237 static tree
2238 generic_type_for (tree type)
2239 {
2240 if (POINTER_TYPE_P (type))
2241 return unsigned_type_for (type);
2242
2243 if (TYPE_UNSIGNED (type))
2244 return type;
2245
2246 return unsigned_type_for (type);
2247 }
2248
/* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
the bitmap in which we should store them. */
2251
2252 static struct ivopts_data *fd_ivopts_data;
2253 static tree
2254 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2255 {
2256 bitmap *depends_on = (bitmap *) data;
2257 struct version_info *info;
2258
2259 if (TREE_CODE (*expr_p) != SSA_NAME)
2260 return NULL_TREE;
2261 info = name_info (fd_ivopts_data, *expr_p);
2262
2263 if (!info->inv_id || info->has_nonlin_use)
2264 return NULL_TREE;
2265
2266 if (!*depends_on)
2267 *depends_on = BITMAP_ALLOC (NULL);
2268 bitmap_set_bit (*depends_on, info->inv_id);
2269
2270 return NULL_TREE;
2271 }
2272
/* Adds a candidate BASE + STEP * i. The important field is set to
IMPORTANT and the position to POS. If USE is not NULL, the candidate is
set as related to it. If both BASE and STEP are NULL, we add a
pseudocandidate for the replacement of the final value of the iv by a
direct computation. */
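/* For example, an address use "*(p + 4 * i)" with "int *p" may give rise to
a candidate with base "(unsigned long) p" and step 4 on a 64-bit target;
requesting a candidate with an equal base, step and precision again simply
returns the existing one via the search loop below. */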
2277
2278 static struct iv_cand *
2279 add_candidate_1 (struct ivopts_data *data,
2280 tree base, tree step, bool important, enum iv_position pos,
2281 struct iv_use *use, gimple incremented_at)
2282 {
2283 unsigned i;
2284 struct iv_cand *cand = NULL;
2285 tree type, orig_type;
2286
2287 /* For non-original variables, make sure their values are computed in a type
2288 that does not invoke undefined behavior on overflows (since in general,
2289 we cannot prove that these induction variables are non-wrapping). */
2290 if (pos != IP_ORIGINAL)
2291 {
2292 orig_type = TREE_TYPE (base);
2293 type = generic_type_for (orig_type);
2294 if (type != orig_type)
2295 {
2296 base = fold_convert (type, base);
2297 step = fold_convert (type, step);
2298 }
2299 }
2300
2301 for (i = 0; i < n_iv_cands (data); i++)
2302 {
2303 cand = iv_cand (data, i);
2304
2305 if (cand->pos != pos)
2306 continue;
2307
2308 if (cand->incremented_at != incremented_at
2309 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2310 && cand->ainc_use != use))
2311 continue;
2312
2313 if (!cand->iv)
2314 {
2315 if (!base && !step)
2316 break;
2317
2318 continue;
2319 }
2320
2321 if (!base && !step)
2322 continue;
2323
2324 if (operand_equal_p (base, cand->iv->base, 0)
2325 && operand_equal_p (step, cand->iv->step, 0)
2326 && (TYPE_PRECISION (TREE_TYPE (base))
2327 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2328 break;
2329 }
2330
2331 if (i == n_iv_cands (data))
2332 {
2333 cand = XCNEW (struct iv_cand);
2334 cand->id = i;
2335
2336 if (!base && !step)
2337 cand->iv = NULL;
2338 else
2339 cand->iv = alloc_iv (base, step);
2340
2341 cand->pos = pos;
2342 if (pos != IP_ORIGINAL && cand->iv)
2343 {
2344 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2345 cand->var_after = cand->var_before;
2346 }
2347 cand->important = important;
2348 cand->incremented_at = incremented_at;
2349 data->iv_candidates.safe_push (cand);
2350
2351 if (step
2352 && TREE_CODE (step) != INTEGER_CST)
2353 {
2354 fd_ivopts_data = data;
2355 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2356 }
2357
2358 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2359 cand->ainc_use = use;
2360 else
2361 cand->ainc_use = NULL;
2362
2363 if (dump_file && (dump_flags & TDF_DETAILS))
2364 dump_cand (dump_file, cand);
2365 }
2366
2367 if (important && !cand->important)
2368 {
2369 cand->important = true;
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2372 }
2373
2374 if (use)
2375 {
2376 bitmap_set_bit (use->related_cands, i);
2377 if (dump_file && (dump_flags & TDF_DETAILS))
2378 fprintf (dump_file, "Candidate %d is related to use %d\n",
2379 cand->id, use->id);
2380 }
2381
2382 return cand;
2383 }
2384
2385 /* Returns true if incrementing the induction variable at the end of the LOOP
2386 is allowed.
2387
The purpose is to avoid splitting the latch edge with a biv increment,
thus creating a jump, possibly confusing other optimization passes and
leaving less freedom to the scheduler. So we allow IP_END only if IP_NORMAL
2391 is not available (so we do not have a better alternative), or if the latch
2392 edge is already nonempty. */
2393
2394 static bool
2395 allow_ip_end_pos_p (struct loop *loop)
2396 {
2397 if (!ip_normal_pos (loop))
2398 return true;
2399
2400 if (!empty_block_p (ip_end_pos (loop)))
2401 return true;
2402
2403 return false;
2404 }
2405
/* If possible, adds autoincrement candidates BASE + STEP * i based on use
USE. The important field is set to IMPORTANT. */
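/* E.g. for a QImode load through "char *p" that advances by 1 each
iteration, GET_MODE_SIZE (QImode) == cstepi == 1, so on targets with
{PRE,POST}_INC addressing we add IP_BEFORE_USE and IP_AFTER_USE
candidates that can later be expressed as "*p++"-style addresses. */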
2408
2409 static void
2410 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2411 bool important, struct iv_use *use)
2412 {
2413 basic_block use_bb = gimple_bb (use->stmt);
2414 enum machine_mode mem_mode;
2415 unsigned HOST_WIDE_INT cstepi;
2416
2417 /* If we insert the increment in any position other than the standard
2418 ones, we must ensure that it is incremented once per iteration.
It must not be in an inner nested loop, or on one side of an if
2420 statement. */
2421 if (use_bb->loop_father != data->current_loop
2422 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2423 || stmt_could_throw_p (use->stmt)
2424 || !cst_and_fits_in_hwi (step))
2425 return;
2426
2427 cstepi = int_cst_value (step);
2428
2429 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2430 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2431 || USE_STORE_PRE_INCREMENT (mem_mode))
2432 && GET_MODE_SIZE (mem_mode) == cstepi)
2433 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2434 || USE_STORE_PRE_DECREMENT (mem_mode))
2435 && GET_MODE_SIZE (mem_mode) == -cstepi))
2436 {
2437 enum tree_code code = MINUS_EXPR;
2438 tree new_base;
2439 tree new_step = step;
2440
2441 if (POINTER_TYPE_P (TREE_TYPE (base)))
2442 {
2443 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2444 code = POINTER_PLUS_EXPR;
2445 }
2446 else
2447 new_step = fold_convert (TREE_TYPE (base), new_step);
2448 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2449 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2450 use->stmt);
2451 }
2452 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2453 || USE_STORE_POST_INCREMENT (mem_mode))
2454 && GET_MODE_SIZE (mem_mode) == cstepi)
2455 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2456 || USE_STORE_POST_DECREMENT (mem_mode))
2457 && GET_MODE_SIZE (mem_mode) == -cstepi))
2458 {
2459 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2460 use->stmt);
2461 }
2462 }
2463
/* Adds a candidate BASE + STEP * i. The important field is set to
IMPORTANT. If USE is not NULL, the candidate is set as related to it.
The candidate computation is scheduled at all available positions. */
2467
2468 static void
2469 add_candidate (struct ivopts_data *data,
2470 tree base, tree step, bool important, struct iv_use *use)
2471 {
2472 if (ip_normal_pos (data->current_loop))
2473 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2474 if (ip_end_pos (data->current_loop)
2475 && allow_ip_end_pos_p (data->current_loop))
2476 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2477
2478 if (use != NULL && use->type == USE_ADDRESS)
2479 add_autoinc_candidates (data, base, step, important, use);
2480 }
2481
/* Adds the standard iv candidates. */
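/* That is, the canonical counters {base 0, step 1} in the int, long and
long long types, each wider type only when it is strictly wider than the
previous one and still fits in a machine word. */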
2483
2484 static void
2485 add_standard_iv_candidates (struct ivopts_data *data)
2486 {
2487 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2488
/* The same for the long type, if it is still fast enough. */
2490 if (TYPE_PRECISION
2491 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2492 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2493 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2494 build_int_cst (long_integer_type_node, 1), true, NULL);
2495
/* And for the long long type, if it is still fast enough. */
2497 if (TYPE_PRECISION
2498 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2499 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2500 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2501 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2502 }
2503
2504
/* Adds candidates based on the old induction variable IV. */
2506
2507 static void
2508 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2509 {
2510 gimple phi;
2511 tree def;
2512 struct iv_cand *cand;
2513
2514 add_candidate (data, iv->base, iv->step, true, NULL);
2515
2516 /* The same, but with initial value zero. */
2517 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2518 add_candidate (data, size_int (0), iv->step, true, NULL);
2519 else
2520 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2521 iv->step, true, NULL);
2522
2523 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2524 if (gimple_code (phi) == GIMPLE_PHI)
2525 {
2526 /* Additionally record the possibility of leaving the original iv
2527 untouched. */
2528 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2529 cand = add_candidate_1 (data,
2530 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2531 SSA_NAME_DEF_STMT (def));
2532 cand->var_before = iv->ssa_name;
2533 cand->var_after = def;
2534 }
2535 }
2536
2537 /* Adds candidates based on the old induction variables. */
2538
2539 static void
2540 add_old_ivs_candidates (struct ivopts_data *data)
2541 {
2542 unsigned i;
2543 struct iv *iv;
2544 bitmap_iterator bi;
2545
2546 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2547 {
2548 iv = ver_info (data, i)->iv;
2549 if (iv && iv->biv_p && !integer_zerop (iv->step))
2550 add_old_iv_candidates (data, iv);
2551 }
2552 }
2553
2554 /* Adds candidates based on the value of the induction variable IV and USE. */
2555
2556 static void
2557 add_iv_value_candidates (struct ivopts_data *data,
2558 struct iv *iv, struct iv_use *use)
2559 {
2560 unsigned HOST_WIDE_INT offset;
2561 tree base;
2562 tree basetype;
2563
2564 add_candidate (data, iv->base, iv->step, false, use);
2565
/* The same, but with initial value zero. Make such a variable important,
since it is generic enough that possibly many uses may be based
on it. */
2569 basetype = TREE_TYPE (iv->base);
2570 if (POINTER_TYPE_P (basetype))
2571 basetype = sizetype;
2572 add_candidate (data, build_int_cst (basetype, 0),
2573 iv->step, true, use);
2574
2575 /* Third, try removing the constant offset. Make sure to even
2576 add a candidate for &a[0] vs. (T *)&a. */
2577 base = strip_offset (iv->base, &offset);
2578 if (offset
2579 || base != iv->base)
2580 add_candidate (data, base, iv->step, false, use);
2581 }
2582
2583 /* Adds candidates based on the uses. */
2584
2585 static void
2586 add_derived_ivs_candidates (struct ivopts_data *data)
2587 {
2588 unsigned i;
2589
2590 for (i = 0; i < n_iv_uses (data); i++)
2591 {
2592 struct iv_use *use = iv_use (data, i);
2593
2594 if (!use)
2595 continue;
2596
2597 switch (use->type)
2598 {
2599 case USE_NONLINEAR_EXPR:
2600 case USE_COMPARE:
2601 case USE_ADDRESS:
2602 /* Just add the ivs based on the value of the iv used here. */
2603 add_iv_value_candidates (data, use->iv, use);
2604 break;
2605
2606 default:
2607 gcc_unreachable ();
2608 }
2609 }
2610 }
2611
2612 /* Record important candidates and add them to related_cands bitmaps
2613 if needed. */
2614
2615 static void
2616 record_important_candidates (struct ivopts_data *data)
2617 {
2618 unsigned i;
2619 struct iv_use *use;
2620
2621 for (i = 0; i < n_iv_cands (data); i++)
2622 {
2623 struct iv_cand *cand = iv_cand (data, i);
2624
2625 if (cand->important)
2626 bitmap_set_bit (data->important_candidates, i);
2627 }
2628
2629 data->consider_all_candidates = (n_iv_cands (data)
2630 <= CONSIDER_ALL_CANDIDATES_BOUND);
2631
2632 if (data->consider_all_candidates)
2633 {
2634 /* We will not need "related_cands" bitmaps in this case,
2635 so release them to decrease peak memory consumption. */
2636 for (i = 0; i < n_iv_uses (data); i++)
2637 {
2638 use = iv_use (data, i);
2639 BITMAP_FREE (use->related_cands);
2640 }
2641 }
2642 else
2643 {
2644 /* Add important candidates to the related_cands bitmaps. */
2645 for (i = 0; i < n_iv_uses (data); i++)
2646 bitmap_ior_into (iv_use (data, i)->related_cands,
2647 data->important_candidates);
2648 }
2649 }
2650
/* Allocates the data structure mapping the (use, candidate) pairs to costs.
If consider_all_candidates is true, we use a two-dimensional array indexed
directly by candidate id, otherwise we allocate a small hash table for
every use. */
2654
2655 static void
2656 alloc_use_cost_map (struct ivopts_data *data)
2657 {
2658 unsigned i, size, s;
2659
2660 for (i = 0; i < n_iv_uses (data); i++)
2661 {
2662 struct iv_use *use = iv_use (data, i);
2663
2664 if (data->consider_all_candidates)
2665 size = n_iv_cands (data);
2666 else
2667 {
2668 s = bitmap_count_bits (use->related_cands);
2669
/* Round up to the nearest power of two, so that computing the
modulo by it is fast. */
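/* For instance, 5 related candidates yield a table of size 8, and
set_use_iv_cost below then probes from slot (cand->id & 7). */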
2671 size = s ? (1 << ceil_log2 (s)) : 1;
2672 }
2673
2674 use->n_map_members = size;
2675 use->cost_map = XCNEWVEC (struct cost_pair, size);
2676 }
2677 }
2678
/* Returns a description of the computation cost of an expression whose
runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
2681
2682 static comp_cost
2683 new_cost (unsigned runtime, unsigned complexity)
2684 {
2685 comp_cost cost;
2686
2687 cost.cost = runtime;
2688 cost.complexity = complexity;
2689
2690 return cost;
2691 }
2692
2693 /* Adds costs COST1 and COST2. */
2694
2695 static comp_cost
2696 add_costs (comp_cost cost1, comp_cost cost2)
2697 {
2698 cost1.cost += cost2.cost;
2699 cost1.complexity += cost2.complexity;
2700
2701 return cost1;
2702 }

/* Subtracts costs COST1 and COST2. */
2704
2705 static comp_cost
2706 sub_costs (comp_cost cost1, comp_cost cost2)
2707 {
2708 cost1.cost -= cost2.cost;
2709 cost1.complexity -= cost2.complexity;
2710
2711 return cost1;
2712 }
2713
2714 /* Returns a negative number if COST1 < COST2, a positive number if
2715 COST1 > COST2, and 0 if COST1 = COST2. */
2716
2717 static int
2718 compare_costs (comp_cost cost1, comp_cost cost2)
2719 {
2720 if (cost1.cost == cost2.cost)
2721 return cost1.complexity - cost2.complexity;
2722
2723 return cost1.cost - cost2.cost;
2724 }
2725
2726 /* Returns true if COST is infinite. */
2727
2728 static bool
2729 infinite_cost_p (comp_cost cost)
2730 {
2731 return cost.cost == INFTY;
2732 }
2733
/* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
depends on the invariants DEPENDS_ON, that the value used in expressing
it is VALUE, and, in case of iv elimination, that the comparison
operator is COMP. */
2737
2738 static void
2739 set_use_iv_cost (struct ivopts_data *data,
2740 struct iv_use *use, struct iv_cand *cand,
2741 comp_cost cost, bitmap depends_on, tree value,
2742 enum tree_code comp, int inv_expr_id)
2743 {
2744 unsigned i, s;
2745
2746 if (infinite_cost_p (cost))
2747 {
2748 BITMAP_FREE (depends_on);
2749 return;
2750 }
2751
2752 if (data->consider_all_candidates)
2753 {
2754 use->cost_map[cand->id].cand = cand;
2755 use->cost_map[cand->id].cost = cost;
2756 use->cost_map[cand->id].depends_on = depends_on;
2757 use->cost_map[cand->id].value = value;
2758 use->cost_map[cand->id].comp = comp;
2759 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2760 return;
2761 }
2762
2763 /* n_map_members is a power of two, so this computes modulo. */
2764 s = cand->id & (use->n_map_members - 1);
2765 for (i = s; i < use->n_map_members; i++)
2766 if (!use->cost_map[i].cand)
2767 goto found;
2768 for (i = 0; i < s; i++)
2769 if (!use->cost_map[i].cand)
2770 goto found;
2771
2772 gcc_unreachable ();
2773
2774 found:
2775 use->cost_map[i].cand = cand;
2776 use->cost_map[i].cost = cost;
2777 use->cost_map[i].depends_on = depends_on;
2778 use->cost_map[i].value = value;
2779 use->cost_map[i].comp = comp;
2780 use->cost_map[i].inv_expr_id = inv_expr_id;
2781 }
2782
2783 /* Gets cost of (USE, CANDIDATE) pair. */
2784
2785 static struct cost_pair *
2786 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2787 struct iv_cand *cand)
2788 {
2789 unsigned i, s;
2790 struct cost_pair *ret;
2791
2792 if (!cand)
2793 return NULL;
2794
2795 if (data->consider_all_candidates)
2796 {
2797 ret = use->cost_map + cand->id;
2798 if (!ret->cand)
2799 return NULL;
2800
2801 return ret;
2802 }
2803
2804 /* n_map_members is a power of two, so this computes modulo. */
2805 s = cand->id & (use->n_map_members - 1);
2806 for (i = s; i < use->n_map_members; i++)
2807 if (use->cost_map[i].cand == cand)
2808 return use->cost_map + i;
2809 else if (use->cost_map[i].cand == NULL)
2810 return NULL;
2811 for (i = 0; i < s; i++)
2812 if (use->cost_map[i].cand == cand)
2813 return use->cost_map + i;
2814 else if (use->cost_map[i].cand == NULL)
2815 return NULL;
2816
2817 return NULL;
2818 }
2819
/* Returns an estimate of the cost of computing SEQ. */
2821
2822 static unsigned
2823 seq_cost (rtx seq, bool speed)
2824 {
2825 unsigned cost = 0;
2826 rtx set;
2827
2828 for (; seq; seq = NEXT_INSN (seq))
2829 {
2830 set = single_set (seq);
2831 if (set)
2832 cost += set_src_cost (SET_SRC (set), speed);
2833 else
2834 cost++;
2835 }
2836
2837 return cost;
2838 }
2839
/* Produces DECL_RTL for object OBJ so it looks like it is stored in memory. */
2841 static rtx
2842 produce_memory_decl_rtl (tree obj, int *regno)
2843 {
2844 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2845 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2846 rtx x;
2847
2848 gcc_assert (obj);
2849 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2850 {
2851 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2852 x = gen_rtx_SYMBOL_REF (address_mode, name);
2853 SET_SYMBOL_REF_DECL (x, obj);
2854 x = gen_rtx_MEM (DECL_MODE (obj), x);
2855 set_mem_addr_space (x, as);
2856 targetm.encode_section_info (obj, x, true);
2857 }
2858 else
2859 {
2860 x = gen_raw_REG (address_mode, (*regno)++);
2861 x = gen_rtx_MEM (DECL_MODE (obj), x);
2862 set_mem_addr_space (x, as);
2863 }
2864
2865 return x;
2866 }
2867
/* Prepares decl_rtl for the variables referred to in *EXPR_P. Callback for
walk_tree. DATA contains the actual fake register number. */
2870
2871 static tree
2872 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2873 {
2874 tree obj = NULL_TREE;
2875 rtx x = NULL_RTX;
2876 int *regno = (int *) data;
2877
2878 switch (TREE_CODE (*expr_p))
2879 {
2880 case ADDR_EXPR:
2881 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2882 handled_component_p (*expr_p);
2883 expr_p = &TREE_OPERAND (*expr_p, 0))
2884 continue;
2885 obj = *expr_p;
2886 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2887 x = produce_memory_decl_rtl (obj, regno);
2888 break;
2889
2890 case SSA_NAME:
2891 *ws = 0;
2892 obj = SSA_NAME_VAR (*expr_p);
2893 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2894 if (!obj)
2895 return NULL_TREE;
2896 if (!DECL_RTL_SET_P (obj))
2897 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2898 break;
2899
2900 case VAR_DECL:
2901 case PARM_DECL:
2902 case RESULT_DECL:
2903 *ws = 0;
2904 obj = *expr_p;
2905
2906 if (DECL_RTL_SET_P (obj))
2907 break;
2908
2909 if (DECL_MODE (obj) == BLKmode)
2910 x = produce_memory_decl_rtl (obj, regno);
2911 else
2912 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2913
2914 break;
2915
2916 default:
2917 break;
2918 }
2919
2920 if (x)
2921 {
2922 decl_rtl_to_reset.safe_push (obj);
2923 SET_DECL_RTL (obj, x);
2924 }
2925
2926 return NULL_TREE;
2927 }
2928
/* Determines the cost of computing EXPR. */
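/* A sketch of the mechanism: EXPR is expanded into an RTL sequence using
fake pseudo registers for the SSA names (set up by prepare_decl_rtl
above), and the sequence is then priced by seq_cost; e.g. costing
"a + 16" typically amounts to the cost of a single add insn. */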
2930
2931 static unsigned
2932 computation_cost (tree expr, bool speed)
2933 {
2934 rtx seq, rslt;
2935 tree type = TREE_TYPE (expr);
2936 unsigned cost;
2937 /* Avoid using hard regs in ways which may be unsupported. */
2938 int regno = LAST_VIRTUAL_REGISTER + 1;
2939 struct cgraph_node *node = cgraph_get_node (current_function_decl);
2940 enum node_frequency real_frequency = node->frequency;
2941
2942 node->frequency = NODE_FREQUENCY_NORMAL;
2943 crtl->maybe_hot_insn_p = speed;
2944 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2945 start_sequence ();
2946 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2947 seq = get_insns ();
2948 end_sequence ();
2949 default_rtl_profile ();
2950 node->frequency = real_frequency;
2951
2952 cost = seq_cost (seq, speed);
2953 if (MEM_P (rslt))
2954 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2955 TYPE_ADDR_SPACE (type), speed);
2956 else if (!REG_P (rslt))
2957 cost += set_src_cost (rslt, speed);
2958
2959 return cost;
2960 }
2961
2962 /* Returns variable containing the value of candidate CAND at statement AT. */
2963
2964 static tree
2965 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2966 {
2967 if (stmt_after_increment (loop, cand, stmt))
2968 return cand->var_after;
2969 else
2970 return cand->var_before;
2971 }
2972
2973 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2974 same precision that is at least as wide as the precision of TYPE, stores
2975 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
2976 type of A and B. */
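/* E.g. if A is "(unsigned int) la" and B is "(unsigned int) lb" for some
"long la, lb", both are replaced by their long operands and "long" is
returned, so the difference can be folded in the wider type. */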
2977
2978 static tree
2979 determine_common_wider_type (tree *a, tree *b)
2980 {
2981 tree wider_type = NULL;
2982 tree suba, subb;
2983 tree atype = TREE_TYPE (*a);
2984
2985 if (CONVERT_EXPR_P (*a))
2986 {
2987 suba = TREE_OPERAND (*a, 0);
2988 wider_type = TREE_TYPE (suba);
2989 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
2990 return atype;
2991 }
2992 else
2993 return atype;
2994
2995 if (CONVERT_EXPR_P (*b))
2996 {
2997 subb = TREE_OPERAND (*b, 0);
2998 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
2999 return atype;
3000 }
3001 else
3002 return atype;
3003
3004 *a = suba;
3005 *b = subb;
3006 return wider_type;
3007 }
3008
/* Determines the expression by which USE is expressed from induction variable
3010 CAND at statement AT in LOOP. The expression is stored in a decomposed
3011 form into AFF. Returns false if USE cannot be expressed using CAND. */
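/* For instance, a use with iv {base B, step 2} expressed from a candidate
with iv {base 0, step 1} yields ratio 2 and (before the increment is
accounted for) the combination B + 2 * var, following the scheme
"use = ubase - ratio * cbase + ratio * var" used below. */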
3012
3013 static bool
3014 get_computation_aff (struct loop *loop,
3015 struct iv_use *use, struct iv_cand *cand, gimple at,
3016 struct affine_tree_combination *aff)
3017 {
3018 tree ubase = use->iv->base;
3019 tree ustep = use->iv->step;
3020 tree cbase = cand->iv->base;
3021 tree cstep = cand->iv->step, cstep_common;
3022 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3023 tree common_type, var;
3024 tree uutype;
3025 aff_tree cbase_aff, var_aff;
3026 widest_int rat;
3027
3028 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3029 {
/* We do not have enough precision to express the values of the use. */
3031 return false;
3032 }
3033
3034 var = var_at_stmt (loop, cand, at);
3035 uutype = unsigned_type_for (utype);
3036
/* If the conversion is not a no-op, perform it. */
3038 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3039 {
3040 cstep = fold_convert (uutype, cstep);
3041 cbase = fold_convert (uutype, cbase);
3042 var = fold_convert (uutype, var);
3043 }
3044
3045 if (!constant_multiple_of (ustep, cstep, &rat))
3046 return false;
3047
3048 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3049 type, we achieve better folding by computing their difference in this
3050 wider type, and cast the result to UUTYPE. We do not need to worry about
overflows, as all the arithmetic will in the end be performed in UUTYPE
3052 anyway. */
3053 common_type = determine_common_wider_type (&ubase, &cbase);
3054
3055 /* use = ubase - ratio * cbase + ratio * var. */
3056 tree_to_aff_combination (ubase, common_type, aff);
3057 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3058 tree_to_aff_combination (var, uutype, &var_aff);
3059
3060 /* We need to shift the value if we are after the increment. */
3061 if (stmt_after_increment (loop, cand, at))
3062 {
3063 aff_tree cstep_aff;
3064
3065 if (common_type != uutype)
3066 cstep_common = fold_convert (common_type, cstep);
3067 else
3068 cstep_common = cstep;
3069
3070 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3071 aff_combination_add (&cbase_aff, &cstep_aff);
3072 }
3073
3074 aff_combination_scale (&cbase_aff, -rat);
3075 aff_combination_add (aff, &cbase_aff);
3076 if (common_type != uutype)
3077 aff_combination_convert (aff, uutype);
3078
3079 aff_combination_scale (&var_aff, rat);
3080 aff_combination_add (aff, &var_aff);
3081
3082 return true;
3083 }
3084
3085 /* Return the type of USE. */
3086
3087 static tree
3088 get_use_type (struct iv_use *use)
3089 {
3090 tree base_type = TREE_TYPE (use->iv->base);
3091 tree type;
3092
3093 if (use->type == USE_ADDRESS)
3094 {
3095 /* The base_type may be a void pointer. Create a pointer type based on
3096 the mem_ref instead. */
3097 type = build_pointer_type (TREE_TYPE (*use->op_p));
3098 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3099 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3100 }
3101 else
3102 type = base_type;
3103
3104 return type;
3105 }
3106
/* Determines the expression by which USE is expressed from induction variable
3108 CAND at statement AT in LOOP. The computation is unshared. */
3109
3110 static tree
3111 get_computation_at (struct loop *loop,
3112 struct iv_use *use, struct iv_cand *cand, gimple at)
3113 {
3114 aff_tree aff;
3115 tree type = get_use_type (use);
3116
3117 if (!get_computation_aff (loop, use, cand, at, &aff))
3118 return NULL_TREE;
3119 unshare_aff_combination (&aff);
3120 return fold_convert (type, aff_combination_to_tree (&aff));
3121 }
3122
/* Determines the expression by which USE is expressed from induction variable
3124 CAND in LOOP. The computation is unshared. */
3125
3126 static tree
3127 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3128 {
3129 return get_computation_at (loop, use, cand, use->stmt);
3130 }
3131
3132 /* Adjust the cost COST for being in loop setup rather than loop body.
3133 If we're optimizing for space, the loop setup overhead is constant;
3134 if we're optimizing for speed, amortize it over the per-iteration cost. */
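/* E.g. a setup cost of 10 in a loop averaging 5 iterations is accounted
as 2 per iteration when optimizing for speed. */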
3135 static unsigned
3136 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3137 {
3138 if (cost == INFTY)
3139 return cost;
3140 else if (optimize_loop_for_speed_p (data->current_loop))
3141 return cost / avg_loop_niter (data->current_loop);
3142 else
3143 return cost;
3144 }
3145
3146 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3147 validity for a memory reference accessing memory of mode MODE in
3148 address space AS. */
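/* On i386-like targets, for instance, this typically admits the ratios 1,
2, 4 and 8 supported by the scaled addressing modes. The answer is
probed once per (mode, address space) pair and cached in a bitmap. */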
3149
3150
3151 bool
3152 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3153 addr_space_t as)
3154 {
3155 #define MAX_RATIO 128
3156 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3157 static vec<sbitmap> valid_mult_list;
3158 sbitmap valid_mult;
3159
3160 if (data_index >= valid_mult_list.length ())
3161 valid_mult_list.safe_grow_cleared (data_index + 1);
3162
3163 valid_mult = valid_mult_list[data_index];
3164 if (!valid_mult)
3165 {
3166 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3167 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3168 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3169 rtx addr, scaled;
3170 HOST_WIDE_INT i;
3171
3172 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3173 bitmap_clear (valid_mult);
3174 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3175 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3176 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3177 {
3178 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3179 if (memory_address_addr_space_p (mode, addr, as)
3180 || memory_address_addr_space_p (mode, scaled, as))
3181 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3182 }
3183
3184 if (dump_file && (dump_flags & TDF_DETAILS))
3185 {
3186 fprintf (dump_file, " allowed multipliers:");
3187 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3188 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3189 fprintf (dump_file, " %d", (int) i);
3190 fprintf (dump_file, "\n");
3191 fprintf (dump_file, "\n");
3192 }
3193
3194 valid_mult_list[data_index] = valid_mult;
3195 }
3196
3197 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3198 return false;
3199
3200 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3201 }
3202
/* Returns the cost of an address of the shape
symbol + var + OFFSET + RATIO * index. If SYMBOL_PRESENT is false, the
symbol is omitted. If VAR_PRESENT is false, the variable is omitted.
The cost is computed for a memory reference that accesses
3206 a memory location of mode MEM_MODE in address space AS.
3207
3208 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3209 size of MEM_MODE / RATIO) is available. To make this determination, we
3210 look at the size of the increment to be made, which is given in CSTEP.
3211 CSTEP may be zero if the step is unknown.
3212 STMT_AFTER_INC is true iff the statement we're looking at is after the
3213 increment of the original biv.
3214
3215 TODO -- there must be some better way. This all is quite crude. */
3216
3217 enum ainc_type
3218 {
3219 AINC_PRE_INC, /* Pre increment. */
3220 AINC_PRE_DEC, /* Pre decrement. */
3221 AINC_POST_INC, /* Post increment. */
3222 AINC_POST_DEC, /* Post decrement. */
3223 AINC_NONE /* Also the number of auto increment types. */
3224 };
3225
3226 typedef struct address_cost_data_s
3227 {
3228 HOST_WIDE_INT min_offset, max_offset;
3229 unsigned costs[2][2][2][2];
3230 unsigned ainc_costs[AINC_NONE];
3231 } *address_cost_data;
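
/* The costs array is indexed as
costs[symbol_present][var_present][offset_present][ratio_present], one
precomputed cost for each shape of symbol + var + offset + ratio * index
that get_address_cost below may be asked about. */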
3232
3233
3234 static comp_cost
3235 get_address_cost (bool symbol_present, bool var_present,
3236 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3237 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3238 addr_space_t as, bool speed,
3239 bool stmt_after_inc, bool *may_autoinc)
3240 {
3241 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3242 static vec<address_cost_data> address_cost_data_list;
3243 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3244 address_cost_data data;
3245 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3246 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3247 unsigned cost, acost, complexity;
3248 enum ainc_type autoinc_type;
3249 bool offset_p, ratio_p, autoinc;
3250 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3251 unsigned HOST_WIDE_INT mask;
3252 unsigned bits;
3253
3254 if (data_index >= address_cost_data_list.length ())
3255 address_cost_data_list.safe_grow_cleared (data_index + 1);
3256
3257 data = address_cost_data_list[data_index];
3258 if (!data)
3259 {
3260 HOST_WIDE_INT i;
3261 HOST_WIDE_INT rat, off = 0;
3262 int old_cse_not_expected, width;
3263 unsigned sym_p, var_p, off_p, rat_p, add_c;
3264 rtx seq, addr, base;
3265 rtx reg0, reg1;
3266
3267 data = (address_cost_data) xcalloc (1, sizeof (*data));
3268
3269 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3270
3271 width = GET_MODE_BITSIZE (address_mode) - 1;
3272 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3273 width = HOST_BITS_PER_WIDE_INT - 1;
3274 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3275
3276 for (i = width; i >= 0; i--)
3277 {
3278 off = -((unsigned HOST_WIDE_INT) 1 << i);
3279 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3280 if (memory_address_addr_space_p (mem_mode, addr, as))
3281 break;
3282 }
data->min_offset = (i == -1 ? 0 : off);
3284
3285 for (i = width; i >= 0; i--)
3286 {
3287 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3288 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3289 if (memory_address_addr_space_p (mem_mode, addr, as))
3290 break;
3291 }
3292 if (i == -1)
3293 off = 0;
3294 data->max_offset = off;
3295
3296 if (dump_file && (dump_flags & TDF_DETAILS))
3297 {
3298 fprintf (dump_file, "get_address_cost:\n");
3299 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3300 GET_MODE_NAME (mem_mode),
3301 data->min_offset);
3302 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3303 GET_MODE_NAME (mem_mode),
3304 data->max_offset);
3305 }
3306
3307 rat = 1;
3308 for (i = 2; i <= MAX_RATIO; i++)
3309 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3310 {
3311 rat = i;
3312 break;
3313 }
3314
3315 /* Compute the cost of various addressing modes. */
3316 acost = 0;
3317 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3318 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3319
3320 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3321 || USE_STORE_PRE_DECREMENT (mem_mode))
3322 {
3323 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3324 has_predec[mem_mode]
3325 = memory_address_addr_space_p (mem_mode, addr, as);
3326
3327 if (has_predec[mem_mode])
3328 data->ainc_costs[AINC_PRE_DEC]
3329 = address_cost (addr, mem_mode, as, speed);
3330 }
3331 if (USE_LOAD_POST_DECREMENT (mem_mode)
3332 || USE_STORE_POST_DECREMENT (mem_mode))
3333 {
3334 addr = gen_rtx_POST_DEC (address_mode, reg0);
3335 has_postdec[mem_mode]
3336 = memory_address_addr_space_p (mem_mode, addr, as);
3337
3338 if (has_postdec[mem_mode])
3339 data->ainc_costs[AINC_POST_DEC]
3340 = address_cost (addr, mem_mode, as, speed);
3341 }
3342 if (USE_LOAD_PRE_INCREMENT (mem_mode)
|| USE_STORE_PRE_INCREMENT (mem_mode))
3344 {
3345 addr = gen_rtx_PRE_INC (address_mode, reg0);
3346 has_preinc[mem_mode]
3347 = memory_address_addr_space_p (mem_mode, addr, as);
3348
3349 if (has_preinc[mem_mode])
3350 data->ainc_costs[AINC_PRE_INC]
3351 = address_cost (addr, mem_mode, as, speed);
3352 }
3353 if (USE_LOAD_POST_INCREMENT (mem_mode)
3354 || USE_STORE_POST_INCREMENT (mem_mode))
3355 {
3356 addr = gen_rtx_POST_INC (address_mode, reg0);
3357 has_postinc[mem_mode]
3358 = memory_address_addr_space_p (mem_mode, addr, as);
3359
3360 if (has_postinc[mem_mode])
3361 data->ainc_costs[AINC_POST_INC]
3362 = address_cost (addr, mem_mode, as, speed);
3363 }
3364 for (i = 0; i < 16; i++)
3365 {
3366 sym_p = i & 1;
3367 var_p = (i >> 1) & 1;
3368 off_p = (i >> 2) & 1;
3369 rat_p = (i >> 3) & 1;
3370
3371 addr = reg0;
3372 if (rat_p)
3373 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3374 gen_int_mode (rat, address_mode));
3375
3376 if (var_p)
3377 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3378
3379 if (sym_p)
3380 {
3381 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3382 /* ??? We can run into trouble with some backends by presenting
3383 it with symbols which haven't been properly passed through
3384 targetm.encode_section_info. By setting the local bit, we
3385 enhance the probability of things working. */
3386 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3387
3388 if (off_p)
3389 base = gen_rtx_fmt_e (CONST, address_mode,
3390 gen_rtx_fmt_ee
3391 (PLUS, address_mode, base,
3392 gen_int_mode (off, address_mode)));
3393 }
3394 else if (off_p)
3395 base = gen_int_mode (off, address_mode);
3396 else
3397 base = NULL_RTX;
3398
3399 if (base)
3400 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3401
3402 start_sequence ();
/* To avoid splitting addressing modes, pretend that no CSE will
follow. */
3405 old_cse_not_expected = cse_not_expected;
3406 cse_not_expected = true;
3407 addr = memory_address_addr_space (mem_mode, addr, as);
3408 cse_not_expected = old_cse_not_expected;
3409 seq = get_insns ();
3410 end_sequence ();
3411
3412 acost = seq_cost (seq, speed);
3413 acost += address_cost (addr, mem_mode, as, speed);
3414
3415 if (!acost)
3416 acost = 1;
3417 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3418 }
3419
/* On some targets, it is quite expensive to load a symbol into a register,
which makes addresses that contain symbols look much more expensive.
However, the symbol will have to be loaded in any case before the
loop (and quite likely we have it in a register already), so it does
not make much sense to penalize them too heavily. So make some final
tweaks for the SYMBOL_PRESENT modes:

If VAR_PRESENT is false, and the mode obtained by changing the symbol
to a var is cheaper, use this mode with a small penalty.
If VAR_PRESENT is true, check whether the mode with
SYMBOL_PRESENT = false is cheaper even with the cost of the addition,
and if this is the case, use it. */
3432 add_c = add_cost (speed, address_mode);
3433 for (i = 0; i < 8; i++)
3434 {
3435 var_p = i & 1;
3436 off_p = (i >> 1) & 1;
3437 rat_p = (i >> 2) & 1;
3438
3439 acost = data->costs[0][1][off_p][rat_p] + 1;
3440 if (var_p)
3441 acost += add_c;
3442
3443 if (acost < data->costs[1][var_p][off_p][rat_p])
3444 data->costs[1][var_p][off_p][rat_p] = acost;
3445 }
3446
3447 if (dump_file && (dump_flags & TDF_DETAILS))
3448 {
3449 fprintf (dump_file, "Address costs:\n");
3450
3451 for (i = 0; i < 16; i++)
3452 {
3453 sym_p = i & 1;
3454 var_p = (i >> 1) & 1;
3455 off_p = (i >> 2) & 1;
3456 rat_p = (i >> 3) & 1;
3457
3458 fprintf (dump_file, " ");
3459 if (sym_p)
3460 fprintf (dump_file, "sym + ");
3461 if (var_p)
3462 fprintf (dump_file, "var + ");
3463 if (off_p)
3464 fprintf (dump_file, "cst + ");
3465 if (rat_p)
3466 fprintf (dump_file, "rat * ");
3467
3468 acost = data->costs[sym_p][var_p][off_p][rat_p];
3469 fprintf (dump_file, "index costs %d\n", acost);
3470 }
3471 if (has_predec[mem_mode] || has_postdec[mem_mode]
3472 || has_preinc[mem_mode] || has_postinc[mem_mode])
3473 fprintf (dump_file, " May include autoinc/dec\n");
3474 fprintf (dump_file, "\n");
3475 }
3476
3477 address_cost_data_list[data_index] = data;
3478 }
3479
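/* Sign-extend OFFSET from the width of the address mode: e.g. with a
32-bit address mode, 0xfffffffc becomes -4, so that the comparison
against min_offset/max_offset below behaves as expected. */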
3480 bits = GET_MODE_BITSIZE (address_mode);
3481 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3482 offset &= mask;
3483 if ((offset >> (bits - 1) & 1))
3484 offset |= ~mask;
3485 s_offset = offset;
3486
3487 autoinc = false;
3488 autoinc_type = AINC_NONE;
3489 msize = GET_MODE_SIZE (mem_mode);
3490 autoinc_offset = offset;
3491 if (stmt_after_inc)
3492 autoinc_offset += ratio * cstep;
3493 if (symbol_present || var_present || ratio != 1)
3494 autoinc = false;
3495 else
3496 {
3497 if (has_postinc[mem_mode] && autoinc_offset == 0
3498 && msize == cstep)
3499 autoinc_type = AINC_POST_INC;
3500 else if (has_postdec[mem_mode] && autoinc_offset == 0
3501 && msize == -cstep)
3502 autoinc_type = AINC_POST_DEC;
3503 else if (has_preinc[mem_mode] && autoinc_offset == msize
3504 && msize == cstep)
3505 autoinc_type = AINC_PRE_INC;
3506 else if (has_predec[mem_mode] && autoinc_offset == -msize
3507 && msize == -cstep)
3508 autoinc_type = AINC_PRE_DEC;
3509
3510 if (autoinc_type != AINC_NONE)
3511 autoinc = true;
3512 }
3513
3514 cost = 0;
3515 offset_p = (s_offset != 0
3516 && data->min_offset <= s_offset
3517 && s_offset <= data->max_offset);
3518 ratio_p = (ratio != 1
3519 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3520
3521 if (ratio != 1 && !ratio_p)
3522 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3523
3524 if (s_offset && !offset_p && !symbol_present)
3525 cost += add_cost (speed, address_mode);
3526
3527 if (may_autoinc)
3528 *may_autoinc = autoinc;
3529 if (autoinc)
3530 acost = data->ainc_costs[autoinc_type];
3531 else
3532 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3533 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3534 return new_cost (cost + acost, complexity);
3535 }
3536
/* Calculates the speed or size cost of the shiftadd EXPR in MODE. MULT
is the EXPR operand holding the multiplication. COST0 and COST1 are
the costs for computing the operands of EXPR. Returns true if
successful, and returns the cost in COST. */
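/* For instance, "a + b * 4" can be carried out as a shift-and-add with
m == 2 (since 4 == 1 << 2), so it is priced at shiftadd_cost rather
than at a full multiply plus add. */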
3541
3542 static bool
3543 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3544 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3545 {
3546 comp_cost res;
3547 tree op1 = TREE_OPERAND (expr, 1);
3548 tree cst = TREE_OPERAND (mult, 1);
3549 tree multop = TREE_OPERAND (mult, 0);
3550 int m = exact_log2 (int_cst_value (cst));
3551 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3552 int sa_cost;
3553 bool equal_p = false;
3554
3555 if (!(m >= 0 && m < maxm))
3556 return false;
3557
3558 if (operand_equal_p (op1, mult, 0))
3559 equal_p = true;
3560
3561 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3562 ? shiftadd_cost (speed, mode, m)
3563 : (equal_p
3564 ? shiftsub1_cost (speed, mode, m)
3565 : shiftsub0_cost (speed, mode, m)));
3566 res = new_cost (sa_cost, 0);
3567 res = add_costs (res, equal_p ? cost0 : cost1);
3568
3569 STRIP_NOPS (multop);
3570 if (!is_gimple_val (multop))
3571 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3572
3573 *cost = res;
3574 return true;
3575 }
3576
3577 /* Estimates cost of forcing expression EXPR into a variable. */
3578
3579 static comp_cost
3580 force_expr_to_var_cost (tree expr, bool speed)
3581 {
3582 static bool costs_initialized = false;
3583 static unsigned integer_cost [2];
3584 static unsigned symbol_cost [2];
3585 static unsigned address_cost [2];
3586 tree op0, op1;
3587 comp_cost cost0, cost1, cost;
3588 enum machine_mode mode;
3589
3590 if (!costs_initialized)
3591 {
3592 tree type = build_pointer_type (integer_type_node);
3593 tree var, addr;
3594 rtx x;
3595 int i;
3596
3597 var = create_tmp_var_raw (integer_type_node, "test_var");
3598 TREE_STATIC (var) = 1;
3599 x = produce_memory_decl_rtl (var, NULL);
3600 SET_DECL_RTL (var, x);
3601
3602 addr = build1 (ADDR_EXPR, type, var);
3603
3604
3605 for (i = 0; i < 2; i++)
3606 {
3607 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3608 2000), i);
3609
3610 symbol_cost[i] = computation_cost (addr, i) + 1;
3611
3612 address_cost[i]
3613 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3614 if (dump_file && (dump_flags & TDF_DETAILS))
3615 {
3616 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3617 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3618 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3619 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3620 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3621 fprintf (dump_file, "\n");
3622 }
3623 }
3624
3625 costs_initialized = true;
3626 }
3627
3628 STRIP_NOPS (expr);
3629
3630 if (SSA_VAR_P (expr))
3631 return no_cost;
3632
3633 if (is_gimple_min_invariant (expr))
3634 {
3635 if (TREE_CODE (expr) == INTEGER_CST)
3636 return new_cost (integer_cost [speed], 0);
3637
3638 if (TREE_CODE (expr) == ADDR_EXPR)
3639 {
3640 tree obj = TREE_OPERAND (expr, 0);
3641
3642 if (TREE_CODE (obj) == VAR_DECL
3643 || TREE_CODE (obj) == PARM_DECL
3644 || TREE_CODE (obj) == RESULT_DECL)
3645 return new_cost (symbol_cost [speed], 0);
3646 }
3647
3648 return new_cost (address_cost [speed], 0);
3649 }
3650
3651 switch (TREE_CODE (expr))
3652 {
3653 case POINTER_PLUS_EXPR:
3654 case PLUS_EXPR:
3655 case MINUS_EXPR:
3656 case MULT_EXPR:
3657 op0 = TREE_OPERAND (expr, 0);
3658 op1 = TREE_OPERAND (expr, 1);
3659 STRIP_NOPS (op0);
3660 STRIP_NOPS (op1);
3661 break;
3662
3663 CASE_CONVERT:
3664 case NEGATE_EXPR:
3665 op0 = TREE_OPERAND (expr, 0);
3666 STRIP_NOPS (op0);
3667 op1 = NULL_TREE;
3668 break;
3669
3670 default:
3671 /* Just an arbitrary value, FIXME. */
3672 return new_cost (target_spill_cost[speed], 0);
3673 }
3674
3675 if (op0 == NULL_TREE
3676 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3677 cost0 = no_cost;
3678 else
3679 cost0 = force_expr_to_var_cost (op0, speed);
3680
3681 if (op1 == NULL_TREE
3682 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3683 cost1 = no_cost;
3684 else
3685 cost1 = force_expr_to_var_cost (op1, speed);
3686
3687 mode = TYPE_MODE (TREE_TYPE (expr));
3688 switch (TREE_CODE (expr))
3689 {
3690 case POINTER_PLUS_EXPR:
3691 case PLUS_EXPR:
3692 case MINUS_EXPR:
3693 case NEGATE_EXPR:
3694 cost = new_cost (add_cost (speed, mode), 0);
3695 if (TREE_CODE (expr) != NEGATE_EXPR)
3696 {
3697 tree mult = NULL_TREE;
3698 comp_cost sa_cost;
3699 if (TREE_CODE (op1) == MULT_EXPR)
3700 mult = op1;
3701 else if (TREE_CODE (op0) == MULT_EXPR)
3702 mult = op0;
3703
3704 if (mult != NULL_TREE
3705 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3706 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3707 speed, &sa_cost))
3708 return sa_cost;
3709 }
3710 break;
3711
3712 CASE_CONVERT:
3713 {
3714 tree inner_mode, outer_mode;
3715 outer_mode = TREE_TYPE (expr);
3716 inner_mode = TREE_TYPE (op0);
3717 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3718 TYPE_MODE (inner_mode), speed), 0);
3719 }
3720 break;
3721
3722 case MULT_EXPR:
3723 if (cst_and_fits_in_hwi (op0))
3724 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3725 mode, speed), 0);
3726 else if (cst_and_fits_in_hwi (op1))
3727 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3728 mode, speed), 0);
3729 else
3730 return new_cost (target_spill_cost [speed], 0);
3731 break;
3732
3733 default:
3734 gcc_unreachable ();
3735 }
3736
3737 cost = add_costs (cost, cost0);
3738 cost = add_costs (cost, cost1);
3739
3740 /* Bound the cost by target_spill_cost. The parts of complicated
3741 computations often are either loop invariant or at least can
3742 be shared between several iv uses, so letting this grow without
3743 limits would not give reasonable results. */
3744 if (cost.cost > (int) target_spill_cost [speed])
3745 cost.cost = target_spill_cost [speed];
3746
3747 return cost;
3748 }
3749
3750 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3751 invariants the computation depends on. */
3752
3753 static comp_cost
3754 force_var_cost (struct ivopts_data *data,
3755 tree expr, bitmap *depends_on)
3756 {
3757 if (depends_on)
3758 {
3759 fd_ivopts_data = data;
3760 walk_tree (&expr, find_depends, depends_on, NULL);
3761 }
3762
3763 return force_expr_to_var_cost (expr, data->speed);
3764 }
3765
3766 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3767 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
3768 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3769 invariants the computation depends on. */
3770
3771 static comp_cost
3772 split_address_cost (struct ivopts_data *data,
3773 tree addr, bool *symbol_present, bool *var_present,
3774 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3775 {
3776 tree core;
3777 HOST_WIDE_INT bitsize;
3778 HOST_WIDE_INT bitpos;
3779 tree toffset;
3780 enum machine_mode mode;
3781 int unsignedp, volatilep;
3782
3783 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3784 &unsignedp, &volatilep, false);
3785
3786 if (toffset != 0
3787 || bitpos % BITS_PER_UNIT != 0
3788 || TREE_CODE (core) != VAR_DECL)
3789 {
3790 *symbol_present = false;
3791 *var_present = true;
3792 fd_ivopts_data = data;
3793 walk_tree (&addr, find_depends, depends_on, NULL);
3794 return new_cost (target_spill_cost[data->speed], 0);
3795 }
3796
3797 *offset += bitpos / BITS_PER_UNIT;
3798 if (TREE_STATIC (core)
3799 || DECL_EXTERNAL (core))
3800 {
3801 *symbol_present = true;
3802 *var_present = false;
3803 return no_cost;
3804 }
3805
3806 *symbol_present = false;
3807 *var_present = true;
3808 return no_cost;
3809 }
3810
3811 /* Estimates cost of expressing difference of addresses E1 - E2 as
3812 var + symbol + offset. The value of offset is added to OFFSET,
3813 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3814 part is missing. DEPENDS_ON is a set of the invariants the computation
3815 depends on. */
3816
3817 static comp_cost
3818 ptr_difference_cost (struct ivopts_data *data,
3819 tree e1, tree e2, bool *symbol_present, bool *var_present,
3820 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3821 {
3822 HOST_WIDE_INT diff = 0;
3823 aff_tree aff_e1, aff_e2;
3824 tree type;
3825
3826 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3827
3828 if (ptr_difference_const (e1, e2, &diff))
3829 {
3830 *offset += diff;
3831 *symbol_present = false;
3832 *var_present = false;
3833 return no_cost;
3834 }
3835
3836 if (integer_zerop (e2))
3837 return split_address_cost (data, TREE_OPERAND (e1, 0),
3838 symbol_present, var_present, offset, depends_on);
3839
3840 *symbol_present = false;
3841 *var_present = true;
3842
3843 type = signed_type_for (TREE_TYPE (e1));
3844 tree_to_aff_combination (e1, type, &aff_e1);
3845 tree_to_aff_combination (e2, type, &aff_e2);
3846 aff_combination_scale (&aff_e2, -1);
3847 aff_combination_add (&aff_e1, &aff_e2);
3848
3849 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3850 }
3851
3852 /* Estimates cost of expressing difference E1 - E2 as
3853 var + symbol + offset. The value of offset is added to OFFSET,
3854 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3855 part is missing. DEPENDS_ON is a set of the invariants the computation
3856 depends on. */
3857
3858 static comp_cost
3859 difference_cost (struct ivopts_data *data,
3860 tree e1, tree e2, bool *symbol_present, bool *var_present,
3861 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3862 {
3863 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3864 unsigned HOST_WIDE_INT off1, off2;
3865 aff_tree aff_e1, aff_e2;
3866 tree type;
3867
3868 e1 = strip_offset (e1, &off1);
3869 e2 = strip_offset (e2, &off2);
3870 *offset += off1 - off2;
3871
3872 STRIP_NOPS (e1);
3873 STRIP_NOPS (e2);
3874
3875 if (TREE_CODE (e1) == ADDR_EXPR)
3876 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3877 offset, depends_on);
3878 *symbol_present = false;
3879
3880 if (operand_equal_p (e1, e2, 0))
3881 {
3882 *var_present = false;
3883 return no_cost;
3884 }
3885
3886 *var_present = true;
3887
3888 if (integer_zerop (e2))
3889 return force_var_cost (data, e1, depends_on);
3890
3891 if (integer_zerop (e1))
3892 {
3893 comp_cost cost = force_var_cost (data, e2, depends_on);
3894 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3895 return cost;
3896 }
3897
3898 type = signed_type_for (TREE_TYPE (e1));
3899 tree_to_aff_combination (e1, type, &aff_e1);
3900 tree_to_aff_combination (e2, type, &aff_e2);
3901 aff_combination_scale (&aff_e2, -1);
3902 aff_combination_add (&aff_e1, &aff_e2);
3903
3904 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3905 }
3906
3907 /* Returns true if AFF1 and AFF2 are identical. */
3908
3909 static bool
3910 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3911 {
3912 unsigned i;
3913
3914 if (aff1->n != aff2->n)
3915 return false;
3916
3917 for (i = 0; i < aff1->n; i++)
3918 {
3919 if (aff1->elts[i].coef != aff2->elts[i].coef)
3920 return false;
3921
3922 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3923 return false;
3924 }
3925 return true;
3926 }
3927
3928 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3929
3930 static int
3931 get_expr_id (struct ivopts_data *data, tree expr)
3932 {
3933 struct iv_inv_expr_ent ent;
3934 struct iv_inv_expr_ent **slot;
3935
3936 ent.expr = expr;
3937 ent.hash = iterative_hash_expr (expr, 0);
3938 slot = data->inv_expr_tab.find_slot (&ent, INSERT);
3939 if (*slot)
3940 return (*slot)->id;
3941
3942 *slot = XNEW (struct iv_inv_expr_ent);
3943 (*slot)->expr = expr;
3944 (*slot)->hash = ent.hash;
3945 (*slot)->id = data->inv_expr_id++;
3946 return (*slot)->id;
3947 }
3948
3949 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
requires a new compiler-generated temporary. Returns -1 otherwise.
3951 ADDRESS_P is a flag indicating if the expression is for address
3952 computation. */
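/* For example, with RATIO 1, "&x.f" and "&x.f + 4" differ only in the
stripped constant, so no new temporary is needed and -1 is returned,
whereas two unrelated pointers "p" and "q" hash the difference "p - q"
via get_expr_id. */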
3953
3954 static int
3955 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3956 tree cbase, HOST_WIDE_INT ratio,
3957 bool address_p)
3958 {
3959 aff_tree ubase_aff, cbase_aff;
3960 tree expr, ub, cb;
3961
3962 STRIP_NOPS (ubase);
3963 STRIP_NOPS (cbase);
3964 ub = ubase;
3965 cb = cbase;
3966
3967 if ((TREE_CODE (ubase) == INTEGER_CST)
3968 && (TREE_CODE (cbase) == INTEGER_CST))
3969 return -1;
3970
3971 /* Strips the constant part. */
3972 if (TREE_CODE (ubase) == PLUS_EXPR
3973 || TREE_CODE (ubase) == MINUS_EXPR
3974 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
3975 {
3976 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
3977 ubase = TREE_OPERAND (ubase, 0);
3978 }
3979
3980 /* Strips the constant part. */
3981 if (TREE_CODE (cbase) == PLUS_EXPR
3982 || TREE_CODE (cbase) == MINUS_EXPR
3983 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
3984 {
3985 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
3986 cbase = TREE_OPERAND (cbase, 0);
3987 }
3988
3989 if (address_p)
3990 {
3991 if (((TREE_CODE (ubase) == SSA_NAME)
3992 || (TREE_CODE (ubase) == ADDR_EXPR
3993 && is_gimple_min_invariant (ubase)))
3994 && (TREE_CODE (cbase) == INTEGER_CST))
3995 return -1;
3996
3997 if (((TREE_CODE (cbase) == SSA_NAME)
3998 || (TREE_CODE (cbase) == ADDR_EXPR
3999 && is_gimple_min_invariant (cbase)))
4000 && (TREE_CODE (ubase) == INTEGER_CST))
4001 return -1;
4002 }
4003
4004 if (ratio == 1)
4005 {
4006 if (operand_equal_p (ubase, cbase, 0))
4007 return -1;
4008
4009 if (TREE_CODE (ubase) == ADDR_EXPR
4010 && TREE_CODE (cbase) == ADDR_EXPR)
4011 {
4012 tree usym, csym;
4013
4014 usym = TREE_OPERAND (ubase, 0);
4015 csym = TREE_OPERAND (cbase, 0);
4016 if (TREE_CODE (usym) == ARRAY_REF)
4017 {
4018 tree ind = TREE_OPERAND (usym, 1);
4019 if (TREE_CODE (ind) == INTEGER_CST
4020 && tree_fits_shwi_p (ind)
4021 && tree_to_shwi (ind) == 0)
4022 usym = TREE_OPERAND (usym, 0);
4023 }
4024 if (TREE_CODE (csym) == ARRAY_REF)
4025 {
4026 tree ind = TREE_OPERAND (csym, 1);
4027 if (TREE_CODE (ind) == INTEGER_CST
4028 && tree_fits_shwi_p (ind)
4029 && tree_to_shwi (ind) == 0)
4030 csym = TREE_OPERAND (csym, 0);
4031 }
4032 if (operand_equal_p (usym, csym, 0))
4033 return -1;
4034 }
4035 /* Now do a more complex comparison. */
4036 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4037 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4038 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4039 return -1;
4040 }
4041
4042 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4043 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4044
4045 aff_combination_scale (&cbase_aff, -1 * ratio);
4046 aff_combination_add (&ubase_aff, &cbase_aff);
4047 expr = aff_combination_to_tree (&ubase_aff);
4048 return get_expr_id (data, expr);
4049 }
4050
4051
4052
4053 /* Determines the cost of the computation by which USE is expressed
4054 from induction variable CAND. If ADDRESS_P is true, we just need
4055 to create an address from it, otherwise we want to get it into a
4056 register. A set of invariants we depend on is stored in
4057 DEPENDS_ON. AT is the statement at which the value is computed.
4058 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4059 addressing is likely. */
4060
4061 static comp_cost
4062 get_computation_cost_at (struct ivopts_data *data,
4063 struct iv_use *use, struct iv_cand *cand,
4064 bool address_p, bitmap *depends_on, gimple at,
4065 bool *can_autoinc,
4066 int *inv_expr_id)
4067 {
4068 tree ubase = use->iv->base, ustep = use->iv->step;
4069 tree cbase, cstep;
4070 tree utype = TREE_TYPE (ubase), ctype;
4071 unsigned HOST_WIDE_INT cstepi, offset = 0;
4072 HOST_WIDE_INT ratio, aratio;
4073 bool var_present, symbol_present, stmt_is_after_inc;
4074 comp_cost cost;
4075 widest_int rat;
4076 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4077 enum machine_mode mem_mode = (address_p
4078 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4079 : VOIDmode);
4080
4081 *depends_on = NULL;
4082
4083 /* Only consider real candidates. */
4084 if (!cand->iv)
4085 return infinite_cost;
4086
4087 cbase = cand->iv->base;
4088 cstep = cand->iv->step;
4089 ctype = TREE_TYPE (cbase);
4090
4091 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4092 {
4093 /* We do not have enough precision to express the values of USE. */
4094 return infinite_cost;
4095 }
4096
4097 if (address_p
4098 || (use->iv->base_object
4099 && cand->iv->base_object
4100 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4101 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4102 {
4103 /* Do not try to express the address of an object with a computation
4104 based on the address of a different object. This may cause problems
4105 in RTL-level alias analysis (which does not expect this to happen,
4106 as it is illegal in C), and would be unlikely to be useful
4107 anyway. */
4108 if (use->iv->base_object
4109 && cand->iv->base_object
4110 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4111 return infinite_cost;
4112 }
4113
4114 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4115 {
4116 /* TODO -- add direct handling of this case. */
4117 goto fallback;
4118 }
4119
4120 /* CSTEPI is removed from the offset in case the statement is after the
4121 increment. If the step is not constant, we use zero instead.
4122 This is a bit imprecise (there is the extra addition), but
4123 redundancy elimination is likely to transform the code so that
4124 it uses the value of the variable before the increment anyway,
4125 so this is not too unrealistic. */
4126 if (cst_and_fits_in_hwi (cstep))
4127 cstepi = int_cst_value (cstep);
4128 else
4129 cstepi = 0;
4130
4131 if (!constant_multiple_of (ustep, cstep, &rat))
4132 return infinite_cost;
4133
4134 if (wi::fits_shwi_p (rat))
4135 ratio = rat.to_shwi ();
4136 else
4137 return infinite_cost;
4138
4139 STRIP_NOPS (cbase);
4140 ctype = TREE_TYPE (cbase);
4141
4142 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4143
4144 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4145 or ratio == 1, it is better to handle this like
4146
4147 ubase - ratio * cbase + ratio * var
4148
4149 (this also holds in the case ratio == -1, TODO). */
4150
4151 if (cst_and_fits_in_hwi (cbase))
4152 {
4153 offset = - ratio * int_cst_value (cbase);
4154 cost = difference_cost (data,
4155 ubase, build_int_cst (utype, 0),
4156 &symbol_present, &var_present, &offset,
4157 depends_on);
4158 cost.cost /= avg_loop_niter (data->current_loop);
4159 }
4160 else if (ratio == 1)
4161 {
4162 tree real_cbase = cbase;
4163
4164 /* Check to see if any adjustment is needed. */
4165 if (cstepi == 0 && stmt_is_after_inc)
4166 {
4167 aff_tree real_cbase_aff;
4168 aff_tree cstep_aff;
4169
4170 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4171 &real_cbase_aff);
4172 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4173
4174 aff_combination_add (&real_cbase_aff, &cstep_aff);
4175 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4176 }
4177
4178 cost = difference_cost (data,
4179 ubase, real_cbase,
4180 &symbol_present, &var_present, &offset,
4181 depends_on);
4182 cost.cost /= avg_loop_niter (data->current_loop);
4183 }
4184 else if (address_p
4185 && !POINTER_TYPE_P (ctype)
4186 && multiplier_allowed_in_address_p
4187 (ratio, mem_mode,
4188 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4189 {
4190 cbase
4191 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4192 cost = difference_cost (data,
4193 ubase, cbase,
4194 &symbol_present, &var_present, &offset,
4195 depends_on);
4196 cost.cost /= avg_loop_niter (data->current_loop);
4197 }
4198 else
4199 {
4200 cost = force_var_cost (data, cbase, depends_on);
4201 cost = add_costs (cost,
4202 difference_cost (data,
4203 ubase, build_int_cst (utype, 0),
4204 &symbol_present, &var_present,
4205 &offset, depends_on));
4206 cost.cost /= avg_loop_niter (data->current_loop);
4207 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4208 }
4209
4210 if (inv_expr_id)
4211 {
4212 *inv_expr_id =
4213 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4214 /* Clear depends on. */
4215 if (*inv_expr_id != -1 && depends_on && *depends_on)
4216 bitmap_clear (*depends_on);
4217 }
4218
4219 /* If we are after the increment, the value of the candidate is higher by
4220 one iteration. */
4221 if (stmt_is_after_inc)
4222 offset -= ratio * cstepi;
4223
4224 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4225 (symbol/var1/const parts may be omitted). If we are looking for an
4226 address, find the cost of addressing this. */
4227 if (address_p)
4228 return add_costs (cost,
4229 get_address_cost (symbol_present, var_present,
4230 offset, ratio, cstepi,
4231 mem_mode,
4232 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4233 speed, stmt_is_after_inc,
4234 can_autoinc));
4235
4236 /* Otherwise estimate the costs for computing the expression. */
4237 if (!symbol_present && !var_present && !offset)
4238 {
4239 if (ratio != 1)
4240 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4241 return cost;
4242 }
4243
4244 /* Symbol + offset should be compile-time computable, so consider that
4245 they are added once to the variable, if present. */
4246 if (var_present && (symbol_present || offset))
4247 cost.cost += adjust_setup_cost (data,
4248 add_cost (speed, TYPE_MODE (ctype)));
4249
4250 /* Having offset does not affect runtime cost in case it is added to
4251 symbol, but it increases complexity. */
4252 if (offset)
4253 cost.complexity++;
4254
4255 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4256
4257 aratio = ratio > 0 ? ratio : -ratio;
4258 if (aratio != 1)
4259 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4260 return cost;
4261
4262 fallback:
4263 if (can_autoinc)
4264 *can_autoinc = false;
4265
4266 {
4267 /* Just get the expression, expand it and measure the cost. */
4268 tree comp = get_computation_at (data->current_loop, use, cand, at);
4269
4270 if (!comp)
4271 return infinite_cost;
4272
4273 if (address_p)
4274 comp = build_simple_mem_ref (comp);
4275
4276 return new_cost (computation_cost (comp, speed), 0);
4277 }
4278 }
4279
4280 /* Determines the cost of the computation by which USE is expressed
4281 from induction variable CAND. If ADDRESS_P is true, we just need
4282 to create an address from it, otherwise we want to get it into a
4283 register. A set of invariants we depend on is stored in
4284 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4285 autoinc addressing is likely. */
4286
4287 static comp_cost
4288 get_computation_cost (struct ivopts_data *data,
4289 struct iv_use *use, struct iv_cand *cand,
4290 bool address_p, bitmap *depends_on,
4291 bool *can_autoinc, int *inv_expr_id)
4292 {
4293 return get_computation_cost_at (data,
4294 use, cand, address_p, depends_on, use->stmt,
4295 can_autoinc, inv_expr_id);
4296 }
4297
4298 /* Determines cost of basing replacement of USE on CAND in a generic
4299 expression. */
4300
4301 static bool
4302 determine_use_iv_cost_generic (struct ivopts_data *data,
4303 struct iv_use *use, struct iv_cand *cand)
4304 {
4305 bitmap depends_on;
4306 comp_cost cost;
4307 int inv_expr_id = -1;
4308
4309 /* The simple case first -- if we need to express value of the preserved
4310 original biv, the cost is 0. This also prevents us from counting the
4311 cost of increment twice -- once at this use and once in the cost of
4312 the candidate. */
4313 if (cand->pos == IP_ORIGINAL
4314 && cand->incremented_at == use->stmt)
4315 {
4316 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4317 ERROR_MARK, -1);
4318 return true;
4319 }
4320
4321 cost = get_computation_cost (data, use, cand, false, &depends_on,
4322 NULL, &inv_expr_id);
4323
4324 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4325 inv_expr_id);
4326
4327 return !infinite_cost_p (cost);
4328 }
4329
4330 /* Determines cost of basing replacement of USE on CAND in an address. */
4331
4332 static bool
4333 determine_use_iv_cost_address (struct ivopts_data *data,
4334 struct iv_use *use, struct iv_cand *cand)
4335 {
4336 bitmap depends_on;
4337 bool can_autoinc;
4338 int inv_expr_id = -1;
4339 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4340 &can_autoinc, &inv_expr_id);
4341
4342 if (cand->ainc_use == use)
4343 {
4344 if (can_autoinc)
4345 cost.cost -= cand->cost_step;
4346 /* If we generated the candidate solely for exploiting autoincrement
4347 opportunities, and it turns out it can't be used, set the cost to
4348 infinity to make sure we ignore it. */
4349 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4350 cost = infinite_cost;
4351 }
4352 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4353 inv_expr_id);
4354
4355 return !infinite_cost_p (cost);
4356 }
4357
4358 /* Computes value of candidate CAND at position AT in iteration NITER, and
4359 stores it to VAL. */
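/* In effect this computes VAL = BASE + STEP * NITER, with one extra STEP
   added when AT is after the increment of CAND.  */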
4360
4361 static void
4362 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4363 aff_tree *val)
4364 {
4365 aff_tree step, delta, nit;
4366 struct iv *iv = cand->iv;
4367 tree type = TREE_TYPE (iv->base);
4368 tree steptype = type;
4369 if (POINTER_TYPE_P (type))
4370 steptype = sizetype;
4371
4372 tree_to_aff_combination (iv->step, steptype, &step);
4373 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4374 aff_combination_convert (&nit, steptype);
4375 aff_combination_mult (&nit, &step, &delta);
4376 if (stmt_after_increment (loop, cand, at))
4377 aff_combination_add (&delta, &step);
4378
4379 tree_to_aff_combination (iv->base, type, val);
4380 aff_combination_add (val, &delta);
4381 }
4382
4383 /* Returns period of induction variable iv. */
4384
4385 static tree
4386 iv_period (struct iv *iv)
4387 {
4388 tree step = iv->step, period, type;
4389 tree pow2div;
4390
4391 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4392
4393 type = unsigned_type_for (TREE_TYPE (step));
4394 /* The period of the iv is lcm (step, type_range)/step - 1.
4395 Since the type range is a power of two,
4396 gcd (step, type_range) == 1 << num_of_ending_zeros_binary (step),
4397 so the final result is
4398
4399 (type_range >> num_of_ending_zeros_binary (step)) - 1
4400
4401 */
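/* An illustrative instance: for an 8-bit unsigned type (type_range == 256)
   and step == 4, num_ending_zeros is 2, so the period is
   (256 >> 2) - 1 == 63.  */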
4402 pow2div = num_ending_zeros (step);
4403
4404 period = build_low_bits_mask (type,
4405 (TYPE_PRECISION (type)
4406 - tree_to_uhwi (pow2div)));
4407
4408 return period;
4409 }
4410
4411 /* Returns the comparison operator used when eliminating the iv USE. */
4412
4413 static enum tree_code
4414 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4415 {
4416 struct loop *loop = data->current_loop;
4417 basic_block ex_bb;
4418 edge exit;
4419
4420 ex_bb = gimple_bb (use->stmt);
4421 exit = EDGE_SUCC (ex_bb, 0);
4422 if (flow_bb_inside_loop_p (loop, exit->dest))
4423 exit = EDGE_SUCC (ex_bb, 1);
4424
4425 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4426 }
4427
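/* Strips conversions from EXP that are useless in the SSA form and that
   do not change whether the type may wrap on overflow.  */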
4428 static tree
4429 strip_wrap_conserving_type_conversions (tree exp)
4430 {
4431 while (tree_ssa_useless_type_conversion (exp)
4432 && (nowrap_type_p (TREE_TYPE (exp))
4433 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4434 exp = TREE_OPERAND (exp, 0);
4435 return exp;
4436 }
4437
4438 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic: we
4439 check for an exact match. */
4440
4441 static bool
4442 expr_equal_p (tree e, tree what)
4443 {
4444 gimple stmt;
4445 enum tree_code code;
4446
4447 e = strip_wrap_conserving_type_conversions (e);
4448 what = strip_wrap_conserving_type_conversions (what);
4449
4450 code = TREE_CODE (what);
4451 if (TREE_TYPE (e) != TREE_TYPE (what))
4452 return false;
4453
4454 if (operand_equal_p (e, what, 0))
4455 return true;
4456
4457 if (TREE_CODE (e) != SSA_NAME)
4458 return false;
4459
4460 stmt = SSA_NAME_DEF_STMT (e);
4461 if (gimple_code (stmt) != GIMPLE_ASSIGN
4462 || gimple_assign_rhs_code (stmt) != code)
4463 return false;
4464
4465 switch (get_gimple_rhs_class (code))
4466 {
4467 case GIMPLE_BINARY_RHS:
4468 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4469 return false;
4470 /* Fallthru. */
4471
4472 case GIMPLE_UNARY_RHS:
4473 case GIMPLE_SINGLE_RHS:
4474 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4475 default:
4476 return false;
4477 }
4478 }
4479
4480 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4481 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4482 calculation is performed in a non-wrapping type.
4483
4484 TODO: More generally, we could test for the situation that
4485 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4486 This would require knowing the sign of OFFSET.
4487
4488 Also, we only look for the first addition in the computation of BASE.
4489 More complex analysis would be better, but introducing it just for
4490 this optimization seems like overkill. */
4491
4492 static bool
4493 difference_cannot_overflow_p (tree base, tree offset)
4494 {
4495 enum tree_code code;
4496 tree e1, e2;
4497
4498 if (!nowrap_type_p (TREE_TYPE (base)))
4499 return false;
4500
4501 base = expand_simple_operations (base);
4502
4503 if (TREE_CODE (base) == SSA_NAME)
4504 {
4505 gimple stmt = SSA_NAME_DEF_STMT (base);
4506
4507 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4508 return false;
4509
4510 code = gimple_assign_rhs_code (stmt);
4511 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4512 return false;
4513
4514 e1 = gimple_assign_rhs1 (stmt);
4515 e2 = gimple_assign_rhs2 (stmt);
4516 }
4517 else
4518 {
4519 code = TREE_CODE (base);
4520 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4521 return false;
4522 e1 = TREE_OPERAND (base, 0);
4523 e2 = TREE_OPERAND (base, 1);
4524 }
4525
4526 /* TODO: deeper inspection may be necessary to prove the equality. */
4527 switch (code)
4528 {
4529 case PLUS_EXPR:
4530 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4531 case POINTER_PLUS_EXPR:
4532 return expr_equal_p (e2, offset);
4533
4534 default:
4535 return false;
4536 }
4537 }
4538
4539 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4540 comparison with CAND. NITER describes the number of iterations of
4541 the loop. If successful, the comparison in COMP_P is altered accordingly.
4542
4543 We aim to handle the following situation:
4544
4545 sometype *base, *p;
4546 int a, b, i;
4547
4548 i = a;
4549 p = p_0 = base + a;
4550
4551 do
4552 {
4553 bla (*p);
4554 p++;
4555 i++;
4556 }
4557 while (i < b);
4558
4559 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4560 We aim to optimize this to
4561
4562 p = p_0 = base + a;
4563 do
4564 {
4565 bla (*p);
4566 p++;
4567 }
4568 while (p < p_0 - a + b);
4569
4570 This preserves correctness, since the pointer arithmetic does not
4571 overflow. More precisely:
4572
4573 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4574 overflow in computing it or the values of p.
4575 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4576 overflow. To prove this, we use the fact that p_0 = base + a. */
4577
4578 static bool
4579 iv_elimination_compare_lt (struct ivopts_data *data,
4580 struct iv_cand *cand, enum tree_code *comp_p,
4581 struct tree_niter_desc *niter)
4582 {
4583 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4584 struct affine_tree_combination nit, tmpa, tmpb;
4585 enum tree_code comp;
4586 HOST_WIDE_INT step;
4587
4588 /* We need to know that the candidate induction variable does not overflow.
4589 While more complex analysis may be used to prove this, for now just
4590 check that the variable appears in the original program and that it
4591 is computed in a type that guarantees no overflows. */
4592 cand_type = TREE_TYPE (cand->iv->base);
4593 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4594 return false;
4595
4596 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4597 the calculation of the BOUND could overflow, making the comparison
4598 invalid. */
4599 if (!data->loop_single_exit_p)
4600 return false;
4601
4602 /* We need to be able to decide whether the candidate is increasing or decreasing
4603 in order to choose the right comparison operator. */
4604 if (!cst_and_fits_in_hwi (cand->iv->step))
4605 return false;
4606 step = int_cst_value (cand->iv->step);
4607
4608 /* Check that the number of iterations matches the expected pattern:
4609 a + 1 > b ? 0 : b - a - 1. */
4610 mbz = niter->may_be_zero;
4611 if (TREE_CODE (mbz) == GT_EXPR)
4612 {
4613 /* Handle a + 1 > b. */
4614 tree op0 = TREE_OPERAND (mbz, 0);
4615 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4616 {
4617 a = TREE_OPERAND (op0, 0);
4618 b = TREE_OPERAND (mbz, 1);
4619 }
4620 else
4621 return false;
4622 }
4623 else if (TREE_CODE (mbz) == LT_EXPR)
4624 {
4625 tree op1 = TREE_OPERAND (mbz, 1);
4626
4627 /* Handle b < a + 1. */
4628 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4629 {
4630 a = TREE_OPERAND (op1, 0);
4631 b = TREE_OPERAND (mbz, 0);
4632 }
4633 else
4634 return false;
4635 }
4636 else
4637 return false;
4638
4639 /* Expected number of iterations is B - A - 1. Check that it matches
4640 the actual number, i.e., that B - A - NITER = 1. */
4641 tree_to_aff_combination (niter->niter, nit_type, &nit);
4642 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4643 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4644 aff_combination_scale (&nit, -1);
4645 aff_combination_scale (&tmpa, -1);
4646 aff_combination_add (&tmpb, &tmpa);
4647 aff_combination_add (&tmpb, &nit);
4648 if (tmpb.n != 0 || tmpb.offset != 1)
4649 return false;
4650
4651 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4652 overflow. */
4653 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4654 cand->iv->step,
4655 fold_convert (TREE_TYPE (cand->iv->step), a));
4656 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4657 return false;
4658
4659 /* Determine the new comparison operator. */
4660 comp = step < 0 ? GT_EXPR : LT_EXPR;
4661 if (*comp_p == NE_EXPR)
4662 *comp_p = comp;
4663 else if (*comp_p == EQ_EXPR)
4664 *comp_p = invert_tree_comparison (comp, false);
4665 else
4666 gcc_unreachable ();
4667
4668 return true;
4669 }
4670
4671 /* Check whether it is possible to express the condition in USE by comparison
4672 of candidate CAND. If so, store the value compared with to BOUND, and the
4673 comparison operator to COMP. */
4674
4675 static bool
4676 may_eliminate_iv (struct ivopts_data *data,
4677 struct iv_use *use, struct iv_cand *cand, tree *bound,
4678 enum tree_code *comp)
4679 {
4680 basic_block ex_bb;
4681 edge exit;
4682 tree period;
4683 struct loop *loop = data->current_loop;
4684 aff_tree bnd;
4685 struct tree_niter_desc *desc = NULL;
4686
4687 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4688 return false;
4689
4690 /* For now works only for exits that dominate the loop latch.
4691 TODO: extend to other conditions inside loop body. */
4692 ex_bb = gimple_bb (use->stmt);
4693 if (use->stmt != last_stmt (ex_bb)
4694 || gimple_code (use->stmt) != GIMPLE_COND
4695 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4696 return false;
4697
4698 exit = EDGE_SUCC (ex_bb, 0);
4699 if (flow_bb_inside_loop_p (loop, exit->dest))
4700 exit = EDGE_SUCC (ex_bb, 1);
4701 if (flow_bb_inside_loop_p (loop, exit->dest))
4702 return false;
4703
4704 desc = niter_for_exit (data, exit);
4705 if (!desc)
4706 return false;
4707
4708 /* Determine whether we can use the variable to test the exit condition.
4709 This is the case iff the period of the induction variable is greater
4710 than the number of iterations for which the exit condition is true. */
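/* An illustrative instance: an 8-bit unsigned candidate with step 1 has
   period 255, so it can be used to express the exit test of a loop known
   to iterate at most 255 times (one less when the test follows the
   increment), without the candidate's value wrapping around.  */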
4711 period = iv_period (cand->iv);
4712
4713 /* If the number of iterations is constant, compare against it directly. */
4714 if (TREE_CODE (desc->niter) == INTEGER_CST)
4715 {
4716 /* See cand_value_at. */
4717 if (stmt_after_increment (loop, cand, use->stmt))
4718 {
4719 if (!tree_int_cst_lt (desc->niter, period))
4720 return false;
4721 }
4722 else
4723 {
4724 if (tree_int_cst_lt (period, desc->niter))
4725 return false;
4726 }
4727 }
4728
4729 /* If not, and if this is the only possible exit of the loop, see whether
4730 we can get a conservative estimate on the number of iterations of the
4731 entire loop and compare against that instead. */
4732 else
4733 {
4734 widest_int period_value, max_niter;
4735
4736 max_niter = desc->max;
4737 if (stmt_after_increment (loop, cand, use->stmt))
4738 max_niter += 1;
4739 period_value = wi::to_widest (period);
4740 if (wi::gtu_p (max_niter, period_value))
4741 {
4742 /* See if we can take advantage of inferred loop bound information. */
4743 if (data->loop_single_exit_p)
4744 {
4745 if (!max_loop_iterations (loop, &max_niter))
4746 return false;
4747 /* The loop bound is already adjusted by adding 1. */
4748 if (wi::gtu_p (max_niter, period_value))
4749 return false;
4750 }
4751 else
4752 return false;
4753 }
4754 }
4755
4756 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4757
4758 *bound = aff_combination_to_tree (&bnd);
4759 *comp = iv_elimination_compare (data, use);
4760
4761 /* It is unlikely that computing the number of iterations using division
4762 would be more profitable than keeping the original induction variable. */
4763 if (expression_expensive_p (*bound))
4764 return false;
4765
4766 /* Sometimes it is possible to handle the situation in which the number
4767 of iterations may be zero unless additional assumptions hold, by using
4768 < instead of != in the exit condition.
4769
4770 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4771 base the exit condition on it. However, that is often too
4772 expensive. */
4773 if (!integer_zerop (desc->may_be_zero))
4774 return iv_elimination_compare_lt (data, cand, comp, desc);
4775
4776 return true;
4777 }
4778
4779 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4780 be copied if it is used in the loop body and DATA->body_includes_call. */
4781
4782 static int
4783 parm_decl_cost (struct ivopts_data *data, tree bound)
4784 {
4785 tree sbound = bound;
4786 STRIP_NOPS (sbound);
4787
4788 if (TREE_CODE (sbound) == SSA_NAME
4789 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4790 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4791 && data->body_includes_call)
4792 return COSTS_N_INSNS (1);
4793
4794 return 0;
4795 }
4796
4797 /* Determines cost of basing replacement of USE on CAND in a condition. */
4798
4799 static bool
4800 determine_use_iv_cost_condition (struct ivopts_data *data,
4801 struct iv_use *use, struct iv_cand *cand)
4802 {
4803 tree bound = NULL_TREE;
4804 struct iv *cmp_iv;
4805 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4806 comp_cost elim_cost, express_cost, cost, bound_cost;
4807 bool ok;
4808 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4809 tree *control_var, *bound_cst;
4810 enum tree_code comp = ERROR_MARK;
4811
4812 /* Only consider real candidates. */
4813 if (!cand->iv)
4814 {
4815 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4816 ERROR_MARK, -1);
4817 return false;
4818 }
4819
4820 /* Try iv elimination. */
4821 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4822 {
4823 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4824 if (elim_cost.cost == 0)
4825 elim_cost.cost = parm_decl_cost (data, bound);
4826 else if (TREE_CODE (bound) == INTEGER_CST)
4827 elim_cost.cost = 0;
4828 /* If we replace a loop condition 'i < n' with 'p < base + n',
4829 depends_on_elim will have 'base' and 'n' set, which implies
4830 that both 'base' and 'n' will be live during the loop. More likely,
4831 'base + n' will be loop invariant, resulting in only one live value
4832 during the loop. So in that case we clear depends_on_elim and set
4833 elim_inv_expr_id instead. */
4834 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4835 {
4836 elim_inv_expr_id = get_expr_id (data, bound);
4837 bitmap_clear (depends_on_elim);
4838 }
4839 /* The bound is a loop invariant, so it will only be computed
4840 once. */
4841 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4842 }
4843 else
4844 elim_cost = infinite_cost;
4845
4846 /* Try expressing the original giv. If it is compared with an invariant,
4847 note that we cannot get rid of it. */
4848 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4849 NULL, &cmp_iv);
4850 gcc_assert (ok);
4851
4852 /* When the condition is a comparison of the candidate IV against
4853 zero, prefer this IV.
4854
4855 TODO: The constant that we're subtracting from the cost should
4856 be target-dependent. This information should be added to the
4857 target costs for each backend. */
4858 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4859 && integer_zerop (*bound_cst)
4860 && (operand_equal_p (*control_var, cand->var_after, 0)
4861 || operand_equal_p (*control_var, cand->var_before, 0)))
4862 elim_cost.cost -= 1;
4863
4864 express_cost = get_computation_cost (data, use, cand, false,
4865 &depends_on_express, NULL,
4866 &express_inv_expr_id);
4867 fd_ivopts_data = data;
4868 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4869
4870 /* Count the cost of the original bound as well. */
4871 bound_cost = force_var_cost (data, *bound_cst, NULL);
4872 if (bound_cost.cost == 0)
4873 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4874 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4875 bound_cost.cost = 0;
4876 express_cost.cost += bound_cost.cost;
4877
4878 /* Choose the better approach, preferring the eliminated IV. */
4879 if (compare_costs (elim_cost, express_cost) <= 0)
4880 {
4881 cost = elim_cost;
4882 depends_on = depends_on_elim;
4883 depends_on_elim = NULL;
4884 inv_expr_id = elim_inv_expr_id;
4885 }
4886 else
4887 {
4888 cost = express_cost;
4889 depends_on = depends_on_express;
4890 depends_on_express = NULL;
4891 bound = NULL_TREE;
4892 comp = ERROR_MARK;
4893 inv_expr_id = express_inv_expr_id;
4894 }
4895
4896 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4897
4898 if (depends_on_elim)
4899 BITMAP_FREE (depends_on_elim);
4900 if (depends_on_express)
4901 BITMAP_FREE (depends_on_express);
4902
4903 return !infinite_cost_p (cost);
4904 }
4905
4906 /* Determines cost of basing replacement of USE on CAND. Returns false
4907 if USE cannot be based on CAND. */
4908
4909 static bool
4910 determine_use_iv_cost (struct ivopts_data *data,
4911 struct iv_use *use, struct iv_cand *cand)
4912 {
4913 switch (use->type)
4914 {
4915 case USE_NONLINEAR_EXPR:
4916 return determine_use_iv_cost_generic (data, use, cand);
4917
4918 case USE_ADDRESS:
4919 return determine_use_iv_cost_address (data, use, cand);
4920
4921 case USE_COMPARE:
4922 return determine_use_iv_cost_condition (data, use, cand);
4923
4924 default:
4925 gcc_unreachable ();
4926 }
4927 }
4928
4929 /* Return true if get_computation_cost indicates that autoincrement is
4930 a possibility for the pair of USE and CAND, false otherwise. */
4931
4932 static bool
4933 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4934 struct iv_cand *cand)
4935 {
4936 bitmap depends_on;
4937 bool can_autoinc;
4938 comp_cost cost;
4939
4940 if (use->type != USE_ADDRESS)
4941 return false;
4942
4943 cost = get_computation_cost (data, use, cand, true, &depends_on,
4944 &can_autoinc, NULL);
4945
4946 BITMAP_FREE (depends_on);
4947
4948 return !infinite_cost_p (cost) && can_autoinc;
4949 }
4950
4951 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4952 use that allows autoincrement, and set their AINC_USE if possible. */
4953
4954 static void
4955 set_autoinc_for_original_candidates (struct ivopts_data *data)
4956 {
4957 unsigned i, j;
4958
4959 for (i = 0; i < n_iv_cands (data); i++)
4960 {
4961 struct iv_cand *cand = iv_cand (data, i);
4962 struct iv_use *closest_before = NULL;
4963 struct iv_use *closest_after = NULL;
4964 if (cand->pos != IP_ORIGINAL)
4965 continue;
4966
4967 for (j = 0; j < n_iv_uses (data); j++)
4968 {
4969 struct iv_use *use = iv_use (data, j);
4970 unsigned uid = gimple_uid (use->stmt);
4971
4972 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
4973 continue;
4974
4975 if (uid < gimple_uid (cand->incremented_at)
4976 && (closest_before == NULL
4977 || uid > gimple_uid (closest_before->stmt)))
4978 closest_before = use;
4979
4980 if (uid > gimple_uid (cand->incremented_at)
4981 && (closest_after == NULL
4982 || uid < gimple_uid (closest_after->stmt)))
4983 closest_after = use;
4984 }
4985
4986 if (closest_before != NULL
4987 && autoinc_possible_for_pair (data, closest_before, cand))
4988 cand->ainc_use = closest_before;
4989 else if (closest_after != NULL
4990 && autoinc_possible_for_pair (data, closest_after, cand))
4991 cand->ainc_use = closest_after;
4992 }
4993 }
4994
4995 /* Finds the candidates for the induction variables. */
4996
4997 static void
4998 find_iv_candidates (struct ivopts_data *data)
4999 {
5000 /* Add commonly used ivs. */
5001 add_standard_iv_candidates (data);
5002
5003 /* Add old induction variables. */
5004 add_old_ivs_candidates (data);
5005
5006 /* Add induction variables derived from uses. */
5007 add_derived_ivs_candidates (data);
5008
5009 set_autoinc_for_original_candidates (data);
5010
5011 /* Record the important candidates. */
5012 record_important_candidates (data);
5013 }
5014
5015 /* Determines costs of basing the use of the iv on an iv candidate. */
5016
5017 static void
5018 determine_use_iv_costs (struct ivopts_data *data)
5019 {
5020 unsigned i, j;
5021 struct iv_use *use;
5022 struct iv_cand *cand;
5023 bitmap to_clear = BITMAP_ALLOC (NULL);
5024
5025 alloc_use_cost_map (data);
5026
5027 for (i = 0; i < n_iv_uses (data); i++)
5028 {
5029 use = iv_use (data, i);
5030
5031 if (data->consider_all_candidates)
5032 {
5033 for (j = 0; j < n_iv_cands (data); j++)
5034 {
5035 cand = iv_cand (data, j);
5036 determine_use_iv_cost (data, use, cand);
5037 }
5038 }
5039 else
5040 {
5041 bitmap_iterator bi;
5042
5043 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5044 {
5045 cand = iv_cand (data, j);
5046 if (!determine_use_iv_cost (data, use, cand))
5047 bitmap_set_bit (to_clear, j);
5048 }
5049
5050 /* Remove the candidates for which the cost is infinite from
5051 the list of related candidates. */
5052 bitmap_and_compl_into (use->related_cands, to_clear);
5053 bitmap_clear (to_clear);
5054 }
5055 }
5056
5057 BITMAP_FREE (to_clear);
5058
5059 if (dump_file && (dump_flags & TDF_DETAILS))
5060 {
5061 fprintf (dump_file, "Use-candidate costs:\n");
5062
5063 for (i = 0; i < n_iv_uses (data); i++)
5064 {
5065 use = iv_use (data, i);
5066
5067 fprintf (dump_file, "Use %d:\n", i);
5068 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5069 for (j = 0; j < use->n_map_members; j++)
5070 {
5071 if (!use->cost_map[j].cand
5072 || infinite_cost_p (use->cost_map[j].cost))
5073 continue;
5074
5075 fprintf (dump_file, " %d\t%d\t%d\t",
5076 use->cost_map[j].cand->id,
5077 use->cost_map[j].cost.cost,
5078 use->cost_map[j].cost.complexity);
5079 if (use->cost_map[j].depends_on)
5080 bitmap_print (dump_file,
5081 use->cost_map[j].depends_on, "","");
5082 if (use->cost_map[j].inv_expr_id != -1)
5083 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5084 fprintf (dump_file, "\n");
5085 }
5086
5087 fprintf (dump_file, "\n");
5088 }
5089 fprintf (dump_file, "\n");
5090 }
5091 }
5092
5093 /* Determines cost of the candidate CAND. */
5094
5095 static void
5096 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5097 {
5098 comp_cost cost_base;
5099 unsigned cost, cost_step;
5100 tree base;
5101
5102 if (!cand->iv)
5103 {
5104 cand->cost = 0;
5105 return;
5106 }
5107
5108 /* There are two costs associated with the candidate -- its increment
5109 and its initialization. The second is almost negligible for any loop
5110 that rolls enough, so we take it into account only a little. */
5111
5112 base = cand->iv->base;
5113 cost_base = force_var_cost (data, base, NULL);
5114 /* Only exceptionally will the iv register happen to be initialized with
5115 the proper value at no cost; in general, there will at least be a regcopy
5116 or a const set. */
5117 if (cost_base.cost == 0)
5118 cost_base.cost = COSTS_N_INSNS (1);
5119 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5120
5121 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5122
5123 /* Prefer the original ivs unless we may gain something by replacing them.
5124 The reason is to make debugging simpler; this is not relevant for
5125 artificial ivs created by other optimization passes. */
5126 if (cand->pos != IP_ORIGINAL
5127 || !SSA_NAME_VAR (cand->var_before)
5128 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5129 cost++;
5130
5131 /* Prefer not to insert statements into latch unless there are some
5132 already (so that we do not create unnecessary jumps). */
5133 if (cand->pos == IP_END
5134 && empty_block_p (ip_end_pos (data->current_loop)))
5135 cost++;
5136
5137 cand->cost = cost;
5138 cand->cost_step = cost_step;
5139 }
5140
5141 /* Determines costs of computation of the candidates. */
5142
5143 static void
5144 determine_iv_costs (struct ivopts_data *data)
5145 {
5146 unsigned i;
5147
5148 if (dump_file && (dump_flags & TDF_DETAILS))
5149 {
5150 fprintf (dump_file, "Candidate costs:\n");
5151 fprintf (dump_file, " cand\tcost\n");
5152 }
5153
5154 for (i = 0; i < n_iv_cands (data); i++)
5155 {
5156 struct iv_cand *cand = iv_cand (data, i);
5157
5158 determine_iv_cost (data, cand);
5159
5160 if (dump_file && (dump_flags & TDF_DETAILS))
5161 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5162 }
5163
5164 if (dump_file && (dump_flags & TDF_DETAILS))
5165 fprintf (dump_file, "\n");
5166 }
5167
5168 /* Calculates cost for having SIZE induction variables. */
5169
5170 static unsigned
5171 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5172 {
5173 /* We add size to the cost, so that we prefer eliminating ivs
5174 if possible. */
5175 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5176 data->body_includes_call);
5177 }
5178
5179 /* For each size of the induction variable set determine the penalty. */
5180
5181 static void
5182 determine_set_costs (struct ivopts_data *data)
5183 {
5184 unsigned j, n;
5185 gimple phi;
5186 gimple_stmt_iterator psi;
5187 tree op;
5188 struct loop *loop = data->current_loop;
5189 bitmap_iterator bi;
5190
5191 if (dump_file && (dump_flags & TDF_DETAILS))
5192 {
5193 fprintf (dump_file, "Global costs:\n");
5194 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5195 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5196 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5197 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5198 }
5199
5200 n = 0;
5201 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5202 {
5203 phi = gsi_stmt (psi);
5204 op = PHI_RESULT (phi);
5205
5206 if (virtual_operand_p (op))
5207 continue;
5208
5209 if (get_iv (data, op))
5210 continue;
5211
5212 n++;
5213 }
5214
5215 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5216 {
5217 struct version_info *info = ver_info (data, j);
5218
5219 if (info->inv_id && info->has_nonlin_use)
5220 n++;
5221 }
5222
5223 data->regs_used = n;
5224 if (dump_file && (dump_flags & TDF_DETAILS))
5225 fprintf (dump_file, " regs_used %d\n", n);
5226
5227 if (dump_file && (dump_flags & TDF_DETAILS))
5228 {
5229 fprintf (dump_file, " cost for size:\n");
5230 fprintf (dump_file, " ivs\tcost\n");
5231 for (j = 0; j <= 2 * target_avail_regs; j++)
5232 fprintf (dump_file, " %d\t%d\n", j,
5233 ivopts_global_cost_for_size (data, j));
5234 fprintf (dump_file, "\n");
5235 }
5236 }
5237
5238 /* Returns true if A is a cheaper cost pair than B. */
5239
5240 static bool
5241 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5242 {
5243 int cmp;
5244
5245 if (!a)
5246 return false;
5247
5248 if (!b)
5249 return true;
5250
5251 cmp = compare_costs (a->cost, b->cost);
5252 if (cmp < 0)
5253 return true;
5254
5255 if (cmp > 0)
5256 return false;
5257
5258 /* In case the costs are the same, prefer the cheaper candidate. */
5259 if (a->cand->cost < b->cand->cost)
5260 return true;
5261
5262 return false;
5263 }
5264
5265
5266 /* Returns the candidate by which USE is expressed in IVS. */
5267
5268 static struct cost_pair *
5269 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5270 {
5271 return ivs->cand_for_use[use->id];
5272 }
5273
5274 /* Computes the cost field of the IVS structure. */
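/* That is, IVS->cost = cand_use_cost + cand_cost + the global
   register-pressure cost of holding n_regs + num_used_inv_expr values.  */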
5275
5276 static void
5277 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5278 {
5279 comp_cost cost = ivs->cand_use_cost;
5280
5281 cost.cost += ivs->cand_cost;
5282
5283 cost.cost += ivopts_global_cost_for_size (data,
5284 ivs->n_regs + ivs->num_used_inv_expr);
5285
5286 ivs->cost = cost;
5287 }
5288
5289 /* Remove invariants in set INVS from set IVS. */
5290
5291 static void
5292 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5293 {
5294 bitmap_iterator bi;
5295 unsigned iid;
5296
5297 if (!invs)
5298 return;
5299
5300 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5301 {
5302 ivs->n_invariant_uses[iid]--;
5303 if (ivs->n_invariant_uses[iid] == 0)
5304 ivs->n_regs--;
5305 }
5306 }
5307
5308 /* Set USE not to be expressed by any candidate in IVS. */
5309
5310 static void
5311 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5312 struct iv_use *use)
5313 {
5314 unsigned uid = use->id, cid;
5315 struct cost_pair *cp;
5316
5317 cp = ivs->cand_for_use[uid];
5318 if (!cp)
5319 return;
5320 cid = cp->cand->id;
5321
5322 ivs->bad_uses++;
5323 ivs->cand_for_use[uid] = NULL;
5324 ivs->n_cand_uses[cid]--;
5325
5326 if (ivs->n_cand_uses[cid] == 0)
5327 {
5328 bitmap_clear_bit (ivs->cands, cid);
5329 /* Do not count the pseudocandidates. */
5330 if (cp->cand->iv)
5331 ivs->n_regs--;
5332 ivs->n_cands--;
5333 ivs->cand_cost -= cp->cand->cost;
5334
5335 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5336 }
5337
5338 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5339
5340 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5341
5342 if (cp->inv_expr_id != -1)
5343 {
5344 ivs->used_inv_expr[cp->inv_expr_id]--;
5345 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5346 ivs->num_used_inv_expr--;
5347 }
5348 iv_ca_recount_cost (data, ivs);
5349 }
5350
5351 /* Add invariants in set INVS to set IVS. */
5352
5353 static void
5354 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5355 {
5356 bitmap_iterator bi;
5357 unsigned iid;
5358
5359 if (!invs)
5360 return;
5361
5362 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5363 {
5364 ivs->n_invariant_uses[iid]++;
5365 if (ivs->n_invariant_uses[iid] == 1)
5366 ivs->n_regs++;
5367 }
5368 }
5369
5370 /* Set cost pair for USE in set IVS to CP. */
5371
5372 static void
5373 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5374 struct iv_use *use, struct cost_pair *cp)
5375 {
5376 unsigned uid = use->id, cid;
5377
5378 if (ivs->cand_for_use[uid] == cp)
5379 return;
5380
5381 if (ivs->cand_for_use[uid])
5382 iv_ca_set_no_cp (data, ivs, use);
5383
5384 if (cp)
5385 {
5386 cid = cp->cand->id;
5387
5388 ivs->bad_uses--;
5389 ivs->cand_for_use[uid] = cp;
5390 ivs->n_cand_uses[cid]++;
5391 if (ivs->n_cand_uses[cid] == 1)
5392 {
5393 bitmap_set_bit (ivs->cands, cid);
5394 /* Do not count the pseudocandidates. */
5395 if (cp->cand->iv)
5396 ivs->n_regs++;
5397 ivs->n_cands++;
5398 ivs->cand_cost += cp->cand->cost;
5399
5400 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5401 }
5402
5403 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5404 iv_ca_set_add_invariants (ivs, cp->depends_on);
5405
5406 if (cp->inv_expr_id != -1)
5407 {
5408 ivs->used_inv_expr[cp->inv_expr_id]++;
5409 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5410 ivs->num_used_inv_expr++;
5411 }
5412 iv_ca_recount_cost (data, ivs);
5413 }
5414 }
5415
5416 /* Extend set IVS by expressing USE by some of the candidates in it
5417 if possible. All important candidates will be considered
5418 if IMPORTANT_CANDIDATES is true. */
5419
5420 static void
5421 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5422 struct iv_use *use, bool important_candidates)
5423 {
5424 struct cost_pair *best_cp = NULL, *cp;
5425 bitmap_iterator bi;
5426 bitmap cands;
5427 unsigned i;
5428
5429 gcc_assert (ivs->upto >= use->id);
5430
5431 if (ivs->upto == use->id)
5432 {
5433 ivs->upto++;
5434 ivs->bad_uses++;
5435 }
5436
5437 cands = (important_candidates ? data->important_candidates : ivs->cands);
5438 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5439 {
5440 struct iv_cand *cand = iv_cand (data, i);
5441
5442 cp = get_use_iv_cost (data, use, cand);
5443
5444 if (cheaper_cost_pair (cp, best_cp))
5445 best_cp = cp;
5446 }
5447
5448 iv_ca_set_cp (data, ivs, use, best_cp);
5449 }
5450
5451 /* Get cost for assignment IVS. */
5452
5453 static comp_cost
5454 iv_ca_cost (struct iv_ca *ivs)
5455 {
5456 /* This was a conditional expression but it triggered a bug in
5457 Sun C 5.5. */
5458 if (ivs->bad_uses)
5459 return infinite_cost;
5460 else
5461 return ivs->cost;
5462 }
5463
5464 /* Returns true if all dependences of CP are among invariants in IVS. */
5465
5466 static bool
5467 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5468 {
5469 unsigned i;
5470 bitmap_iterator bi;
5471
5472 if (!cp->depends_on)
5473 return true;
5474
5475 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5476 {
5477 if (ivs->n_invariant_uses[i] == 0)
5478 return false;
5479 }
5480
5481 return true;
5482 }
5483
5484 /* Creates a change of expressing USE by NEW_CP instead of OLD_CP and chains
5485 it before NEXT_CHANGE. */
5486
5487 static struct iv_ca_delta *
5488 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5489 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5490 {
5491 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5492
5493 change->use = use;
5494 change->old_cp = old_cp;
5495 change->new_cp = new_cp;
5496 change->next_change = next_change;
5497
5498 return change;
5499 }
5500
5501 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5502 are rewritten. */
5503
5504 static struct iv_ca_delta *
5505 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5506 {
5507 struct iv_ca_delta *last;
5508
5509 if (!l2)
5510 return l1;
5511
5512 if (!l1)
5513 return l2;
5514
5515 for (last = l1; last->next_change; last = last->next_change)
5516 continue;
5517 last->next_change = l2;
5518
5519 return l1;
5520 }
5521
5522 /* Reverse the list of changes DELTA, forming the inverse to it. */
5523
5524 static struct iv_ca_delta *
5525 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5526 {
5527 struct iv_ca_delta *act, *next, *prev = NULL;
5528 struct cost_pair *tmp;
5529
5530 for (act = delta; act; act = next)
5531 {
5532 next = act->next_change;
5533 act->next_change = prev;
5534 prev = act;
5535
5536 tmp = act->old_cp;
5537 act->old_cp = act->new_cp;
5538 act->new_cp = tmp;
5539 }
5540
5541 return prev;
5542 }
5543
5544 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5545 reverted instead. */
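/* A typical pattern in this file (see e.g. iv_ca_extend below): build a
   DELTA with iv_ca_delta_add, commit it to measure the cost of the
   modified set, then commit it again with FORWARD false to restore the
   previous assignment.  */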
5546
5547 static void
5548 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5549 struct iv_ca_delta *delta, bool forward)
5550 {
5551 struct cost_pair *from, *to;
5552 struct iv_ca_delta *act;
5553
5554 if (!forward)
5555 delta = iv_ca_delta_reverse (delta);
5556
5557 for (act = delta; act; act = act->next_change)
5558 {
5559 from = act->old_cp;
5560 to = act->new_cp;
5561 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5562 iv_ca_set_cp (data, ivs, act->use, to);
5563 }
5564
5565 if (!forward)
5566 iv_ca_delta_reverse (delta);
5567 }
5568
5569 /* Returns true if CAND is used in IVS. */
5570
5571 static bool
5572 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5573 {
5574 return ivs->n_cand_uses[cand->id] > 0;
5575 }
5576
5577 /* Returns number of induction variable candidates in the set IVS. */
5578
5579 static unsigned
5580 iv_ca_n_cands (struct iv_ca *ivs)
5581 {
5582 return ivs->n_cands;
5583 }
5584
5585 /* Free the list of changes DELTA. */
5586
5587 static void
5588 iv_ca_delta_free (struct iv_ca_delta **delta)
5589 {
5590 struct iv_ca_delta *act, *next;
5591
5592 for (act = *delta; act; act = next)
5593 {
5594 next = act->next_change;
5595 free (act);
5596 }
5597
5598 *delta = NULL;
5599 }
5600
5601 /* Allocates a new iv candidate assignment. */
5602
5603 static struct iv_ca *
5604 iv_ca_new (struct ivopts_data *data)
5605 {
5606 struct iv_ca *nw = XNEW (struct iv_ca);
5607
5608 nw->upto = 0;
5609 nw->bad_uses = 0;
5610 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5611 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5612 nw->cands = BITMAP_ALLOC (NULL);
5613 nw->n_cands = 0;
5614 nw->n_regs = 0;
5615 nw->cand_use_cost = no_cost;
5616 nw->cand_cost = 0;
5617 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5618 nw->cost = no_cost;
5619 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5620 nw->num_used_inv_expr = 0;
5621
5622 return nw;
5623 }
5624
5625 /* Free memory occupied by the set IVS. */
5626
5627 static void
5628 iv_ca_free (struct iv_ca **ivs)
5629 {
5630 free ((*ivs)->cand_for_use);
5631 free ((*ivs)->n_cand_uses);
5632 BITMAP_FREE ((*ivs)->cands);
5633 free ((*ivs)->n_invariant_uses);
5634 free ((*ivs)->used_inv_expr);
5635 free (*ivs);
5636 *ivs = NULL;
5637 }
5638
5639 /* Dumps IVS to FILE. */
5640
5641 static void
5642 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5643 {
5644 const char *pref = " invariants ";
5645 unsigned i;
5646 comp_cost cost = iv_ca_cost (ivs);
5647
5648 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5649 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5650 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5651 bitmap_print (file, ivs->cands, " candidates: ","\n");
5652
5653 for (i = 0; i < ivs->upto; i++)
5654 {
5655 struct iv_use *use = iv_use (data, i);
5656 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5657 if (cp)
5658 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5659 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5660 else
5661 fprintf (file, " use:%d --> ??\n", use->id);
5662 }
5663
5664 for (i = 1; i <= data->max_inv_id; i++)
5665 if (ivs->n_invariant_uses[i])
5666 {
5667 fprintf (file, "%s%d", pref, i);
5668 pref = ", ";
5669 }
5670 fprintf (file, "\n\n");
5671 }
5672
5673 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5674 new set, and store differences in DELTA. Number of induction variables
5675 in the new set is stored to N_IVS. MIN_NCAND is a flag; when it is true,
5676 the function tries to find a solution with a minimal number of iv candidates. */
5677
5678 static comp_cost
5679 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5680 struct iv_cand *cand, struct iv_ca_delta **delta,
5681 unsigned *n_ivs, bool min_ncand)
5682 {
5683 unsigned i;
5684 comp_cost cost;
5685 struct iv_use *use;
5686 struct cost_pair *old_cp, *new_cp;
5687
5688 *delta = NULL;
5689 for (i = 0; i < ivs->upto; i++)
5690 {
5691 use = iv_use (data, i);
5692 old_cp = iv_ca_cand_for_use (ivs, use);
5693
5694 if (old_cp
5695 && old_cp->cand == cand)
5696 continue;
5697
5698 new_cp = get_use_iv_cost (data, use, cand);
5699 if (!new_cp)
5700 continue;
5701
5702 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5703 continue;
5704
5705 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5706 continue;
5707
5708 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5709 }
5710
5711 iv_ca_delta_commit (data, ivs, *delta, true);
5712 cost = iv_ca_cost (ivs);
5713 if (n_ivs)
5714 *n_ivs = iv_ca_n_cands (ivs);
5715 iv_ca_delta_commit (data, ivs, *delta, false);
5716
5717 return cost;
5718 }
5719
5720 /* Try narrowing set IVS by removing CAND. Return the cost of
5721 the new set and store the differences in DELTA. */
5722
5723 static comp_cost
5724 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5725 struct iv_cand *cand, struct iv_ca_delta **delta)
5726 {
5727 unsigned i, ci;
5728 struct iv_use *use;
5729 struct cost_pair *old_cp, *new_cp, *cp;
5730 bitmap_iterator bi;
5731 struct iv_cand *cnd;
5732 comp_cost cost;
5733
5734 *delta = NULL;
5735 for (i = 0; i < n_iv_uses (data); i++)
5736 {
5737 use = iv_use (data, i);
5738
5739 old_cp = iv_ca_cand_for_use (ivs, use);
5740 if (old_cp->cand != cand)
5741 continue;
5742
5743 new_cp = NULL;
5744
5745 if (data->consider_all_candidates)
5746 {
5747 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5748 {
5749 if (ci == cand->id)
5750 continue;
5751
5752 cnd = iv_cand (data, ci);
5753
5754 cp = get_use_iv_cost (data, use, cnd);
5755 if (!cp)
5756 continue;
5757
5758 if (!iv_ca_has_deps (ivs, cp))
5759 continue;
5760
5761 if (!cheaper_cost_pair (cp, new_cp))
5762 continue;
5763
5764 new_cp = cp;
5765 }
5766 }
5767 else
5768 {
5769 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5770 {
5771 if (ci == cand->id)
5772 continue;
5773
5774 cnd = iv_cand (data, ci);
5775
5776 cp = get_use_iv_cost (data, use, cnd);
5777 if (!cp)
5778 continue;
5779 if (!iv_ca_has_deps (ivs, cp))
5780 continue;
5781
5782 if (!cheaper_cost_pair (cp, new_cp))
5783 continue;
5784
5785 new_cp = cp;
5786 }
5787 }
5788
5789 if (!new_cp)
5790 {
5791 iv_ca_delta_free (delta);
5792 return infinite_cost;
5793 }
5794
5795 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5796 }
5797
5798 iv_ca_delta_commit (data, ivs, *delta, true);
5799 cost = iv_ca_cost (ivs);
5800 iv_ca_delta_commit (data, ivs, *delta, false);
5801
5802 return cost;
5803 }
5804
5805 /* Try optimizing the set of candidates IVS by removing candidates other
5806 than EXCEPT_CAND from it. Return cost of the new set, and store
5807 differences in DELTA. */
5808
5809 static comp_cost
5810 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5811 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5812 {
5813 bitmap_iterator bi;
5814 struct iv_ca_delta *act_delta, *best_delta;
5815 unsigned i;
5816 comp_cost best_cost, acost;
5817 struct iv_cand *cand;
5818
5819 best_delta = NULL;
5820 best_cost = iv_ca_cost (ivs);
5821
5822 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5823 {
5824 cand = iv_cand (data, i);
5825
5826 if (cand == except_cand)
5827 continue;
5828
5829 acost = iv_ca_narrow (data, ivs, cand, &act_delta);
5830
5831 if (compare_costs (acost, best_cost) < 0)
5832 {
5833 best_cost = acost;
5834 iv_ca_delta_free (&best_delta);
5835 best_delta = act_delta;
5836 }
5837 else
5838 iv_ca_delta_free (&act_delta);
5839 }
5840
5841 if (!best_delta)
5842 {
5843 *delta = NULL;
5844 return best_cost;
5845 }
5846
5847 /* Recurse to possibly remove other unnecessary ivs. */
5848 iv_ca_delta_commit (data, ivs, best_delta, true);
5849 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5850 iv_ca_delta_commit (data, ivs, best_delta, false);
5851 *delta = iv_ca_delta_join (best_delta, *delta);
5852 return best_cost;
5853 }
5854
5855 /* Tries to extend the set IVS in the best possible way in order
5856 to express the USE. If ORIGINALP is true, prefer candidates from
5857 the original set of IVs, otherwise favor important candidates not
5858 based on any memory object. */
5859
5860 static bool
5861 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5862 struct iv_use *use, bool originalp)
5863 {
5864 comp_cost best_cost, act_cost;
5865 unsigned i;
5866 bitmap_iterator bi;
5867 struct iv_cand *cand;
5868 struct iv_ca_delta *best_delta = NULL, *act_delta;
5869 struct cost_pair *cp;
5870
5871 iv_ca_add_use (data, ivs, use, false);
5872 best_cost = iv_ca_cost (ivs);
5873
5874 cp = iv_ca_cand_for_use (ivs, use);
5875 if (!cp)
5876 {
5877 ivs->upto--;
5878 ivs->bad_uses--;
5879 iv_ca_add_use (data, ivs, use, true);
5880 best_cost = iv_ca_cost (ivs);
5881 cp = iv_ca_cand_for_use (ivs, use);
5882 }
5883 if (cp)
5884 {
5885 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5886 iv_ca_set_no_cp (data, ivs, use);
5887 }
5888
5889 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5890    first try important candidates not based on any memory object. Only if
5891    this fails do we try the use-specific ones. Rationale -- in loops with
5892    many variables the best choice often is to use just one generic biv. If
5893    we added many use-specific ivs here, the optimization algorithm later
5894    would be likely to get stuck in a local minimum, thus causing us to create
5895    too many ivs. Growing the set from few ivs to more seems more likely to
5896    succeed -- starting from few ivs, replacing an expensive use by a
5897    specific iv should always be a win. */
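/* For instance (an illustrative, simplified example): in

     for (i = 0; i < n; i++)
       {
         a[i] = ...;
         b[i] = ...;
         c[i] = ...;
       }

   a single generic counter can serve the exit test and all three address
   uses, whereas seeding the search with a separate pointer iv per array
   makes it unlikely that the local search ever shrinks the set back down
   to one variable.  */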
5898 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5899 {
5900 cand = iv_cand (data, i);
5901
5902 if (originalp && cand->pos != IP_ORIGINAL
5903 continue;
5904
5905 if (!originalp && cand->iv->base_object != NULL_TREE)
5906 continue;
5907
5908 if (iv_ca_cand_used_p (ivs, cand))
5909 continue;
5910
5911 cp = get_use_iv_cost (data, use, cand);
5912 if (!cp)
5913 continue;
5914
5915 iv_ca_set_cp (data, ivs, use, cp);
5916 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5917 true);
5918 iv_ca_set_no_cp (data, ivs, use);
5919 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5920
5921 if (compare_costs (act_cost, best_cost) < 0)
5922 {
5923 best_cost = act_cost;
5924
5925 iv_ca_delta_free (&best_delta);
5926 best_delta = act_delta;
5927 }
5928 else
5929 iv_ca_delta_free (&act_delta);
5930 }
5931
5932 if (infinite_cost_p (best_cost))
5933 {
5934 for (i = 0; i < use->n_map_members; i++)
5935 {
5936 cp = use->cost_map + i;
5937 cand = cp->cand;
5938 if (!cand)
5939 continue;
5940
5941 /* Skip important candidates that were already tried in the loop above. */
5942 if (cand->important)
5943 {
5944 if (originalp && cand->pos == IP_ORIGINAL)
5945 continue;
5946 if (!originalp && cand->iv->base_object == NULL_TREE)
5947 continue;
5948 }
5949
5950 if (iv_ca_cand_used_p (ivs, cand))
5951 continue;
5952
5953 act_delta = NULL;
5954 iv_ca_set_cp (data, ivs, use, cp);
5955 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
5956 iv_ca_set_no_cp (data, ivs, use);
5957 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
5958 cp, act_delta);
5959
5960 if (compare_costs (act_cost, best_cost) < 0)
5961 {
5962 best_cost = act_cost;
5963
5964 if (best_delta)
5965 iv_ca_delta_free (&best_delta);
5966 best_delta = act_delta;
5967 }
5968 else
5969 iv_ca_delta_free (&act_delta);
5970 }
5971 }
5972
5973 iv_ca_delta_commit (data, ivs, best_delta, true);
5974 iv_ca_delta_free (&best_delta);
5975
5976 return !infinite_cost_p (best_cost);
5977 }
5978
5979 /* Finds an initial assignment of candidates to uses. */
5980
5981 static struct iv_ca *
5982 get_initial_solution (struct ivopts_data *data, bool originalp)
5983 {
5984 struct iv_ca *ivs = iv_ca_new (data);
5985 unsigned i;
5986
5987 for (i = 0; i < n_iv_uses (data); i++)
5988 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
5989 {
5990 iv_ca_free (&ivs);
5991 return NULL;
5992 }
5993
5994 return ivs;
5995 }
5996
5997 /* Tries to improve the set of induction variables IVS. */
5998
5999 static bool
6000 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6001 {
6002 unsigned i, n_ivs;
6003 comp_cost acost, best_cost = iv_ca_cost (ivs);
6004 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6005 struct iv_cand *cand;
6006
6007 /* Try extending the set of induction variables by one. */
6008 for (i = 0; i < n_iv_cands (data); i++)
6009 {
6010 cand = iv_cand (data, i);
6011
6012 if (iv_ca_cand_used_p (ivs, cand))
6013 continue;
6014
6015 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6016 if (!act_delta)
6017 continue;
6018
6019 /* If we successfully added the candidate and the set is small enough,
6020 try optimizing it by removing other candidates. */
6021 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6022 {
6023 iv_ca_delta_commit (data, ivs, act_delta, true);
6024 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6025 iv_ca_delta_commit (data, ivs, act_delta, false);
6026 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6027 }
6028
6029 if (compare_costs (acost, best_cost) < 0)
6030 {
6031 best_cost = acost;
6032 iv_ca_delta_free (&best_delta);
6033 best_delta = act_delta;
6034 }
6035 else
6036 iv_ca_delta_free (&act_delta);
6037 }
6038
6039 if (!best_delta)
6040 {
6041 /* Try removing the candidates from the set instead. */
6042 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6043
6044 /* Nothing more we can do. */
6045 if (!best_delta)
6046 return false;
6047 }
6048
6049 iv_ca_delta_commit (data, ivs, best_delta, true);
6050 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6051 iv_ca_delta_free (&best_delta);
6052 return true;
6053 }
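/* One call to try_improve_iv_set thus explores the following neighborhood
   (editor's sketch of the logic above):

     best = cost (IVS);
     for each candidate C not yet in IVS:
       d = delta extending IVS by C;
       if the extended set is small, also prune it (keeping C);
       remember the cheapest d;
     if no extension helps, fall back to a plain prune;
     commit the best delta found, if any.  */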
6054
6055 /* Attempts to find the optimal set of induction variables. We use a simple
6056    greedy heuristic -- we try to replace at most one candidate in the selected
6057    solution and remove the unused ivs while this improves the cost. */
6058
6059 static struct iv_ca *
6060 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6061 {
6062 struct iv_ca *set;
6063
6064 /* Get the initial solution. */
6065 set = get_initial_solution (data, originalp);
6066 if (!set)
6067 {
6068 if (dump_file && (dump_flags & TDF_DETAILS))
6069 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6070 return NULL;
6071 }
6072
6073 if (dump_file && (dump_flags & TDF_DETAILS))
6074 {
6075 fprintf (dump_file, "Initial set of candidates:\n");
6076 iv_ca_dump (data, dump_file, set);
6077 }
6078
6079 while (try_improve_iv_set (data, set))
6080 {
6081 if (dump_file && (dump_flags & TDF_DETAILS))
6082 {
6083 fprintf (dump_file, "Improved to:\n");
6084 iv_ca_dump (data, dump_file, set);
6085 }
6086 }
6087
6088 return set;
6089 }
6090
6091 static struct iv_ca *
6092 find_optimal_iv_set (struct ivopts_data *data)
6093 {
6094 unsigned i;
6095 struct iv_ca *set, *origset;
6096 struct iv_use *use;
6097 comp_cost cost, origcost;
6098
6099 /* Determine the cost based on a strategy that starts with the original
6100    IVs, and then try again using a strategy that prefers important
6101    candidates not based on any memory object. */
6102 origset = find_optimal_iv_set_1 (data, true);
6103 set = find_optimal_iv_set_1 (data, false);
6104
6105 if (!origset && !set)
6106 return NULL;
6107
6108 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6109 cost = set ? iv_ca_cost (set) : infinite_cost;
6110
6111 if (dump_file && (dump_flags & TDF_DETAILS))
6112 {
6113 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6114 origcost.cost, origcost.complexity);
6115 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6116 cost.cost, cost.complexity);
6117 }
6118
6119 /* Choose the one with the best cost. */
6120 if (compare_costs (origcost, cost) <= 0)
6121 {
6122 if (set)
6123 iv_ca_free (&set);
6124 set = origset;
6125 }
6126 else if (origset)
6127 iv_ca_free (&origset);
6128
6129 for (i = 0; i < n_iv_uses (data); i++)
6130 {
6131 use = iv_use (data, i);
6132 use->selected = iv_ca_cand_for_use (set, use)->cand;
6133 }
6134
6135 return set;
6136 }
6137
6138 /* Creates a new induction variable corresponding to CAND. */
6139
6140 static void
6141 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6142 {
6143 gimple_stmt_iterator incr_pos;
6144 tree base;
6145 bool after = false;
6146
6147 if (!cand->iv)
6148 return;
6149
6150 switch (cand->pos)
6151 {
6152 case IP_NORMAL:
6153 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6154 break;
6155
6156 case IP_END:
6157 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6158 after = true;
6159 break;
6160
6161 case IP_AFTER_USE:
6162 after = true;
6163 /* fall through */
6164 case IP_BEFORE_USE:
6165 incr_pos = gsi_for_stmt (cand->incremented_at);
6166 break;
6167
6168 case IP_ORIGINAL:
6169 /* Mark that the iv is preserved. */
6170 name_info (data, cand->var_before)->preserve_biv = true;
6171 name_info (data, cand->var_after)->preserve_biv = true;
6172
6173 /* Rewrite the increment so that it uses var_before directly. */
6174 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6175 return;
6176 }
6177
6178 gimple_add_tmp_var (cand->var_before);
6179
6180 base = unshare_expr (cand->iv->base);
6181
6182 create_iv (base, unshare_expr (cand->iv->step),
6183 cand->var_before, data->current_loop,
6184 &incr_pos, after, &cand->var_before, &cand->var_after);
6185 }
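/* Illustration of the increment positions (simplified GIMPLE; the exact
   shape depends on the loop form):

     <bb body>:
       ...
       i_14 = i_4 + 1;                  <-- IP_NORMAL: just before the
       if (i_14 < n_7) goto <bb body>;      exit test
       else goto <bb exit>;

   IP_END instead places the increment at the end of the latch block,
   after the exit test; IP_BEFORE_USE and IP_AFTER_USE place it around
   CAND->incremented_at.  */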
6186
6187 /* Creates new induction variables described in SET. */
6188
6189 static void
6190 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6191 {
6192 unsigned i;
6193 struct iv_cand *cand;
6194 bitmap_iterator bi;
6195
6196 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6197 {
6198 cand = iv_cand (data, i);
6199 create_new_iv (data, cand);
6200 }
6201
6202 if (dump_file && (dump_flags & TDF_DETAILS))
6203 {
6204 fprintf (dump_file, "\nSelected IV set:\n");
6205 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6206 {
6207 cand = iv_cand (data, i);
6208 dump_cand (dump_file, cand);
6209 }
6210 fprintf (dump_file, "\n");
6211 }
6212 }
6213
6214 /* Rewrites USE (definition of iv used in a nonlinear expression)
6215 using candidate CAND. */
6216
6217 static void
6218 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6219 struct iv_use *use, struct iv_cand *cand)
6220 {
6221 tree comp;
6222 tree op, tgt;
6223 gimple ass;
6224 gimple_stmt_iterator bsi;
6225
6226 /* An important special case -- if we are asked to express the value of
6227 the original iv by itself, just exit; there is no need to
6228 introduce a new computation (that might also need casting the
6229 variable to unsigned and back). */
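/* E.g. (an illustrative case): when USE->stmt is the original increment

     i_14 = i_4 + 1;

   with CAND->var_before == i_4, CAND->var_after == i_14 and a
   loop-invariant addend, rewriting could only produce an equivalent
   statement, possibly with extra casts, so we keep it as long as the
   check below succeeds.  */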
6230 if (cand->pos == IP_ORIGINAL
6231 && cand->incremented_at == use->stmt)
6232 {
6233 enum tree_code stmt_code;
6234
6235 gcc_assert (is_gimple_assign (use->stmt));
6236 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6237
6238 /* Check whether we may leave the computation unchanged.
6239 This is the case only if it does not rely on other
6240 computations in the loop -- otherwise, the computation
6241 we rely upon may be removed in remove_unused_ivs,
6242 thus leading to ICE. */
6243 stmt_code = gimple_assign_rhs_code (use->stmt);
6244 if (stmt_code == PLUS_EXPR
6245 || stmt_code == MINUS_EXPR
6246 || stmt_code == POINTER_PLUS_EXPR)
6247 {
6248 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6249 op = gimple_assign_rhs2 (use->stmt);
6250 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6251 op = gimple_assign_rhs1 (use->stmt);
6252 else
6253 op = NULL_TREE;
6254 }
6255 else
6256 op = NULL_TREE;
6257
6258 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6259 return;
6260 }
6261
6262 comp = get_computation (data->current_loop, use, cand);
6263 gcc_assert (comp != NULL_TREE);
6264
6265 switch (gimple_code (use->stmt))
6266 {
6267 case GIMPLE_PHI:
6268 tgt = PHI_RESULT (use->stmt);
6269
6270 /* If we should keep the biv, do not replace it. */
6271 if (name_info (data, tgt)->preserve_biv)
6272 return;
6273
6274 bsi = gsi_after_labels (gimple_bb (use->stmt));
6275 break;
6276
6277 case GIMPLE_ASSIGN:
6278 tgt = gimple_assign_lhs (use->stmt);
6279 bsi = gsi_for_stmt (use->stmt);
6280 break;
6281
6282 default:
6283 gcc_unreachable ();
6284 }
6285
6286 if (!valid_gimple_rhs_p (comp)
6287 || (gimple_code (use->stmt) != GIMPLE_PHI
6288 /* We can't allow re-allocating the stmt as it might be pointed
6289 to still. */
6290 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6291 >= gimple_num_ops (gsi_stmt (bsi)))))
6292 {
6293 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6294 true, GSI_SAME_STMT);
6295 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6296 {
6297 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6298 /* As this isn't a plain copy we have to reset alignment
6299 information. */
6300 if (SSA_NAME_PTR_INFO (comp))
6301 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6302 }
6303 }
6304
6305 if (gimple_code (use->stmt) == GIMPLE_PHI)
6306 {
6307 ass = gimple_build_assign (tgt, comp);
6308 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6309
6310 bsi = gsi_for_stmt (use->stmt);
6311 remove_phi_node (&bsi, false);
6312 }
6313 else
6314 {
6315 gimple_assign_set_rhs_from_tree (&bsi, comp);
6316 use->stmt = gsi_stmt (bsi);
6317 }
6318 }
6319
6320 /* Performs a peephole optimization to reorder the iv update statement with
6321 a mem ref to enable instruction combining in later phases. The mem ref uses
6322 the iv value before the update, so the reordering transformation requires
6323 adjustment of the offset. CAND is the selected IV_CAND.
6324
6325 Example:
6326
6327 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6328 iv2 = iv1 + 1;
6329
6330 if (t < val) (1)
6331 goto L;
6332 goto Head;
6333
6334
6335 Directly propagating t over to (1) will introduce an overlapping live range,
6336 thus increasing register pressure. This peephole transforms it into:
6337
6338
6339 iv2 = iv1 + 1;
6340 t = MEM_REF (base, iv2, 8, 8);
6341 if (t < val)
6342 goto L;
6343 goto Head;
6344 */
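/* (In the example above the constant offset drops from 16 to 8: the
   reference now indexes with iv2 == iv1 + 1 at stride 8, so 8 is
   subtracted from the displacement to address the same memory.)  */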
6345
6346 static void
6347 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6348 {
6349 tree var_after;
6350 gimple iv_update, stmt;
6351 basic_block bb;
6352 gimple_stmt_iterator gsi, gsi_iv;
6353
6354 if (cand->pos != IP_NORMAL)
6355 return;
6356
6357 var_after = cand->var_after;
6358 iv_update = SSA_NAME_DEF_STMT (var_after);
6359
6360 bb = gimple_bb (iv_update);
6361 gsi = gsi_last_nondebug_bb (bb);
6362 stmt = gsi_stmt (gsi);
6363
6364 /* Only handle conditional statements for now. */
6365 if (gimple_code (stmt) != GIMPLE_COND)
6366 return;
6367
6368 gsi_prev_nondebug (&gsi);
6369 stmt = gsi_stmt (gsi);
6370 if (stmt != iv_update)
6371 return;
6372
6373 gsi_prev_nondebug (&gsi);
6374 if (gsi_end_p (gsi))
6375 return;
6376
6377 stmt = gsi_stmt (gsi);
6378 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6379 return;
6380
6381 if (stmt != use->stmt)
6382 return;
6383
6384 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6385 return;
6386
6387 if (dump_file && (dump_flags & TDF_DETAILS))
6388 {
6389 fprintf (dump_file, "Reordering \n");
6390 print_gimple_stmt (dump_file, iv_update, 0, 0);
6391 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6392 fprintf (dump_file, "\n");
6393 }
6394
6395 gsi = gsi_for_stmt (use->stmt);
6396 gsi_iv = gsi_for_stmt (iv_update);
6397 gsi_move_before (&gsi_iv, &gsi);
6398
6399 cand->pos = IP_BEFORE_USE;
6400 cand->incremented_at = use->stmt;
6401 }
6402
6403 /* Rewrites USE (address that is an iv) using candidate CAND. */
6404
6405 static void
6406 rewrite_use_address (struct ivopts_data *data,
6407 struct iv_use *use, struct iv_cand *cand)
6408 {
6409 aff_tree aff;
6410 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6411 tree base_hint = NULL_TREE;
6412 tree ref, iv;
6413 bool ok;
6414
6415 adjust_iv_update_pos (cand, use);
6416 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6417 gcc_assert (ok);
6418 unshare_aff_combination (&aff);
6419
6420 /* To avoid undefined overflow problems, all IV candidates use unsigned
6421 integer types. The drawback is that this makes it impossible for
6422 create_mem_ref to distinguish an IV that is based on a memory object
6423 from one that represents simply an offset.
6424
6425 To work around this problem, we pass a hint to create_mem_ref that
6426 indicates which variable (if any) in aff is an IV based on a memory
6427 object. Note that we only consider the candidate. If this is not
6428 based on an object, the base of the reference is in some subexpression
6429 of the use -- but these will use pointer types, so they are recognized
6430 by the create_mem_ref heuristics anyway. */
6431 if (cand->iv->base_object)
6432 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6433
6434 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6435 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6436 reference_alias_ptr_type (*use->op_p),
6437 iv, base_hint, data->speed);
6438 copy_ref_info (ref, *use->op_p);
6439 *use->op_p = ref;
6440 }
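/* For illustration (simplified; the SSA names are made up): an address use

     t_8 = a_5[i_4];

   whose selected candidate is a pointer iv ivtmp_7 advancing by the
   element size is rewritten into

     t_8 = MEM[ivtmp_7];

   so the per-iteration index scaling and addition disappear from the
   loop body.  */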
6441
6442 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6443    candidate CAND. */
6444
6445 static void
6446 rewrite_use_compare (struct ivopts_data *data,
6447 struct iv_use *use, struct iv_cand *cand)
6448 {
6449 tree comp, *var_p, op, bound;
6450 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6451 enum tree_code compare;
6452 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6453 bool ok;
6454
6455 bound = cp->value;
6456 if (bound)
6457 {
6458 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6459 tree var_type = TREE_TYPE (var);
6460 gimple_seq stmts;
6461
6462 if (dump_file && (dump_flags & TDF_DETAILS))
6463 {
6464 fprintf (dump_file, "Replacing exit test: ");
6465 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6466 }
6467 compare = cp->comp;
6468 bound = unshare_expr (fold_convert (var_type, bound));
6469 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6470 if (stmts)
6471 gsi_insert_seq_on_edge_immediate (
6472 loop_preheader_edge (data->current_loop),
6473 stmts);
6474
6475 gimple_cond_set_lhs (use->stmt, var);
6476 gimple_cond_set_code (use->stmt, compare);
6477 gimple_cond_set_rhs (use->stmt, op);
6478 return;
6479 }
6480
6481 /* The induction variable elimination failed; just express the original
6482 giv. */
6483 comp = get_computation (data->current_loop, use, cand);
6484 gcc_assert (comp != NULL_TREE);
6485
6486 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6487 gcc_assert (ok);
6488
6489 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6490 true, GSI_SAME_STMT);
6491 }
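/* For illustration (simplified; the names are made up): with a pointer
   candidate ivtmp_7 stepping by 4 from &a[0], the exit test

     if (i_4 < n_6)

   can be rewritten as

     if (ivtmp_7 != &a[0] + (sizetype) n_6 * 4)

   where the bound is computed once on the preheader edge; this is the
   iv-elimination case above in which CP->value is non-NULL.  */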
6492
6493 /* Rewrites USE using candidate CAND. */
6494
6495 static void
6496 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6497 {
6498 switch (use->type)
6499 {
6500 case USE_NONLINEAR_EXPR:
6501 rewrite_use_nonlinear_expr (data, use, cand);
6502 break;
6503
6504 case USE_ADDRESS:
6505 rewrite_use_address (data, use, cand);
6506 break;
6507
6508 case USE_COMPARE:
6509 rewrite_use_compare (data, use, cand);
6510 break;
6511
6512 default:
6513 gcc_unreachable ();
6514 }
6515
6516 update_stmt (use->stmt);
6517 }
6518
6519 /* Rewrite the uses using the selected induction variables. */
6520
6521 static void
6522 rewrite_uses (struct ivopts_data *data)
6523 {
6524 unsigned i;
6525 struct iv_cand *cand;
6526 struct iv_use *use;
6527
6528 for (i = 0; i < n_iv_uses (data); i++)
6529 {
6530 use = iv_use (data, i);
6531 cand = use->selected;
6532 gcc_assert (cand);
6533
6534 rewrite_use (data, use, cand);
6535 }
6536 }
6537
6538 /* Removes the ivs that are not used after rewriting. */
6539
6540 static void
6541 remove_unused_ivs (struct ivopts_data *data)
6542 {
6543 unsigned j;
6544 bitmap_iterator bi;
6545 bitmap toremove = BITMAP_ALLOC (NULL);
6546
6547 /* Figure out an order in which to release SSA DEFs so that we don't
6548 release something that we'd have to propagate into a debug stmt
6549 afterwards. */
6550 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6551 {
6552 struct version_info *info;
6553
6554 info = ver_info (data, j);
6555 if (info->iv
6556 && !integer_zerop (info->iv->step)
6557 && !info->inv_id
6558 && !info->iv->have_use_for
6559 && !info->preserve_biv)
6560 {
6561 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6562
6563 tree def = info->iv->ssa_name;
6564
6565 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6566 {
6567 imm_use_iterator imm_iter;
6568 use_operand_p use_p;
6569 gimple stmt;
6570 int count = 0;
6571
6572 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6573 {
6574 if (!gimple_debug_bind_p (stmt))
6575 continue;
6576
6577 /* We just want to determine whether to do nothing
6578 (count == 0), to substitute the computed
6579 expression into a single use of the SSA DEF by
6580 itself (count == 1), or to use a debug temp
6581 because the SSA DEF is used multiple times or as
6582 part of a larger expression (count > 1). */
6583 count++;
6584 if (gimple_debug_bind_get_value (stmt) != def)
6585 count++;
6586
6587 if (count > 1)
6588 BREAK_FROM_IMM_USE_STMT (imm_iter);
6589 }
6590
6591 if (!count)
6592 continue;
6593
6594 struct iv_use dummy_use;
6595 struct iv_cand *best_cand = NULL, *cand;
6596 unsigned i, best_pref = 0, cand_pref;
6597
6598 memset (&dummy_use, 0, sizeof (dummy_use));
6599 dummy_use.iv = info->iv;
6600 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6601 {
6602 cand = iv_use (data, i)->selected;
6603 if (cand == best_cand)
6604 continue;
6605 cand_pref = operand_equal_p (cand->iv->step,
6606 info->iv->step, 0)
6607 ? 4 : 0;
6608 cand_pref
6609 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6610 == TYPE_MODE (TREE_TYPE (info->iv->base))
6611 ? 2 : 0;
6612 cand_pref
6613 += TREE_CODE (cand->iv->base) == INTEGER_CST
6614 ? 1 : 0;
6615 if (best_cand == NULL || best_pref < cand_pref)
6616 {
6617 best_cand = cand;
6618 best_pref = cand_pref;
6619 }
6620 }
6621
6622 if (!best_cand)
6623 continue;
6624
6625 tree comp = get_computation_at (data->current_loop,
6626 &dummy_use, best_cand,
6627 SSA_NAME_DEF_STMT (def));
6628 if (!comp)
6629 continue;
6630
6631 if (count > 1)
6632 {
6633 tree vexpr = make_node (DEBUG_EXPR_DECL);
6634 DECL_ARTIFICIAL (vexpr) = 1;
6635 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6636 if (SSA_NAME_VAR (def))
6637 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6638 else
6639 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6640 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6641 gimple_stmt_iterator gsi;
6642
6643 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6644 gsi = gsi_after_labels (gimple_bb
6645 (SSA_NAME_DEF_STMT (def)));
6646 else
6647 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6648
6649 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6650 comp = vexpr;
6651 }
6652
6653 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6654 {
6655 if (!gimple_debug_bind_p (stmt))
6656 continue;
6657
6658 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6659 SET_USE (use_p, comp);
6660
6661 update_stmt (stmt);
6662 }
6663 }
6664 }
6665 }
6666
6667 release_defs_bitset (toremove);
6668
6669 BITMAP_FREE (toremove);
6670 }
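/* For illustration (simplified): when an iv about to be removed is still
   referenced from debug binds, the code above either substitutes the
   replacement computation COMP into the single bind directly (count == 1)
   or introduces a debug temporary for multiple or composite uses:

     # DEBUG D#1 => COMP
     # DEBUG i => D#1

   so that debug information remains usable after the iv's definition is
   released.  */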
6671
6672 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6673 for pointer_map_traverse. */
6674
6675 static bool
6676 free_tree_niter_desc (const void *key ATTRIBUTE_UNUSED, void **value,
6677 void *data ATTRIBUTE_UNUSED)
6678 {
6679 struct tree_niter_desc *const niter = (struct tree_niter_desc *) *value;
6680
6681 free (niter);
6682 return true;
6683 }
6684
6685 /* Frees data allocated by the optimization of a single loop. */
6686
6687 static void
6688 free_loop_data (struct ivopts_data *data)
6689 {
6690 unsigned i, j;
6691 bitmap_iterator bi;
6692 tree obj;
6693
6694 if (data->niters)
6695 {
6696 pointer_map_traverse (data->niters, free_tree_niter_desc, NULL);
6697 pointer_map_destroy (data->niters);
6698 data->niters = NULL;
6699 }
6700
6701 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6702 {
6703 struct version_info *info;
6704
6705 info = ver_info (data, i);
6706 free (info->iv);
6707 info->iv = NULL;
6708 info->has_nonlin_use = false;
6709 info->preserve_biv = false;
6710 info->inv_id = 0;
6711 }
6712 bitmap_clear (data->relevant);
6713 bitmap_clear (data->important_candidates);
6714
6715 for (i = 0; i < n_iv_uses (data); i++)
6716 {
6717 struct iv_use *use = iv_use (data, i);
6718
6719 free (use->iv);
6720 BITMAP_FREE (use->related_cands);
6721 for (j = 0; j < use->n_map_members; j++)
6722 if (use->cost_map[j].depends_on)
6723 BITMAP_FREE (use->cost_map[j].depends_on);
6724 free (use->cost_map);
6725 free (use);
6726 }
6727 data->iv_uses.truncate (0);
6728
6729 for (i = 0; i < n_iv_cands (data); i++)
6730 {
6731 struct iv_cand *cand = iv_cand (data, i);
6732
6733 free (cand->iv);
6734 if (cand->depends_on)
6735 BITMAP_FREE (cand->depends_on);
6736 free (cand);
6737 }
6738 data->iv_candidates.truncate (0);
6739
6740 if (data->version_info_size < num_ssa_names)
6741 {
6742 data->version_info_size = 2 * num_ssa_names;
6743 free (data->version_info);
6744 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6745 }
6746
6747 data->max_inv_id = 0;
6748
6749 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6750 SET_DECL_RTL (obj, NULL_RTX);
6751
6752 decl_rtl_to_reset.truncate (0);
6753
6754 data->inv_expr_tab.empty ();
6755 data->inv_expr_id = 0;
6756 }
6757
6758 /* Finalizes data structures used by the iv optimization pass. */
6760
6761 static void
6762 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6763 {
6764 free_loop_data (data);
6765 free (data->version_info);
6766 BITMAP_FREE (data->relevant);
6767 BITMAP_FREE (data->important_candidates);
6768
6769 decl_rtl_to_reset.release ();
6770 data->iv_uses.release ();
6771 data->iv_candidates.release ();
6772 data->inv_expr_tab.dispose ();
6773 }
6774
6775 /* Returns true if the loop body BODY includes any function calls, ignoring inexpensive builtins. */
6776
6777 static bool
6778 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6779 {
6780 gimple_stmt_iterator gsi;
6781 unsigned i;
6782
6783 for (i = 0; i < num_nodes; i++)
6784 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6785 {
6786 gimple stmt = gsi_stmt (gsi);
6787 if (is_gimple_call (stmt)
6788 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6789 return true;
6790 }
6791 return false;
6792 }
6793
6794 /* Optimizes the LOOP. Returns true if anything changed. */
6795
6796 static bool
6797 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6798 {
6799 bool changed = false;
6800 struct iv_ca *iv_ca;
6801 edge exit = single_dom_exit (loop);
6802 basic_block *body;
6803
6804 gcc_assert (!data->niters);
6805 data->current_loop = loop;
6806 data->speed = optimize_loop_for_speed_p (loop);
6807
6808 if (dump_file && (dump_flags & TDF_DETAILS))
6809 {
6810 fprintf (dump_file, "Processing loop %d\n", loop->num);
6811
6812 if (exit)
6813 {
6814 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6815 exit->src->index, exit->dest->index);
6816 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6817 fprintf (dump_file, "\n");
6818 }
6819
6820 fprintf (dump_file, "\n");
6821 }
6822
6823 body = get_loop_body (loop);
6824 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6825 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6826 free (body);
6827
6828 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6829
6830 /* For each ssa name determines whether it behaves as an induction variable
6831 in some loop. */
6832 if (!find_induction_variables (data))
6833 goto finish;
6834
6835 /* Finds interesting uses (item 1). */
6836 find_interesting_uses (data);
6837 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6838 goto finish;
6839
6840 /* Finds candidates for the induction variables (item 2). */
6841 find_iv_candidates (data);
6842
6843 /* Calculates the costs (item 3, part 1). */
6844 determine_iv_costs (data);
6845 determine_use_iv_costs (data);
6846 determine_set_costs (data);
6847
6848 /* Find the optimal set of induction variables (item 3, part 2). */
6849 iv_ca = find_optimal_iv_set (data);
6850 if (!iv_ca)
6851 goto finish;
6852 changed = true;
6853
6854 /* Create the new induction variables (item 4, part 1). */
6855 create_new_ivs (data, iv_ca);
6856 iv_ca_free (&iv_ca);
6857
6858 /* Rewrite the uses (item 4, part 2). */
6859 rewrite_uses (data);
6860
6861 /* Remove the ivs that are unused after rewriting. */
6862 remove_unused_ivs (data);
6863
6864 /* We have changed the structure of induction variables; it might happen
6865 that definitions in the scev database refer to some of them that were
6866 eliminated. */
6867 scev_reset ();
6868
6869 finish:
6870 free_loop_data (data);
6871
6872 return changed;
6873 }
6874
6875 /* Main entry point. Optimizes induction variables in loops. */
6876
6877 void
6878 tree_ssa_iv_optimize (void)
6879 {
6880 struct loop *loop;
6881 struct ivopts_data data;
6882
6883 tree_ssa_iv_optimize_init (&data);
6884
6885 /* Optimize the loops starting with the innermost ones. */
6886 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6887 {
6888 if (dump_file && (dump_flags & TDF_DETAILS))
6889 flow_loop_dump (loop, dump_file, NULL, 1);
6890
6891 tree_ssa_iv_optimize_loop (&data, loop);
6892 }
6893
6894 tree_ssa_iv_optimize_finalize (&data);
6895 }
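/* As a whole-pass illustration (source level, simplified): a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   is typically strength-reduced so that the memory access and the exit
   test share a single pointer iv:

     for (p = a; p != a + n; p++)
       sum += *p;

   removing the per-iteration index scaling and, when i has no other
   uses, the counter itself.  */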