1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible; it might give better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
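/* As a minimal illustration (not taken from any particular testcase), consider
   a loop such as

     for (i = 0; i < n; i++)
       sum += a[i];

   Step 1 finds the address use &a[i] and the compare use i < n; step 2
   proposes candidates such as the original biv i and a pointer iv starting at
   &a[0] with step sizeof (a[0]); step 3 may decide that keeping only the
   pointer iv is cheapest, in which case step 4 rewrites the loop roughly as

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   The actual outcome depends on the target costs computed below.  */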
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "tm.h"
68 #include "alias.h"
69 #include "symtab.h"
70 #include "tree.h"
71 #include "fold-const.h"
72 #include "stor-layout.h"
73 #include "tm_p.h"
74 #include "predict.h"
75 #include "hard-reg-set.h"
76 #include "function.h"
77 #include "dominance.h"
78 #include "cfg.h"
79 #include "basic-block.h"
80 #include "gimple-pretty-print.h"
81 #include "tree-ssa-alias.h"
82 #include "internal-fn.h"
83 #include "tree-eh.h"
84 #include "gimple-expr.h"
85 #include "gimple.h"
86 #include "gimplify.h"
87 #include "gimple-iterator.h"
88 #include "gimplify-me.h"
89 #include "gimple-ssa.h"
90 #include "plugin-api.h"
91 #include "ipa-ref.h"
92 #include "cgraph.h"
93 #include "tree-cfg.h"
94 #include "tree-phinodes.h"
95 #include "ssa-iterators.h"
96 #include "stringpool.h"
97 #include "tree-ssanames.h"
98 #include "tree-ssa-loop-ivopts.h"
99 #include "tree-ssa-loop-manip.h"
100 #include "tree-ssa-loop-niter.h"
101 #include "tree-ssa-loop.h"
102 #include "rtl.h"
103 #include "flags.h"
104 #include "insn-config.h"
105 #include "expmed.h"
106 #include "dojump.h"
107 #include "explow.h"
108 #include "calls.h"
109 #include "emit-rtl.h"
110 #include "varasm.h"
111 #include "stmt.h"
112 #include "expr.h"
113 #include "tree-dfa.h"
114 #include "tree-ssa.h"
115 #include "cfgloop.h"
116 #include "tree-pass.h"
117 #include "tree-chrec.h"
118 #include "tree-scalar-evolution.h"
119 #include "params.h"
120 #include "langhooks.h"
121 #include "tree-affine.h"
122 #include "target.h"
123 #include "tree-inline.h"
124 #include "tree-ssa-propagate.h"
125 #include "tree-ssa-address.h"
126 #include "builtins.h"
127 #include "tree-vectorizer.h"
128
129 /* FIXME: Expressions are expanded to RTL in this pass to determine the
130 cost of different addressing modes. This should be moved to a TBD
131 interface between the GIMPLE and RTL worlds. */
132 #include "recog.h"
133
134 /* The infinite cost. */
135 #define INFTY 10000000
136
137 #define AVG_LOOP_NITER(LOOP) 5
138
139 /* Returns the expected number of loop iterations for LOOP.
140 The average trip count is computed from profile data if it
141 exists. */
142
143 static inline HOST_WIDE_INT
144 avg_loop_niter (struct loop *loop)
145 {
146 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
147 if (niter == -1)
148 return AVG_LOOP_NITER (loop);
149
150 return niter;
151 }
152
153 /* Representation of the induction variable. */
154 struct iv
155 {
156 tree base; /* Initial value of the iv. */
157 tree base_object; /* A memory object to which the induction variable points. */
158 tree step; /* Step of the iv (constant only). */
159 tree ssa_name; /* The ssa name with the value. */
160 unsigned use_id; /* The identifier of the use, valid if have_use_for is true. */
161 bool biv_p; /* Is it a biv? */
162 bool have_use_for; /* Do we already have a use for it? */
163 bool no_overflow; /* True if the iv doesn't overflow. */
164 };
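/* For instance, in a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   the ssa name holding i is described by an iv with base 0 and step 1, while
   the address computation for a[i] gives rise to an iv whose base is roughly
   &a[0], whose step is sizeof (a[0]) and whose base_object is the array a.
   (Illustrative sketch only; the exact trees are the lowered forms built by
   alloc_iv below.)  */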
165
166 /* Per-ssa version information (induction variable descriptions, etc.). */
167 struct version_info
168 {
169 tree name; /* The ssa name. */
170 struct iv *iv; /* Induction variable description. */
171 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
172 an expression that is not an induction variable. */
173 bool preserve_biv; /* For the original biv, whether to preserve it. */
174 unsigned inv_id; /* Id of an invariant. */
175 };
176
177 /* Types of uses. */
178 enum use_type
179 {
180 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
181 USE_ADDRESS, /* Use in an address. */
182 USE_COMPARE /* Use is a compare. */
183 };
184
185 /* Cost of a computation. */
186 typedef struct
187 {
188 int cost; /* The runtime cost. */
189 unsigned complexity; /* The estimate of the complexity of the code for
190 the computation (in no concrete units --
191 complexity field should be larger for more
192 complex expressions and addressing modes). */
193 } comp_cost;
194
195 static const comp_cost no_cost = {0, 0};
196 static const comp_cost infinite_cost = {INFTY, INFTY};
197
198 /* The candidate - cost pair. */
199 struct cost_pair
200 {
201 struct iv_cand *cand; /* The candidate. */
202 comp_cost cost; /* The cost. */
203 bitmap depends_on; /* The list of invariants that have to be
204 preserved. */
205 tree value; /* For final value elimination, the expression for
206 the final value of the iv. For iv elimination,
207 the new bound to compare with. */
208 enum tree_code comp; /* For iv elimination, the comparison. */
209 int inv_expr_id; /* Loop invariant expression id. */
210 };
211
212 /* Use. */
213 struct iv_use
214 {
215 unsigned id; /* The id of the use. */
216 unsigned sub_id; /* The id of the sub use. */
217 enum use_type type; /* Type of the use. */
218 struct iv *iv; /* The induction variable it is based on. */
219 gimple stmt; /* Statement in that it occurs. */
220 tree *op_p; /* The place where it occurs. */
221 bitmap related_cands; /* The set of "related" iv candidates, plus the common
222 important ones. */
223
224 unsigned n_map_members; /* Number of candidates in the cost_map list. */
225 struct cost_pair *cost_map;
226 /* The costs w.r.t. the iv candidates. */
227
228 struct iv_cand *selected;
229 /* The selected candidate. */
230
231 struct iv_use *next; /* The next sub use. */
232 tree addr_base; /* Base address with const offset stripped. */
233 unsigned HOST_WIDE_INT addr_offset;
234 /* Const offset stripped from base address. */
235 };
236
237 /* The position where the iv is computed. */
238 enum iv_position
239 {
240 IP_NORMAL, /* At the end, just before the exit condition. */
241 IP_END, /* At the end of the latch block. */
242 IP_BEFORE_USE, /* Immediately before a specific use. */
243 IP_AFTER_USE, /* Immediately after a specific use. */
244 IP_ORIGINAL /* The original biv. */
245 };
246
247 /* The induction variable candidate. */
248 struct iv_cand
249 {
250 unsigned id; /* The number of the candidate. */
251 bool important; /* Whether this is an "important" candidate, i.e. such
252 that it should be considered by all uses. */
253 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
254 gimple incremented_at;/* For original biv, the statement where it is
255 incremented. */
256 tree var_before; /* The variable used for it before increment. */
257 tree var_after; /* The variable used for it after increment. */
258 struct iv *iv; /* The value of the candidate. NULL for
259 "pseudocandidate" used to indicate the possibility
260 to replace the final value of an iv by direct
261 computation of the value. */
262 unsigned cost; /* Cost of the candidate. */
263 unsigned cost_step; /* Cost of the candidate's increment operation. */
264 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
265 where it is incremented. */
266 bitmap depends_on; /* The list of invariants that are used in step of the
267 biv. */
268 };
269
270 /* Loop invariant expression hashtable entry. */
271 struct iv_inv_expr_ent
272 {
273 tree expr;
274 int id;
275 hashval_t hash;
276 };
277
278 /* The data used by the induction variable optimizations. */
279
280 typedef struct iv_use *iv_use_p;
281
282 typedef struct iv_cand *iv_cand_p;
283
284 /* Hashtable helpers. */
285
286 struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
287 {
288 typedef iv_inv_expr_ent *value_type;
289 typedef iv_inv_expr_ent *compare_type;
290 static inline hashval_t hash (const iv_inv_expr_ent *);
291 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
292 };
293
294 /* Hash function for loop invariant expressions. */
295
296 inline hashval_t
297 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
298 {
299 return expr->hash;
300 }
301
302 /* Hash table equality function for expressions. */
303
304 inline bool
305 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
306 const iv_inv_expr_ent *expr2)
307 {
308 return expr1->hash == expr2->hash
309 && operand_equal_p (expr1->expr, expr2->expr, 0);
310 }
311
312 struct ivopts_data
313 {
314 /* The currently optimized loop. */
315 struct loop *current_loop;
316 source_location loop_loc;
317
318 /* Numbers of iterations for all exits of the current loop. */
319 hash_map<edge, tree_niter_desc *> *niters;
320
321 /* Number of registers used in it. */
322 unsigned regs_used;
323
324 /* The size of version_info array allocated. */
325 unsigned version_info_size;
326
327 /* The array of information for the ssa names. */
328 struct version_info *version_info;
329
330 /* The hashtable of loop invariant expressions created
331 by ivopt. */
332 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
333
334 /* Loop invariant expression id. */
335 int inv_expr_id;
336
337 /* The bitmap of indices in version_info whose value was changed. */
338 bitmap relevant;
339
340 /* The uses of induction variables. */
341 vec<iv_use_p> iv_uses;
342
343 /* The candidates. */
344 vec<iv_cand_p> iv_candidates;
345
346 /* A bitmap of important candidates. */
347 bitmap important_candidates;
348
349 /* Cache used by tree_to_aff_combination_expand. */
350 hash_map<tree, name_expansion *> *name_expansion_cache;
351
352 /* The maximum invariant id. */
353 unsigned max_inv_id;
354
355 /* Whether to consider just related and important candidates when replacing a
356 use. */
357 bool consider_all_candidates;
358
359 /* Are we optimizing for speed? */
360 bool speed;
361
362 /* Whether the loop body includes any function calls. */
363 bool body_includes_call;
364
365 /* Whether the loop body can only be exited via a single exit. */
366 bool loop_single_exit_p;
367 };
368
369 /* An assignment of iv candidates to uses. */
370
371 struct iv_ca
372 {
373 /* The number of uses covered by the assignment. */
374 unsigned upto;
375
376 /* Number of uses that cannot be expressed by the candidates in the set. */
377 unsigned bad_uses;
378
379 /* Candidate assigned to a use, together with the related costs. */
380 struct cost_pair **cand_for_use;
381
382 /* Number of times each candidate is used. */
383 unsigned *n_cand_uses;
384
385 /* The candidates used. */
386 bitmap cands;
387
388 /* The number of candidates in the set. */
389 unsigned n_cands;
390
391 /* Total number of registers needed. */
392 unsigned n_regs;
393
394 /* Total cost of expressing uses. */
395 comp_cost cand_use_cost;
396
397 /* Total cost of candidates. */
398 unsigned cand_cost;
399
400 /* Number of times each invariant is used. */
401 unsigned *n_invariant_uses;
402
403 /* The array holding the number of uses of each loop
404 invariant expression created by ivopt. */
405 unsigned *used_inv_expr;
406
407 /* The number of created loop invariants. */
408 unsigned num_used_inv_expr;
409
410 /* Total cost of the assignment. */
411 comp_cost cost;
412 };
413
414 /* Difference of two iv candidate assignments. */
415
416 struct iv_ca_delta
417 {
418 /* Changed use. */
419 struct iv_use *use;
420
421 /* An old assignment (for rollback purposes). */
422 struct cost_pair *old_cp;
423
424 /* A new assignment. */
425 struct cost_pair *new_cp;
426
427 /* Next change in the list. */
428 struct iv_ca_delta *next_change;
429 };
430
431 /* Bound on the number of candidates below which all candidates are considered. */
432
433 #define CONSIDER_ALL_CANDIDATES_BOUND \
434 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
435
436 /* If there are more iv occurrences, we just give up (it is quite unlikely that
437 optimizing such a loop would help, and it would take ages). */
438
439 #define MAX_CONSIDERED_USES \
440 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
441
442 /* If there are at most this number of ivs in the set, always try removing
443 unnecessary ivs from the set. */
444
445 #define ALWAYS_PRUNE_CAND_SET_BOUND \
446 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
447
448 /* The list of trees for which the decl_rtl field must be reset is stored
449 here. */
450
451 static vec<tree> decl_rtl_to_reset;
452
453 static comp_cost force_expr_to_var_cost (tree, bool);
454
455 /* Number of uses recorded in DATA. */
456
457 static inline unsigned
458 n_iv_uses (struct ivopts_data *data)
459 {
460 return data->iv_uses.length ();
461 }
462
463 /* Ith use recorded in DATA. */
464
465 static inline struct iv_use *
466 iv_use (struct ivopts_data *data, unsigned i)
467 {
468 return data->iv_uses[i];
469 }
470
471 /* Number of candidates recorded in DATA. */
472
473 static inline unsigned
474 n_iv_cands (struct ivopts_data *data)
475 {
476 return data->iv_candidates.length ();
477 }
478
479 /* Ith candidate recorded in DATA. */
480
481 static inline struct iv_cand *
482 iv_cand (struct ivopts_data *data, unsigned i)
483 {
484 return data->iv_candidates[i];
485 }
486
487 /* The single loop exit if it dominates the latch, NULL otherwise. */
488
489 edge
490 single_dom_exit (struct loop *loop)
491 {
492 edge exit = single_exit (loop);
493
494 if (!exit)
495 return NULL;
496
497 if (!just_once_each_iteration_p (loop, exit->src))
498 return NULL;
499
500 return exit;
501 }
502
503 /* Dumps information about the induction variable IV to FILE. */
504
505 void
506 dump_iv (FILE *file, struct iv *iv, bool dump_name)
507 {
508 if (iv->ssa_name && dump_name)
509 {
510 fprintf (file, "ssa name ");
511 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
512 fprintf (file, "\n");
513 }
514
515 fprintf (file, " type ");
516 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
517 fprintf (file, "\n");
518
519 if (iv->step)
520 {
521 fprintf (file, " base ");
522 print_generic_expr (file, iv->base, TDF_SLIM);
523 fprintf (file, "\n");
524
525 fprintf (file, " step ");
526 print_generic_expr (file, iv->step, TDF_SLIM);
527 fprintf (file, "\n");
528 }
529 else
530 {
531 fprintf (file, " invariant ");
532 print_generic_expr (file, iv->base, TDF_SLIM);
533 fprintf (file, "\n");
534 }
535
536 if (iv->base_object)
537 {
538 fprintf (file, " base object ");
539 print_generic_expr (file, iv->base_object, TDF_SLIM);
540 fprintf (file, "\n");
541 }
542
543 if (iv->biv_p)
544 fprintf (file, " is a biv\n");
545 }
546
547 /* Dumps information about the USE to FILE. */
548
549 void
550 dump_use (FILE *file, struct iv_use *use)
551 {
552 fprintf (file, "use %d", use->id);
553 if (use->sub_id)
554 fprintf (file, ".%d", use->sub_id);
555
556 fprintf (file, "\n");
557
558 switch (use->type)
559 {
560 case USE_NONLINEAR_EXPR:
561 fprintf (file, " generic\n");
562 break;
563
564 case USE_ADDRESS:
565 fprintf (file, " address\n");
566 break;
567
568 case USE_COMPARE:
569 fprintf (file, " compare\n");
570 break;
571
572 default:
573 gcc_unreachable ();
574 }
575
576 fprintf (file, " in statement ");
577 print_gimple_stmt (file, use->stmt, 0, 0);
578 fprintf (file, "\n");
579
580 fprintf (file, " at position ");
581 if (use->op_p)
582 print_generic_expr (file, *use->op_p, TDF_SLIM);
583 fprintf (file, "\n");
584
585 dump_iv (file, use->iv, false);
586
587 if (use->related_cands)
588 {
589 fprintf (file, " related candidates ");
590 dump_bitmap (file, use->related_cands);
591 }
592 }
593
594 /* Dumps information about the uses to FILE. */
595
596 void
597 dump_uses (FILE *file, struct ivopts_data *data)
598 {
599 unsigned i;
600 struct iv_use *use;
601
602 for (i = 0; i < n_iv_uses (data); i++)
603 {
604 use = iv_use (data, i);
605 do
606 {
607 dump_use (file, use);
608 use = use->next;
609 }
610 while (use);
611 fprintf (file, "\n");
612 }
613 }
614
615 /* Dumps information about induction variable candidate CAND to FILE. */
616
617 void
618 dump_cand (FILE *file, struct iv_cand *cand)
619 {
620 struct iv *iv = cand->iv;
621
622 fprintf (file, "candidate %d%s\n",
623 cand->id, cand->important ? " (important)" : "");
624
625 if (cand->depends_on)
626 {
627 fprintf (file, " depends on ");
628 dump_bitmap (file, cand->depends_on);
629 }
630
631 if (!iv)
632 {
633 fprintf (file, " final value replacement\n");
634 return;
635 }
636
637 if (cand->var_before)
638 {
639 fprintf (file, " var_before ");
640 print_generic_expr (file, cand->var_before, TDF_SLIM);
641 fprintf (file, "\n");
642 }
643 if (cand->var_after)
644 {
645 fprintf (file, " var_after ");
646 print_generic_expr (file, cand->var_after, TDF_SLIM);
647 fprintf (file, "\n");
648 }
649
650 switch (cand->pos)
651 {
652 case IP_NORMAL:
653 fprintf (file, " incremented before exit test\n");
654 break;
655
656 case IP_BEFORE_USE:
657 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
658 break;
659
660 case IP_AFTER_USE:
661 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
662 break;
663
664 case IP_END:
665 fprintf (file, " incremented at end\n");
666 break;
667
668 case IP_ORIGINAL:
669 fprintf (file, " original biv\n");
670 break;
671 }
672
673 dump_iv (file, iv, false);
674 }
675
676 /* Returns the info for ssa version VER. */
677
678 static inline struct version_info *
679 ver_info (struct ivopts_data *data, unsigned ver)
680 {
681 return data->version_info + ver;
682 }
683
684 /* Returns the info for ssa name NAME. */
685
686 static inline struct version_info *
687 name_info (struct ivopts_data *data, tree name)
688 {
689 return ver_info (data, SSA_NAME_VERSION (name));
690 }
691
692 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
693 emitted in LOOP. */
694
695 static bool
696 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
697 {
698 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
699
700 gcc_assert (bb);
701
702 if (sbb == loop->latch)
703 return true;
704
705 if (sbb != bb)
706 return false;
707
708 return stmt == last_stmt (bb);
709 }
710
711 /* Returns true if STMT is after the place where the original induction
712 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
713 if the positions are identical. */
714
715 static bool
716 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
717 {
718 basic_block cand_bb = gimple_bb (cand->incremented_at);
719 basic_block stmt_bb = gimple_bb (stmt);
720
721 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
722 return false;
723
724 if (stmt_bb != cand_bb)
725 return true;
726
727 if (true_if_equal
728 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
729 return true;
730 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
731 }
732
733 /* Returns true if STMT is after the place where the induction variable
734 CAND is incremented in LOOP. */
735
736 static bool
737 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
738 {
739 switch (cand->pos)
740 {
741 case IP_END:
742 return false;
743
744 case IP_NORMAL:
745 return stmt_after_ip_normal_pos (loop, stmt);
746
747 case IP_ORIGINAL:
748 case IP_AFTER_USE:
749 return stmt_after_inc_pos (cand, stmt, false);
750
751 case IP_BEFORE_USE:
752 return stmt_after_inc_pos (cand, stmt, true);
753
754 default:
755 gcc_unreachable ();
756 }
757 }
758
759 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
760
761 static bool
762 abnormal_ssa_name_p (tree exp)
763 {
764 if (!exp)
765 return false;
766
767 if (TREE_CODE (exp) != SSA_NAME)
768 return false;
769
770 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
771 }
772
773 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
774 abnormal phi node. Callback for for_each_index. */
775
776 static bool
777 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
778 void *data ATTRIBUTE_UNUSED)
779 {
780 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
781 {
782 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
783 return false;
784 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
785 return false;
786 }
787
788 return !abnormal_ssa_name_p (*index);
789 }
790
791 /* Returns true if EXPR contains a ssa name that occurs in an
792 abnormal phi node. */
793
794 bool
795 contains_abnormal_ssa_name_p (tree expr)
796 {
797 enum tree_code code;
798 enum tree_code_class codeclass;
799
800 if (!expr)
801 return false;
802
803 code = TREE_CODE (expr);
804 codeclass = TREE_CODE_CLASS (code);
805
806 if (code == SSA_NAME)
807 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
808
809 if (code == INTEGER_CST
810 || is_gimple_min_invariant (expr))
811 return false;
812
813 if (code == ADDR_EXPR)
814 return !for_each_index (&TREE_OPERAND (expr, 0),
815 idx_contains_abnormal_ssa_name_p,
816 NULL);
817
818 if (code == COND_EXPR)
819 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
820 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
821 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
822
823 switch (codeclass)
824 {
825 case tcc_binary:
826 case tcc_comparison:
827 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
828 return true;
829
830 /* Fallthru. */
831 case tcc_unary:
832 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
833 return true;
834
835 break;
836
837 default:
838 gcc_unreachable ();
839 }
840
841 return false;
842 }
843
844 /* Returns the structure describing number of iterations determined from
845 EXIT of DATA->current_loop, or NULL if something goes wrong. */
846
847 static struct tree_niter_desc *
848 niter_for_exit (struct ivopts_data *data, edge exit)
849 {
850 struct tree_niter_desc *desc;
851 tree_niter_desc **slot;
852
853 if (!data->niters)
854 {
855 data->niters = new hash_map<edge, tree_niter_desc *>;
856 slot = NULL;
857 }
858 else
859 slot = data->niters->get (exit);
860
861 if (!slot)
862 {
863 /* Try to determine the number of iterations. We cannot safely work with
864 ssa names that appear in phi nodes on abnormal edges, as that could
865 create overlapping life ranges for them (PR 27283). */
866 desc = XNEW (struct tree_niter_desc);
867 if (!number_of_iterations_exit (data->current_loop,
868 exit, desc, true)
869 || contains_abnormal_ssa_name_p (desc->niter))
870 {
871 XDELETE (desc);
872 desc = NULL;
873 }
874 data->niters->put (exit, desc);
875 }
876 else
877 desc = *slot;
878
879 return desc;
880 }
881
882 /* Returns the structure describing number of iterations determined from
883 single dominating exit of DATA->current_loop, or NULL if something
884 goes wrong. */
885
886 static struct tree_niter_desc *
887 niter_for_single_dom_exit (struct ivopts_data *data)
888 {
889 edge exit = single_dom_exit (data->current_loop);
890
891 if (!exit)
892 return NULL;
893
894 return niter_for_exit (data, exit);
895 }
896
897 /* Initializes data structures used by the iv optimization pass, stored
898 in DATA. */
899
900 static void
901 tree_ssa_iv_optimize_init (struct ivopts_data *data)
902 {
903 data->version_info_size = 2 * num_ssa_names;
904 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
905 data->relevant = BITMAP_ALLOC (NULL);
906 data->important_candidates = BITMAP_ALLOC (NULL);
907 data->max_inv_id = 0;
908 data->niters = NULL;
909 data->iv_uses.create (20);
910 data->iv_candidates.create (20);
911 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
912 data->inv_expr_id = 0;
913 data->name_expansion_cache = NULL;
914 decl_rtl_to_reset.create (20);
915 }
916
917 /* Returns a memory object to which EXPR points. If we can determine that
918 it does not point to any such object, NULL is returned. */
919
920 static tree
921 determine_base_object (tree expr)
922 {
923 enum tree_code code = TREE_CODE (expr);
924 tree base, obj;
925
926 /* If this is a pointer casted to any type, we need to determine
927 the base object for the pointer; so handle conversions before
928 throwing away non-pointer expressions. */
929 if (CONVERT_EXPR_P (expr))
930 return determine_base_object (TREE_OPERAND (expr, 0));
931
932 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
933 return NULL_TREE;
934
935 switch (code)
936 {
937 case INTEGER_CST:
938 return NULL_TREE;
939
940 case ADDR_EXPR:
941 obj = TREE_OPERAND (expr, 0);
942 base = get_base_address (obj);
943
944 if (!base)
945 return expr;
946
947 if (TREE_CODE (base) == MEM_REF)
948 return determine_base_object (TREE_OPERAND (base, 0));
949
950 return fold_convert (ptr_type_node,
951 build_fold_addr_expr (base));
952
953 case POINTER_PLUS_EXPR:
954 return determine_base_object (TREE_OPERAND (expr, 0));
955
956 case PLUS_EXPR:
957 case MINUS_EXPR:
958 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
959 gcc_unreachable ();
960
961 default:
962 return fold_convert (ptr_type_node, expr);
963 }
964 }
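/* For example (illustrative trees, not from a particular dump): applied to
   (char *) (&a[0] + i * 4) this strips the conversion, walks through the
   POINTER_PLUS_EXPR to &a[0] and returns the address of a, whereas applied
   to an integer constant or a non-pointer expression it returns NULL_TREE.  */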
965
966 /* Return true if an address expression with a non-DECL_P operand appears
967 in EXPR. */
968
969 static bool
970 contain_complex_addr_expr (tree expr)
971 {
972 bool res = false;
973
974 STRIP_NOPS (expr);
975 switch (TREE_CODE (expr))
976 {
977 case POINTER_PLUS_EXPR:
978 case PLUS_EXPR:
979 case MINUS_EXPR:
980 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
981 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
982 break;
983
984 case ADDR_EXPR:
985 return (!DECL_P (TREE_OPERAND (expr, 0)));
986
987 default:
988 return false;
989 }
990
991 return res;
992 }
993
994 /* Allocates an induction variable with given initial value BASE and step STEP
995 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
996
997 static struct iv *
998 alloc_iv (tree base, tree step, bool no_overflow = false)
999 {
1000 tree expr = base;
1001 struct iv *iv = XCNEW (struct iv);
1002 gcc_assert (step != NULL_TREE);
1003
1004 /* Lower address expressions in the base, except ones with a DECL_P operand.
1005 By doing this:
1006 1) More accurate cost can be computed for address expressions;
1007 2) Duplicate candidates won't be created for bases in different
1008 forms, like &a[0] and &a. */
1009 STRIP_NOPS (expr);
1010 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1011 || contain_complex_addr_expr (expr))
1012 {
1013 aff_tree comb;
1014 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1015 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1016 }
1017
1018 iv->base = base;
1019 iv->base_object = determine_base_object (base);
1020 iv->step = step;
1021 iv->biv_p = false;
1022 iv->have_use_for = false;
1023 iv->use_id = 0;
1024 iv->ssa_name = NULL_TREE;
1025 iv->no_overflow = no_overflow;
1026
1027 return iv;
1028 }
1029
1030 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1031 doesn't overflow. */
1032
1033 static void
1034 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1035 bool no_overflow)
1036 {
1037 struct version_info *info = name_info (data, iv);
1038
1039 gcc_assert (!info->iv);
1040
1041 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1042 info->iv = alloc_iv (base, step, no_overflow);
1043 info->iv->ssa_name = iv;
1044 }
1045
1046 /* Finds the induction variable description for VAR. */
1047
1048 static struct iv *
1049 get_iv (struct ivopts_data *data, tree var)
1050 {
1051 basic_block bb;
1052 tree type = TREE_TYPE (var);
1053
1054 if (!POINTER_TYPE_P (type)
1055 && !INTEGRAL_TYPE_P (type))
1056 return NULL;
1057
1058 if (!name_info (data, var)->iv)
1059 {
1060 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1061
1062 if (!bb
1063 || !flow_bb_inside_loop_p (data->current_loop, bb))
1064 set_iv (data, var, var, build_int_cst (type, 0), true);
1065 }
1066
1067 return name_info (data, var)->iv;
1068 }
1069
1070 /* Return the first non-invariant ssa var found in EXPR. */
1071
1072 static tree
1073 extract_single_var_from_expr (tree expr)
1074 {
1075 int i, n;
1076 tree tmp;
1077 enum tree_code code;
1078
1079 if (!expr || is_gimple_min_invariant (expr))
1080 return NULL;
1081
1082 code = TREE_CODE (expr);
1083 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1084 {
1085 n = TREE_OPERAND_LENGTH (expr);
1086 for (i = 0; i < n; i++)
1087 {
1088 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1089
1090 if (tmp)
1091 return tmp;
1092 }
1093 }
1094 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1095 }
1096
1097 /* Finds basic ivs. */
1098
1099 static bool
1100 find_bivs (struct ivopts_data *data)
1101 {
1102 gphi *phi;
1103 affine_iv iv;
1104 tree step, type, base, stop;
1105 bool found = false;
1106 struct loop *loop = data->current_loop;
1107 gphi_iterator psi;
1108
1109 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1110 {
1111 phi = psi.phi ();
1112
1113 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1114 continue;
1115
1116 if (virtual_operand_p (PHI_RESULT (phi)))
1117 continue;
1118
1119 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1120 continue;
1121
1122 if (integer_zerop (iv.step))
1123 continue;
1124
1125 step = iv.step;
1126 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1127 /* Stop expanding the iv base at the first ssa var referred to by the iv
1128 step. Ideally we should stop at any such ssa var, but since that is
1129 expensive and such cases rarely happen, we just do it on the first one.
1130
1131 See PR64705 for the rationale. */
1132 stop = extract_single_var_from_expr (step);
1133 base = expand_simple_operations (base, stop);
1134 if (contains_abnormal_ssa_name_p (base)
1135 || contains_abnormal_ssa_name_p (step))
1136 continue;
1137
1138 type = TREE_TYPE (PHI_RESULT (phi));
1139 base = fold_convert (type, base);
1140 if (step)
1141 {
1142 if (POINTER_TYPE_P (type))
1143 step = convert_to_ptrofftype (step);
1144 else
1145 step = fold_convert (type, step);
1146 }
1147
1148 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1149 found = true;
1150 }
1151
1152 return found;
1153 }
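/* As an illustration (ssa version numbers made up), for a loop header PHI

     i_1 = PHI <0 (preheader), i_7 (latch)>

   with the latch value defined as i_7 = i_1 + 1, simple_iv reports base 0 and
   step 1, so find_bivs records that iv for i_1; mark_bivs below then flags
   both i_1 and i_7 as bivs.  */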
1154
1155 /* Marks basic ivs. */
1156
1157 static void
1158 mark_bivs (struct ivopts_data *data)
1159 {
1160 gphi *phi;
1161 gimple def;
1162 tree var;
1163 struct iv *iv, *incr_iv;
1164 struct loop *loop = data->current_loop;
1165 basic_block incr_bb;
1166 gphi_iterator psi;
1167
1168 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1169 {
1170 phi = psi.phi ();
1171
1172 iv = get_iv (data, PHI_RESULT (phi));
1173 if (!iv)
1174 continue;
1175
1176 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1177 def = SSA_NAME_DEF_STMT (var);
1178 /* Don't mark an iv peeled from another one as a biv. */
1179 if (def
1180 && gimple_code (def) == GIMPLE_PHI
1181 && gimple_bb (def) == loop->header)
1182 continue;
1183
1184 incr_iv = get_iv (data, var);
1185 if (!incr_iv)
1186 continue;
1187
1188 /* If the increment is in the subloop, ignore it. */
1189 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1190 if (incr_bb->loop_father != data->current_loop
1191 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1192 continue;
1193
1194 iv->biv_p = true;
1195 incr_iv->biv_p = true;
1196 }
1197 }
1198
1199 /* Checks whether STMT defines a linear induction variable and stores its
1200 parameters to IV. */
1201
1202 static bool
1203 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1204 {
1205 tree lhs, stop;
1206 struct loop *loop = data->current_loop;
1207
1208 iv->base = NULL_TREE;
1209 iv->step = NULL_TREE;
1210
1211 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1212 return false;
1213
1214 lhs = gimple_assign_lhs (stmt);
1215 if (TREE_CODE (lhs) != SSA_NAME)
1216 return false;
1217
1218 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1219 return false;
1220
1221 /* Stop expanding the iv base at the first ssa var referred to by the iv
1222 step. Ideally we should stop at any such ssa var, but since that is
1223 expensive and such cases rarely happen, we just do it on the first one.
1224
1225 See PR64705 for the rationale. */
1226 stop = extract_single_var_from_expr (iv->step);
1227 iv->base = expand_simple_operations (iv->base, stop);
1228 if (contains_abnormal_ssa_name_p (iv->base)
1229 || contains_abnormal_ssa_name_p (iv->step))
1230 return false;
1231
1232 /* If STMT could throw, then do not consider STMT as defining a GIV.
1233 While this will suppress optimizations, we cannot safely delete this
1234 GIV and associated statements, even if it appears it is not used. */
1235 if (stmt_could_throw_p (stmt))
1236 return false;
1237
1238 return true;
1239 }
1240
1241 /* Finds general ivs in statement STMT. */
1242
1243 static void
1244 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1245 {
1246 affine_iv iv;
1247
1248 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1249 return;
1250
1251 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1252 }
1253
1254 /* Finds general ivs in basic block BB. */
1255
1256 static void
1257 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1258 {
1259 gimple_stmt_iterator bsi;
1260
1261 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1262 find_givs_in_stmt (data, gsi_stmt (bsi));
1263 }
1264
1265 /* Finds general ivs. */
1266
1267 static void
1268 find_givs (struct ivopts_data *data)
1269 {
1270 struct loop *loop = data->current_loop;
1271 basic_block *body = get_loop_body_in_dom_order (loop);
1272 unsigned i;
1273
1274 for (i = 0; i < loop->num_nodes; i++)
1275 find_givs_in_bb (data, body[i]);
1276 free (body);
1277 }
1278
1279 /* For each ssa name defined in LOOP, determines whether it is an induction
1280 variable and, if so, records its initial value and step. */
1281
1282 static bool
1283 find_induction_variables (struct ivopts_data *data)
1284 {
1285 unsigned i;
1286 bitmap_iterator bi;
1287
1288 if (!find_bivs (data))
1289 return false;
1290
1291 find_givs (data);
1292 mark_bivs (data);
1293
1294 if (dump_file && (dump_flags & TDF_DETAILS))
1295 {
1296 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1297
1298 if (niter)
1299 {
1300 fprintf (dump_file, " number of iterations ");
1301 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1302 if (!integer_zerop (niter->may_be_zero))
1303 {
1304 fprintf (dump_file, "; zero if ");
1305 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1306 }
1307 fprintf (dump_file, "\n\n");
1308 };
1309
1310 fprintf (dump_file, "Induction variables:\n\n");
1311
1312 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1313 {
1314 if (ver_info (data, i)->iv)
1315 dump_iv (dump_file, ver_info (data, i)->iv, true);
1316 }
1317 }
1318
1319 return true;
1320 }
1321
1322 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1323 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1324 is the const offset stripped from IV base. For uses of other types,
1325 ADDR_BASE and ADDR_OFFSET are zero by default. */
1326
1327 static struct iv_use *
1328 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1329 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1330 unsigned HOST_WIDE_INT addr_offset = 0)
1331 {
1332 struct iv_use *use = XCNEW (struct iv_use);
1333
1334 use->id = n_iv_uses (data);
1335 use->sub_id = 0;
1336 use->type = use_type;
1337 use->iv = iv;
1338 use->stmt = stmt;
1339 use->op_p = use_p;
1340 use->related_cands = BITMAP_ALLOC (NULL);
1341 use->next = NULL;
1342 use->addr_base = addr_base;
1343 use->addr_offset = addr_offset;
1344
1345 data->iv_uses.safe_push (use);
1346
1347 return use;
1348 }
1349
1350 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1351 The sub use is recorded under the one whose use id is ID_GROUP. */
1352
1353 static struct iv_use *
1354 record_sub_use (struct ivopts_data *data, tree *use_p,
1355 struct iv *iv, gimple stmt, enum use_type use_type,
1356 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1357 unsigned int id_group)
1358 {
1359 struct iv_use *use = XCNEW (struct iv_use);
1360 struct iv_use *group = iv_use (data, id_group);
1361
1362 use->id = group->id;
1363 use->sub_id = 0;
1364 use->type = use_type;
1365 use->iv = iv;
1366 use->stmt = stmt;
1367 use->op_p = use_p;
1368 use->related_cands = NULL;
1369 use->addr_base = addr_base;
1370 use->addr_offset = addr_offset;
1371
1372 /* Sub use list is maintained in offset ascending order. */
1373 if (addr_offset <= group->addr_offset)
1374 {
1375 use->related_cands = group->related_cands;
1376 group->related_cands = NULL;
1377 use->next = group;
1378 data->iv_uses[id_group] = use;
1379 }
1380 else
1381 {
1382 struct iv_use *pre;
1383 do
1384 {
1385 pre = group;
1386 group = group->next;
1387 }
1388 while (group && addr_offset > group->addr_offset);
1389 use->next = pre->next;
1390 pre->next = use;
1391 }
1392
1393 /* Reset the ssa name so that it is not shown in the dumps, if it was
1394 not already reset by the caller. */
1395 iv->ssa_name = NULL_TREE;
1396
1397 return use;
1398 }
1399
1400 /* Checks whether OP is a loop-level invariant and if so, records it.
1401 NONLINEAR_USE is true if the invariant is used in a way we do not
1402 handle specially. */
1403
1404 static void
1405 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1406 {
1407 basic_block bb;
1408 struct version_info *info;
1409
1410 if (TREE_CODE (op) != SSA_NAME
1411 || virtual_operand_p (op))
1412 return;
1413
1414 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1415 if (bb
1416 && flow_bb_inside_loop_p (data->current_loop, bb))
1417 return;
1418
1419 info = name_info (data, op);
1420 info->name = op;
1421 info->has_nonlin_use |= nonlinear_use;
1422 if (!info->inv_id)
1423 info->inv_id = ++data->max_inv_id;
1424 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1425 }
1426
1427 /* Checks whether the use OP is interesting and if so, records it. */
1428
1429 static struct iv_use *
1430 find_interesting_uses_op (struct ivopts_data *data, tree op)
1431 {
1432 struct iv *iv;
1433 struct iv *civ;
1434 gimple stmt;
1435 struct iv_use *use;
1436
1437 if (TREE_CODE (op) != SSA_NAME)
1438 return NULL;
1439
1440 iv = get_iv (data, op);
1441 if (!iv)
1442 return NULL;
1443
1444 if (iv->have_use_for)
1445 {
1446 use = iv_use (data, iv->use_id);
1447
1448 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1449 return use;
1450 }
1451
1452 if (integer_zerop (iv->step))
1453 {
1454 record_invariant (data, op, true);
1455 return NULL;
1456 }
1457 iv->have_use_for = true;
1458
1459 civ = XNEW (struct iv);
1460 *civ = *iv;
1461
1462 stmt = SSA_NAME_DEF_STMT (op);
1463 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1464 || is_gimple_assign (stmt));
1465
1466 use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
1467 iv->use_id = use->id;
1468
1469 return use;
1470 }
1471
1472 /* Given a condition in statement STMT, checks whether it is a compare
1473 of an induction variable and an invariant. If this is the case,
1474 CONTROL_VAR is set to location of the iv, BOUND to the location of
1475 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1476 induction variable descriptions, and true is returned. If this is not
1477 the case, CONTROL_VAR and BOUND are set to the arguments of the
1478 condition and false is returned. */
1479
1480 static bool
1481 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1482 tree **control_var, tree **bound,
1483 struct iv **iv_var, struct iv **iv_bound)
1484 {
1485 /* The objects returned when COND has constant operands. */
1486 static struct iv const_iv;
1487 static tree zero;
1488 tree *op0 = &zero, *op1 = &zero, *tmp_op;
1489 struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
1490 bool ret = false;
1491
1492 if (gimple_code (stmt) == GIMPLE_COND)
1493 {
1494 gcond *cond_stmt = as_a <gcond *> (stmt);
1495 op0 = gimple_cond_lhs_ptr (cond_stmt);
1496 op1 = gimple_cond_rhs_ptr (cond_stmt);
1497 }
1498 else
1499 {
1500 op0 = gimple_assign_rhs1_ptr (stmt);
1501 op1 = gimple_assign_rhs2_ptr (stmt);
1502 }
1503
1504 zero = integer_zero_node;
1505 const_iv.step = integer_zero_node;
1506
1507 if (TREE_CODE (*op0) == SSA_NAME)
1508 iv0 = get_iv (data, *op0);
1509 if (TREE_CODE (*op1) == SSA_NAME)
1510 iv1 = get_iv (data, *op1);
1511
1512 /* Exactly one of the compared values must be an iv, and the other one must
1513 be an invariant. */
1514 if (!iv0 || !iv1)
1515 goto end;
1516
1517 if (integer_zerop (iv0->step))
1518 {
1519 /* Control variable may be on the other side. */
1520 tmp_op = op0; op0 = op1; op1 = tmp_op;
1521 tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
1522 }
1523 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1524
1525 end:
1526 if (control_var)
1527 *control_var = op0;
1528 if (iv_var)
1529 *iv_var = iv0;
1530 if (bound)
1531 *bound = op1;
1532 if (iv_bound)
1533 *iv_bound = iv1;
1534
1535 return ret;
1536 }
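/* For example (illustrative names), for a condition such as i_4 < n_2 where
   i_4 is an iv with nonzero step and n_2 is loop invariant, *CONTROL_VAR is
   set to the location of i_4, *BOUND to the location of n_2, and true is
   returned.  If both operands were invariant, false would be returned
   instead.  */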
1537
1538 /* Checks whether the condition in STMT is interesting and if so,
1539 records it. */
1540
1541 static void
1542 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1543 {
1544 tree *var_p, *bound_p;
1545 struct iv *var_iv, *civ;
1546
1547 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1548 {
1549 find_interesting_uses_op (data, *var_p);
1550 find_interesting_uses_op (data, *bound_p);
1551 return;
1552 }
1553
1554 civ = XNEW (struct iv);
1555 *civ = *var_iv;
1556 record_use (data, NULL, civ, stmt, USE_COMPARE);
1557 }
1558
1559 /* Returns the outermost loop, relative to the loop LOOP, in which EXPR is
1560 obviously invariant, i.e. such that all its operands are defined
1561 outside of the returned loop. Returns NULL if EXPR is not
1562 even obviously invariant in LOOP. */
1563
1564 struct loop *
1565 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1566 {
1567 basic_block def_bb;
1568 unsigned i, len;
1569
1570 if (is_gimple_min_invariant (expr))
1571 return current_loops->tree_root;
1572
1573 if (TREE_CODE (expr) == SSA_NAME)
1574 {
1575 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1576 if (def_bb)
1577 {
1578 if (flow_bb_inside_loop_p (loop, def_bb))
1579 return NULL;
1580 return superloop_at_depth (loop,
1581 loop_depth (def_bb->loop_father) + 1);
1582 }
1583
1584 return current_loops->tree_root;
1585 }
1586
1587 if (!EXPR_P (expr))
1588 return NULL;
1589
1590 unsigned maxdepth = 0;
1591 len = TREE_OPERAND_LENGTH (expr);
1592 for (i = 0; i < len; i++)
1593 {
1594 struct loop *ivloop;
1595 if (!TREE_OPERAND (expr, i))
1596 continue;
1597
1598 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1599 if (!ivloop)
1600 return NULL;
1601 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1602 }
1603
1604 return superloop_at_depth (loop, maxdepth);
1605 }
1606
1607 /* Returns true if expression EXPR is obviously invariant in LOOP,
1608 i.e. if all its operands are defined outside of the LOOP. LOOP
1609 should not be the function body. */
1610
1611 bool
1612 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1613 {
1614 basic_block def_bb;
1615 unsigned i, len;
1616
1617 gcc_assert (loop_depth (loop) > 0);
1618
1619 if (is_gimple_min_invariant (expr))
1620 return true;
1621
1622 if (TREE_CODE (expr) == SSA_NAME)
1623 {
1624 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1625 if (def_bb
1626 && flow_bb_inside_loop_p (loop, def_bb))
1627 return false;
1628
1629 return true;
1630 }
1631
1632 if (!EXPR_P (expr))
1633 return false;
1634
1635 len = TREE_OPERAND_LENGTH (expr);
1636 for (i = 0; i < len; i++)
1637 if (TREE_OPERAND (expr, i)
1638 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1639 return false;
1640
1641 return true;
1642 }
1643
1644 /* Accumulates the steps of indices into DATA and replaces their values with the
1645 initial ones. Returns false when the value of the index cannot be determined.
1646 Callback for for_each_index. */
1647
1648 struct ifs_ivopts_data
1649 {
1650 struct ivopts_data *ivopts_data;
1651 gimple stmt;
1652 tree step;
1653 };
1654
1655 static bool
1656 idx_find_step (tree base, tree *idx, void *data)
1657 {
1658 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1659 struct iv *iv;
1660 bool use_overflow_semantics = false;
1661 tree step, iv_base, iv_step, lbound, off;
1662 struct loop *loop = dta->ivopts_data->current_loop;
1663
1664 /* If base is a component ref, require that the offset of the reference
1665 be invariant. */
1666 if (TREE_CODE (base) == COMPONENT_REF)
1667 {
1668 off = component_ref_field_offset (base);
1669 return expr_invariant_in_loop_p (loop, off);
1670 }
1671
1672 /* If base is an array, first check whether we will be able to move the
1673 reference out of the loop (in order to take its address in strength
1674 reduction). In order for this to work we need both lower bound
1675 and step to be loop invariants. */
1676 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1677 {
1678 /* Moreover, for a range, the size needs to be invariant as well. */
1679 if (TREE_CODE (base) == ARRAY_RANGE_REF
1680 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1681 return false;
1682
1683 step = array_ref_element_size (base);
1684 lbound = array_ref_low_bound (base);
1685
1686 if (!expr_invariant_in_loop_p (loop, step)
1687 || !expr_invariant_in_loop_p (loop, lbound))
1688 return false;
1689 }
1690
1691 if (TREE_CODE (*idx) != SSA_NAME)
1692 return true;
1693
1694 iv = get_iv (dta->ivopts_data, *idx);
1695 if (!iv)
1696 return false;
1697
1698 /* XXX For a base of *D42 with iv->base being &x[0], we produce
1699 *&x[0], which is not folded and does not trigger the
1700 ARRAY_REF path below. */
1701 *idx = iv->base;
1702
1703 if (integer_zerop (iv->step))
1704 return true;
1705
1706 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1707 {
1708 step = array_ref_element_size (base);
1709
1710 /* We only handle addresses whose step is an integer constant. */
1711 if (TREE_CODE (step) != INTEGER_CST)
1712 return false;
1713 }
1714 else
1715 /* The step for pointer arithmetic is already 1 byte. */
1716 step = size_one_node;
1717
1718 iv_base = iv->base;
1719 iv_step = iv->step;
1720 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1721 use_overflow_semantics = true;
1722
1723 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1724 sizetype, &iv_base, &iv_step, dta->stmt,
1725 use_overflow_semantics))
1726 {
1727 /* The index might wrap. */
1728 return false;
1729 }
1730
1731 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1732 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1733
1734 return true;
1735 }
1736
1737 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1738 object is passed to it in DATA. */
1739
1740 static bool
1741 idx_record_use (tree base, tree *idx,
1742 void *vdata)
1743 {
1744 struct ivopts_data *data = (struct ivopts_data *) vdata;
1745 find_interesting_uses_op (data, *idx);
1746 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1747 {
1748 find_interesting_uses_op (data, array_ref_element_size (base));
1749 find_interesting_uses_op (data, array_ref_low_bound (base));
1750 }
1751 return true;
1752 }
1753
1754 /* If we can prove that TOP = cst * BOT for some constant cst,
1755 store cst to MUL and return true. Otherwise return false.
1756 The returned value is always sign-extended, regardless of the
1757 signedness of TOP and BOT. */
1758
1759 static bool
1760 constant_multiple_of (tree top, tree bot, widest_int *mul)
1761 {
1762 tree mby;
1763 enum tree_code code;
1764 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1765 widest_int res, p0, p1;
1766
1767 STRIP_NOPS (top);
1768 STRIP_NOPS (bot);
1769
1770 if (operand_equal_p (top, bot, 0))
1771 {
1772 *mul = 1;
1773 return true;
1774 }
1775
1776 code = TREE_CODE (top);
1777 switch (code)
1778 {
1779 case MULT_EXPR:
1780 mby = TREE_OPERAND (top, 1);
1781 if (TREE_CODE (mby) != INTEGER_CST)
1782 return false;
1783
1784 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1785 return false;
1786
1787 *mul = wi::sext (res * wi::to_widest (mby), precision);
1788 return true;
1789
1790 case PLUS_EXPR:
1791 case MINUS_EXPR:
1792 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1793 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1794 return false;
1795
1796 if (code == MINUS_EXPR)
1797 p1 = -p1;
1798 *mul = wi::sext (p0 + p1, precision);
1799 return true;
1800
1801 case INTEGER_CST:
1802 if (TREE_CODE (bot) != INTEGER_CST)
1803 return false;
1804
1805 p0 = widest_int::from (top, SIGNED);
1806 p1 = widest_int::from (bot, SIGNED);
1807 if (p1 == 0)
1808 return false;
1809 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1810 return res == 0;
1811
1812 default:
1813 return false;
1814 }
1815 }
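/* A few worked examples (with made-up ssa names): for TOP = n_5 * 8 and
   BOT = n_5 the function stores 8 to *MUL and returns true; for TOP = 24 and
   BOT = 8 it stores 3; for TOP = n_5 + 1 and BOT = n_5 it returns false,
   because n_5 + 1 is not a constant multiple of n_5.  */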
1816
1817 /* Return true if memory reference REF with step STEP may be unaligned. */
1818
1819 static bool
1820 may_be_unaligned_p (tree ref, tree step)
1821 {
1822 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1823 thus they are not misaligned. */
1824 if (TREE_CODE (ref) == TARGET_MEM_REF)
1825 return false;
1826
1827 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1828 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1829 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1830
1831 unsigned HOST_WIDE_INT bitpos;
1832 unsigned int ref_align;
1833 get_object_alignment_1 (ref, &ref_align, &bitpos);
1834 if (ref_align < align
1835 || (bitpos % align) != 0
1836 || (bitpos % BITS_PER_UNIT) != 0)
1837 return true;
1838
1839 unsigned int trailing_zeros = tree_ctz (step);
1840 if (trailing_zeros < HOST_BITS_PER_INT
1841 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1842 return true;
1843
1844 return false;
1845 }
1846
1847 /* Return true if EXPR may be non-addressable. */
1848
1849 bool
1850 may_be_nonaddressable_p (tree expr)
1851 {
1852 switch (TREE_CODE (expr))
1853 {
1854 case TARGET_MEM_REF:
1855 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1856 target, thus they are always addressable. */
1857 return false;
1858
1859 case COMPONENT_REF:
1860 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1861 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1862
1863 case VIEW_CONVERT_EXPR:
1864 /* This kind of view-conversion may wrap non-addressable objects
1865 and make them look addressable. After some processing the
1866 non-addressability may be uncovered again, causing ADDR_EXPRs
1867 of inappropriate objects to be built. */
1868 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1869 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1870 return true;
1871
1872 /* ... fall through ... */
1873
1874 case ARRAY_REF:
1875 case ARRAY_RANGE_REF:
1876 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1877
1878 CASE_CONVERT:
1879 return true;
1880
1881 default:
1882 break;
1883 }
1884
1885 return false;
1886 }
1887
1888 static tree
1889 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1890
1891 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1892 If there is an existing use which has the same stripped iv base and step,
1893 this function records this one as a sub use of that one; otherwise it records
1894 it as a normal one. */
1895
1896 static struct iv_use *
1897 record_group_use (struct ivopts_data *data, tree *use_p,
1898 struct iv *iv, gimple stmt, enum use_type use_type)
1899 {
1900 unsigned int i;
1901 struct iv_use *use;
1902 tree addr_base;
1903 unsigned HOST_WIDE_INT addr_offset;
1904
1905 /* Sub uses are only supported for address type uses, that is, those
1906 with a base object. */
1907 if (!iv->base_object)
1908 return record_use (data, use_p, iv, stmt, use_type);
1909
1910 addr_base = strip_offset (iv->base, &addr_offset);
1911 for (i = 0; i < n_iv_uses (data); i++)
1912 {
1913 use = iv_use (data, i);
1914 if (use->type != USE_ADDRESS || !use->iv->base_object)
1915 continue;
1916
1917 /* Check if it has the same stripped base and step. */
1918 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1919 && operand_equal_p (iv->step, use->iv->step, 0)
1920 && operand_equal_p (addr_base, use->addr_base, 0))
1921 break;
1922 }
1923
1924 if (i == n_iv_uses (data))
1925 return record_use (data, use_p, iv, stmt,
1926 use_type, addr_base, addr_offset);
1927 else
1928 return record_sub_use (data, use_p, iv, stmt,
1929 use_type, addr_base, addr_offset, i);
1930 }
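/* As an illustration, address uses a[i], a[i + 1] and a[i + 4] in the same
   loop share the base object, the iv step and the stripped base address, so
   the latter two are recorded as sub uses of the first; the sub use list is
   kept sorted by the stripped constant offsets (0, 4 and 16 for 4-byte
   elements).  The concrete offsets are only illustrative.  */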
1931
1932 /* Finds addresses in *OP_P inside STMT. */
1933
1934 static void
1935 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1936 {
1937 tree base = *op_p, step = size_zero_node;
1938 struct iv *civ;
1939 struct ifs_ivopts_data ifs_ivopts_data;
1940
1941 /* Do not play with volatile memory references. A bit too conservative,
1942 perhaps, but safe. */
1943 if (gimple_has_volatile_ops (stmt))
1944 goto fail;
1945
1946 /* Ignore bitfields for now. Not really something terribly complicated
1947 to handle. TODO. */
1948 if (TREE_CODE (base) == BIT_FIELD_REF)
1949 goto fail;
1950
1951 base = unshare_expr (base);
1952
1953 if (TREE_CODE (base) == TARGET_MEM_REF)
1954 {
1955 tree type = build_pointer_type (TREE_TYPE (base));
1956 tree astep;
1957
1958 if (TMR_BASE (base)
1959 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1960 {
1961 civ = get_iv (data, TMR_BASE (base));
1962 if (!civ)
1963 goto fail;
1964
1965 TMR_BASE (base) = civ->base;
1966 step = civ->step;
1967 }
1968 if (TMR_INDEX2 (base)
1969 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1970 {
1971 civ = get_iv (data, TMR_INDEX2 (base));
1972 if (!civ)
1973 goto fail;
1974
1975 TMR_INDEX2 (base) = civ->base;
1976 step = civ->step;
1977 }
1978 if (TMR_INDEX (base)
1979 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1980 {
1981 civ = get_iv (data, TMR_INDEX (base));
1982 if (!civ)
1983 goto fail;
1984
1985 TMR_INDEX (base) = civ->base;
1986 astep = civ->step;
1987
1988 if (astep)
1989 {
1990 if (TMR_STEP (base))
1991 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1992
1993 step = fold_build2 (PLUS_EXPR, type, step, astep);
1994 }
1995 }
1996
1997 if (integer_zerop (step))
1998 goto fail;
1999 base = tree_mem_ref_addr (type, base);
2000 }
2001 else
2002 {
2003 ifs_ivopts_data.ivopts_data = data;
2004 ifs_ivopts_data.stmt = stmt;
2005 ifs_ivopts_data.step = size_zero_node;
2006 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
2007 || integer_zerop (ifs_ivopts_data.step))
2008 goto fail;
2009 step = ifs_ivopts_data.step;
2010
2011 /* Check that the base expression is addressable. This needs
2012 to be done after substituting bases of IVs into it. */
2013 if (may_be_nonaddressable_p (base))
2014 goto fail;
2015
2016 /* Moreover, on strict alignment platforms, check that it is
2017 sufficiently aligned. */
2018 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
2019 goto fail;
2020
2021 base = build_fold_addr_expr (base);
2022
2023 /* Substituting bases of IVs into the base expression might
2024 have caused folding opportunities. */
2025 if (TREE_CODE (base) == ADDR_EXPR)
2026 {
2027 tree *ref = &TREE_OPERAND (base, 0);
2028 while (handled_component_p (*ref))
2029 ref = &TREE_OPERAND (*ref, 0);
2030 if (TREE_CODE (*ref) == MEM_REF)
2031 {
2032 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2033 TREE_OPERAND (*ref, 0),
2034 TREE_OPERAND (*ref, 1));
2035 if (tem)
2036 *ref = tem;
2037 }
2038 }
2039 }
2040
2041 civ = alloc_iv (base, step);
2042 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2043 return;
2044
2045 fail:
2046 for_each_index (op_p, idx_record_use, data);
2047 }
2048
2049 /* Finds and records invariants used in STMT. */
2050
2051 static void
2052 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2053 {
2054 ssa_op_iter iter;
2055 use_operand_p use_p;
2056 tree op;
2057
2058 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2059 {
2060 op = USE_FROM_PTR (use_p);
2061 record_invariant (data, op, false);
2062 }
2063 }
2064
2065 /* Finds interesting uses of induction variables in the statement STMT. */
2066
2067 static void
2068 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2069 {
2070 struct iv *iv;
2071 tree op, *lhs, *rhs;
2072 ssa_op_iter iter;
2073 use_operand_p use_p;
2074 enum tree_code code;
2075
2076 find_invariants_stmt (data, stmt);
2077
2078 if (gimple_code (stmt) == GIMPLE_COND)
2079 {
2080 find_interesting_uses_cond (data, stmt);
2081 return;
2082 }
2083
2084 if (is_gimple_assign (stmt))
2085 {
2086 lhs = gimple_assign_lhs_ptr (stmt);
2087 rhs = gimple_assign_rhs1_ptr (stmt);
2088
2089 if (TREE_CODE (*lhs) == SSA_NAME)
2090 {
2091 /* If the statement defines an induction variable, the uses are not
2092 interesting by themselves. */
2093
2094 iv = get_iv (data, *lhs);
2095
2096 if (iv && !integer_zerop (iv->step))
2097 return;
2098 }
2099
2100 code = gimple_assign_rhs_code (stmt);
2101 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2102 && (REFERENCE_CLASS_P (*rhs)
2103 || is_gimple_val (*rhs)))
2104 {
2105 if (REFERENCE_CLASS_P (*rhs))
2106 find_interesting_uses_address (data, stmt, rhs);
2107 else
2108 find_interesting_uses_op (data, *rhs);
2109
2110 if (REFERENCE_CLASS_P (*lhs))
2111 find_interesting_uses_address (data, stmt, lhs);
2112 return;
2113 }
2114 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2115 {
2116 find_interesting_uses_cond (data, stmt);
2117 return;
2118 }
2119
2120 /* TODO -- we should also handle address uses of type
2121
2122 memory = call (whatever);
2123
2124 and
2125
2126 call (memory). */
2127 }
2128
2129 if (gimple_code (stmt) == GIMPLE_PHI
2130 && gimple_bb (stmt) == data->current_loop->header)
2131 {
2132 iv = get_iv (data, PHI_RESULT (stmt));
2133
2134 if (iv && !integer_zerop (iv->step))
2135 return;
2136 }
2137
2138 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2139 {
2140 op = USE_FROM_PTR (use_p);
2141
2142 if (TREE_CODE (op) != SSA_NAME)
2143 continue;
2144
2145 iv = get_iv (data, op);
2146 if (!iv)
2147 continue;
2148
2149 find_interesting_uses_op (data, op);
2150 }
2151 }
2152
2153 /* Finds interesting uses of induction variables outside of loops
2154 on loop exit edge EXIT. */
2155
2156 static void
2157 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2158 {
2159 gphi *phi;
2160 gphi_iterator psi;
2161 tree def;
2162
2163 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2164 {
2165 phi = psi.phi ();
2166 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2167 if (!virtual_operand_p (def))
2168 find_interesting_uses_op (data, def);
2169 }
2170 }
2171
2172 /* Finds uses of the induction variables that are interesting. */
2173
2174 static void
2175 find_interesting_uses (struct ivopts_data *data)
2176 {
2177 basic_block bb;
2178 gimple_stmt_iterator bsi;
2179 basic_block *body = get_loop_body (data->current_loop);
2180 unsigned i;
2181 struct version_info *info;
2182 edge e;
2183
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 fprintf (dump_file, "Uses:\n\n");
2186
2187 for (i = 0; i < data->current_loop->num_nodes; i++)
2188 {
2189 edge_iterator ei;
2190 bb = body[i];
2191
2192 FOR_EACH_EDGE (e, ei, bb->succs)
2193 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2194 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2195 find_interesting_uses_outside (data, e);
2196
2197 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2198 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2199 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2200 if (!is_gimple_debug (gsi_stmt (bsi)))
2201 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2202 }
2203
2204 if (dump_file && (dump_flags & TDF_DETAILS))
2205 {
2206 bitmap_iterator bi;
2207
2208 fprintf (dump_file, "\n");
2209
2210 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2211 {
2212 info = ver_info (data, i);
2213 if (info->inv_id)
2214 {
2215 fprintf (dump_file, " ");
2216 print_generic_expr (dump_file, info->name, TDF_SLIM);
2217 fprintf (dump_file, " is invariant (%d)%s\n",
2218 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2219 }
2220 }
2221
2222 fprintf (dump_file, "\n");
2223 }
2224
2225 free (body);
2226 }
2227
2228 /* Compute maximum offset of [base + offset] addressing mode
2229 for memory reference represented by USE. */
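/* Sketch of the probing done below: starting from the widest power of two,
   offsets of the form (1 << i) - 1 are tried until one forms a valid
   [base + offset] address.  On a hypothetical target whose [base + offset]
   form accepts offsets up to 4095, this returns 4095.  */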
2230
2231 static HOST_WIDE_INT
2232 compute_max_addr_offset (struct iv_use *use)
2233 {
2234 int width;
2235 rtx reg, addr;
2236 HOST_WIDE_INT i, off;
2237 unsigned list_index, num;
2238 addr_space_t as;
2239 machine_mode mem_mode, addr_mode;
2240 static vec<HOST_WIDE_INT> max_offset_list;
2241
2242 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2243 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2244
2245 num = max_offset_list.length ();
2246 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2247 if (list_index >= num)
2248 {
2249 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2250 for (; num < max_offset_list.length (); num++)
2251 max_offset_list[num] = -1;
2252 }
2253
2254 off = max_offset_list[list_index];
2255 if (off != -1)
2256 return off;
2257
2258 addr_mode = targetm.addr_space.address_mode (as);
2259 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2260 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2261
2262 width = GET_MODE_BITSIZE (addr_mode) - 1;
2263 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2264 width = HOST_BITS_PER_WIDE_INT - 1;
2265
2266 for (i = width; i > 0; i--)
2267 {
2268 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2269 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2270 if (memory_address_addr_space_p (mem_mode, addr, as))
2271 break;
2272
2273 /* For some strict-alignment targets, the offset must be naturally
2274 aligned. Try an aligned offset if mem_mode is not QImode. */
2275 off = ((unsigned HOST_WIDE_INT) 1 << i);
2276 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2277 {
2278 off -= GET_MODE_SIZE (mem_mode);
2279 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2280 if (memory_address_addr_space_p (mem_mode, addr, as))
2281 break;
2282 }
2283 }
2284 if (i == 0)
2285 off = 0;
2286
2287 max_offset_list[list_index] = off;
2288 return off;
2289 }
2290
2291 /* Check if all small groups should be split. Return true if and
2292 only if:
2293
2294 1) At least one group contains two uses with different offsets.
2295 2) No group contains more than two uses with different offsets.
2296
2297 Return false otherwise. We want to split such groups because:
2298
2299 1) Small groups don't have much benefit and may interfere with
2300 general candidate selection.
2301 2) The problem size with only small groups is usually small and the
2302 general algorithm can handle it well.
2303
2304 TODO -- Above claim may not hold when auto increment is supported. */
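/* A hypothetical example: with one group at offsets {0, 4} and another
   single-use group at {0}, this returns true and the small groups are
   split; if any group had offsets {0, 4, 8}, it would return false.  */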
2305
2306 static bool
2307 split_all_small_groups (struct ivopts_data *data)
2308 {
2309 bool split_p = false;
2310 unsigned int i, n, distinct;
2311 struct iv_use *pre, *use;
2312
2313 n = n_iv_uses (data);
2314 for (i = 0; i < n; i++)
2315 {
2316 use = iv_use (data, i);
2317 if (!use->next)
2318 continue;
2319
2320 distinct = 1;
2321 gcc_assert (use->type == USE_ADDRESS);
2322 for (pre = use, use = use->next; use; pre = use, use = use->next)
2323 {
2324 if (pre->addr_offset != use->addr_offset)
2325 distinct++;
2326
2327 if (distinct > 2)
2328 return false;
2329 }
2330 if (distinct == 2)
2331 split_p = true;
2332 }
2333
2334 return split_p;
2335 }
2336
2337 /* For each group of address type uses, this function further groups
2338 these uses according to the maximum offset supported by the target's
2339 [base + offset] addressing mode. */
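/* A hypothetical example: a group with offsets {0, 8, 4000, 4100}, assuming
   a maximum supported offset of 4095, is split into {0, 8, 4000} and a new
   group starting at 4100, since 4100 - 0 exceeds the limit.  */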
2340
2341 static void
2342 group_address_uses (struct ivopts_data *data)
2343 {
2344 HOST_WIDE_INT max_offset = -1;
2345 unsigned int i, n, sub_id;
2346 struct iv_use *pre, *use;
2347 unsigned HOST_WIDE_INT addr_offset_first;
2348
2349 /* Reset max offset to split all small groups. */
2350 if (split_all_small_groups (data))
2351 max_offset = 0;
2352
2353 n = n_iv_uses (data);
2354 for (i = 0; i < n; i++)
2355 {
2356 use = iv_use (data, i);
2357 if (!use->next)
2358 continue;
2359
2360 gcc_assert (use->type == USE_ADDRESS);
2361 if (max_offset != 0)
2362 max_offset = compute_max_addr_offset (use);
2363
2364 while (use)
2365 {
2366 sub_id = 0;
2367 addr_offset_first = use->addr_offset;
2368 /* Only uses whose offset lies within max_offset of the first use's
2369 offset can be grouped together. */
2370 for (pre = use, use = use->next;
2371 use && (use->addr_offset - addr_offset_first
2372 <= (unsigned HOST_WIDE_INT) max_offset);
2373 pre = use, use = use->next)
2374 {
2375 use->id = pre->id;
2376 use->sub_id = ++sub_id;
2377 }
2378
2379 /* Break the list and create a new group. */
2380 if (use)
2381 {
2382 pre->next = NULL;
2383 use->id = n_iv_uses (data);
2384 use->related_cands = BITMAP_ALLOC (NULL);
2385 data->iv_uses.safe_push (use);
2386 }
2387 }
2388 }
2389
2390 if (dump_file && (dump_flags & TDF_DETAILS))
2391 dump_uses (dump_file, data);
2392 }
2393
2394 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2395 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2396 we are at the top-level of the processed address. */
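/* A hypothetical example: for &a[3] with 4-byte elements this returns &a
   and sets *OFFSET to 12; for p + 16 it returns p and sets *OFFSET to 16.  */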
2397
2398 static tree
2399 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2400 HOST_WIDE_INT *offset)
2401 {
2402 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2403 enum tree_code code;
2404 tree type, orig_type = TREE_TYPE (expr);
2405 HOST_WIDE_INT off0, off1, st;
2406 tree orig_expr = expr;
2407
2408 STRIP_NOPS (expr);
2409
2410 type = TREE_TYPE (expr);
2411 code = TREE_CODE (expr);
2412 *offset = 0;
2413
2414 switch (code)
2415 {
2416 case INTEGER_CST:
2417 if (!cst_and_fits_in_hwi (expr)
2418 || integer_zerop (expr))
2419 return orig_expr;
2420
2421 *offset = int_cst_value (expr);
2422 return build_int_cst (orig_type, 0);
2423
2424 case POINTER_PLUS_EXPR:
2425 case PLUS_EXPR:
2426 case MINUS_EXPR:
2427 op0 = TREE_OPERAND (expr, 0);
2428 op1 = TREE_OPERAND (expr, 1);
2429
2430 op0 = strip_offset_1 (op0, false, false, &off0);
2431 op1 = strip_offset_1 (op1, false, false, &off1);
2432
2433 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2434 if (op0 == TREE_OPERAND (expr, 0)
2435 && op1 == TREE_OPERAND (expr, 1))
2436 return orig_expr;
2437
2438 if (integer_zerop (op1))
2439 expr = op0;
2440 else if (integer_zerop (op0))
2441 {
2442 if (code == MINUS_EXPR)
2443 expr = fold_build1 (NEGATE_EXPR, type, op1);
2444 else
2445 expr = op1;
2446 }
2447 else
2448 expr = fold_build2 (code, type, op0, op1);
2449
2450 return fold_convert (orig_type, expr);
2451
2452 case MULT_EXPR:
2453 op1 = TREE_OPERAND (expr, 1);
2454 if (!cst_and_fits_in_hwi (op1))
2455 return orig_expr;
2456
2457 op0 = TREE_OPERAND (expr, 0);
2458 op0 = strip_offset_1 (op0, false, false, &off0);
2459 if (op0 == TREE_OPERAND (expr, 0))
2460 return orig_expr;
2461
2462 *offset = off0 * int_cst_value (op1);
2463 if (integer_zerop (op0))
2464 expr = op0;
2465 else
2466 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2467
2468 return fold_convert (orig_type, expr);
2469
2470 case ARRAY_REF:
2471 case ARRAY_RANGE_REF:
2472 if (!inside_addr)
2473 return orig_expr;
2474
2475 step = array_ref_element_size (expr);
2476 if (!cst_and_fits_in_hwi (step))
2477 break;
2478
2479 st = int_cst_value (step);
2480 op1 = TREE_OPERAND (expr, 1);
2481 op1 = strip_offset_1 (op1, false, false, &off1);
2482 *offset = off1 * st;
2483
2484 if (top_compref
2485 && integer_zerop (op1))
2486 {
2487 /* Strip the component reference completely. */
2488 op0 = TREE_OPERAND (expr, 0);
2489 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2490 *offset += off0;
2491 return op0;
2492 }
2493 break;
2494
2495 case COMPONENT_REF:
2496 {
2497 tree field;
2498
2499 if (!inside_addr)
2500 return orig_expr;
2501
2502 tmp = component_ref_field_offset (expr);
2503 field = TREE_OPERAND (expr, 1);
2504 if (top_compref
2505 && cst_and_fits_in_hwi (tmp)
2506 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2507 {
2508 HOST_WIDE_INT boffset, abs_off;
2509
2510 /* Strip the component reference completely. */
2511 op0 = TREE_OPERAND (expr, 0);
2512 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2513 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2514 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2515 if (boffset < 0)
2516 abs_off = -abs_off;
2517
2518 *offset = off0 + int_cst_value (tmp) + abs_off;
2519 return op0;
2520 }
2521 }
2522 break;
2523
2524 case ADDR_EXPR:
2525 op0 = TREE_OPERAND (expr, 0);
2526 op0 = strip_offset_1 (op0, true, true, &off0);
2527 *offset += off0;
2528
2529 if (op0 == TREE_OPERAND (expr, 0))
2530 return orig_expr;
2531
2532 expr = build_fold_addr_expr (op0);
2533 return fold_convert (orig_type, expr);
2534
2535 case MEM_REF:
2536 /* ??? Offset operand? */
2537 inside_addr = false;
2538 break;
2539
2540 default:
2541 return orig_expr;
2542 }
2543
2544 /* Default handling of expressions for which we want to recurse into
2545 the first operand. */
2546 op0 = TREE_OPERAND (expr, 0);
2547 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2548 *offset += off0;
2549
2550 if (op0 == TREE_OPERAND (expr, 0)
2551 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2552 return orig_expr;
2553
2554 expr = copy_node (expr);
2555 TREE_OPERAND (expr, 0) = op0;
2556 if (op1)
2557 TREE_OPERAND (expr, 1) = op1;
2558
2559 /* Inside an address, we might strip the top-level component references,
2560 thus changing the type of the expression. The handling of ADDR_EXPR
2561 will fix that. */
2562 expr = fold_convert (orig_type, expr);
2563
2564 return expr;
2565 }
2566
2567 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2568
2569 static tree
2570 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2571 {
2572 HOST_WIDE_INT off;
2573 tree core = strip_offset_1 (expr, false, false, &off);
2574 *offset = off;
2575 return core;
2576 }
2577
2578 /* Returns a variant of TYPE that can be used as a base for different uses.
2579 We return an unsigned type with the same precision, which avoids problems
2580 with overflows. */
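/* For illustration: int maps to unsigned int, unsigned long is returned
   unchanged, and a pointer type maps to an unsigned integer type of the
   same precision.  */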
2581
2582 static tree
2583 generic_type_for (tree type)
2584 {
2585 if (POINTER_TYPE_P (type))
2586 return unsigned_type_for (type);
2587
2588 if (TYPE_UNSIGNED (type))
2589 return type;
2590
2591 return unsigned_type_for (type);
2592 }
2593
2594 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2595 the bitmap in which we should store them. */
2596
2597 static struct ivopts_data *fd_ivopts_data;
2598 static tree
2599 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2600 {
2601 bitmap *depends_on = (bitmap *) data;
2602 struct version_info *info;
2603
2604 if (TREE_CODE (*expr_p) != SSA_NAME)
2605 return NULL_TREE;
2606 info = name_info (fd_ivopts_data, *expr_p);
2607
2608 if (!info->inv_id || info->has_nonlin_use)
2609 return NULL_TREE;
2610
2611 if (!*depends_on)
2612 *depends_on = BITMAP_ALLOC (NULL);
2613 bitmap_set_bit (*depends_on, info->inv_id);
2614
2615 return NULL_TREE;
2616 }
2617
2618 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2619 position to POS. If USE is not NULL, the candidate is set as related to
2620 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2621 replacement of the final value of the iv by a direct computation. */
2622
2623 static struct iv_cand *
2624 add_candidate_1 (struct ivopts_data *data,
2625 tree base, tree step, bool important, enum iv_position pos,
2626 struct iv_use *use, gimple incremented_at)
2627 {
2628 unsigned i;
2629 struct iv_cand *cand = NULL;
2630 tree type, orig_type;
2631
2632 /* For non-original variables, make sure their values are computed in a type
2633 that does not invoke undefined behavior on overflows (since in general,
2634 we cannot prove that these induction variables are non-wrapping). */
2635 if (pos != IP_ORIGINAL)
2636 {
2637 orig_type = TREE_TYPE (base);
2638 type = generic_type_for (orig_type);
2639 if (type != orig_type)
2640 {
2641 base = fold_convert (type, base);
2642 step = fold_convert (type, step);
2643 }
2644 }
2645
2646 for (i = 0; i < n_iv_cands (data); i++)
2647 {
2648 cand = iv_cand (data, i);
2649
2650 if (cand->pos != pos)
2651 continue;
2652
2653 if (cand->incremented_at != incremented_at
2654 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2655 && cand->ainc_use != use))
2656 continue;
2657
2658 if (!cand->iv)
2659 {
2660 if (!base && !step)
2661 break;
2662
2663 continue;
2664 }
2665
2666 if (!base && !step)
2667 continue;
2668
2669 if (operand_equal_p (base, cand->iv->base, 0)
2670 && operand_equal_p (step, cand->iv->step, 0)
2671 && (TYPE_PRECISION (TREE_TYPE (base))
2672 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2673 break;
2674 }
2675
2676 if (i == n_iv_cands (data))
2677 {
2678 cand = XCNEW (struct iv_cand);
2679 cand->id = i;
2680
2681 if (!base && !step)
2682 cand->iv = NULL;
2683 else
2684 cand->iv = alloc_iv (base, step);
2685
2686 cand->pos = pos;
2687 if (pos != IP_ORIGINAL && cand->iv)
2688 {
2689 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2690 cand->var_after = cand->var_before;
2691 }
2692 cand->important = important;
2693 cand->incremented_at = incremented_at;
2694 data->iv_candidates.safe_push (cand);
2695
2696 if (step
2697 && TREE_CODE (step) != INTEGER_CST)
2698 {
2699 fd_ivopts_data = data;
2700 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2701 }
2702
2703 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2704 cand->ainc_use = use;
2705 else
2706 cand->ainc_use = NULL;
2707
2708 if (dump_file && (dump_flags & TDF_DETAILS))
2709 dump_cand (dump_file, cand);
2710 }
2711
2712 if (important && !cand->important)
2713 {
2714 cand->important = true;
2715 if (dump_file && (dump_flags & TDF_DETAILS))
2716 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2717 }
2718
2719 if (use)
2720 {
2721 bitmap_set_bit (use->related_cands, i);
2722 if (dump_file && (dump_flags & TDF_DETAILS))
2723 fprintf (dump_file, "Candidate %d is related to use %d\n",
2724 cand->id, use->id);
2725 }
2726
2727 return cand;
2728 }
2729
2730 /* Returns true if incrementing the induction variable at the end of the LOOP
2731 is allowed.
2732
2733 The purpose is to avoid splitting the latch edge with a biv increment, thus
2734 creating a jump, possibly confusing other optimization passes and leaving
2735 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2736 is not available (so we do not have a better alternative), or if the latch
2737 edge is already nonempty. */
2738
2739 static bool
2740 allow_ip_end_pos_p (struct loop *loop)
2741 {
2742 if (!ip_normal_pos (loop))
2743 return true;
2744
2745 if (!empty_block_p (ip_end_pos (loop)))
2746 return true;
2747
2748 return false;
2749 }
2750
2751 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2752 Important field is set to IMPORTANT. */
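/* A hypothetical example: for a 4-byte access with step 4 on a target with
   post-increment loads, an IP_AFTER_USE candidate with the same base is
   added; with pre-increment, the candidate base is shifted to BASE - STEP
   so that the increment happens before the use.  */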
2753
2754 static void
2755 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2756 bool important, struct iv_use *use)
2757 {
2758 basic_block use_bb = gimple_bb (use->stmt);
2759 machine_mode mem_mode;
2760 unsigned HOST_WIDE_INT cstepi;
2761
2762 /* If we insert the increment in any position other than the standard
2763 ones, we must ensure that it is incremented once per iteration.
2764 It must not be in an inner nested loop, or one side of an if
2765 statement. */
2766 if (use_bb->loop_father != data->current_loop
2767 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2768 || stmt_could_throw_p (use->stmt)
2769 || !cst_and_fits_in_hwi (step))
2770 return;
2771
2772 cstepi = int_cst_value (step);
2773
2774 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2775 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2776 || USE_STORE_PRE_INCREMENT (mem_mode))
2777 && GET_MODE_SIZE (mem_mode) == cstepi)
2778 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2779 || USE_STORE_PRE_DECREMENT (mem_mode))
2780 && GET_MODE_SIZE (mem_mode) == -cstepi))
2781 {
2782 enum tree_code code = MINUS_EXPR;
2783 tree new_base;
2784 tree new_step = step;
2785
2786 if (POINTER_TYPE_P (TREE_TYPE (base)))
2787 {
2788 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2789 code = POINTER_PLUS_EXPR;
2790 }
2791 else
2792 new_step = fold_convert (TREE_TYPE (base), new_step);
2793 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2794 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2795 use->stmt);
2796 }
2797 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2798 || USE_STORE_POST_INCREMENT (mem_mode))
2799 && GET_MODE_SIZE (mem_mode) == cstepi)
2800 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2801 || USE_STORE_POST_DECREMENT (mem_mode))
2802 && GET_MODE_SIZE (mem_mode) == -cstepi))
2803 {
2804 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2805 use->stmt);
2806 }
2807 }
2808
2809 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2810 position to POS. If USE is not NULL, the candidate is set as related to
2811 it. The candidate computation is scheduled on all available positions. */
2812
2813 static void
2814 add_candidate (struct ivopts_data *data,
2815 tree base, tree step, bool important, struct iv_use *use)
2816 {
2817 gcc_assert (use == NULL || use->sub_id == 0);
2818
2819 if (ip_normal_pos (data->current_loop))
2820 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2821 if (ip_end_pos (data->current_loop)
2822 && allow_ip_end_pos_p (data->current_loop))
2823 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2824
2825 if (use != NULL && use->type == USE_ADDRESS)
2826 add_autoinc_candidates (data, base, step, important, use);
2827 }
2828
2829 /* Adds standard iv candidates. */
2830
2831 static void
2832 add_standard_iv_candidates (struct ivopts_data *data)
2833 {
2834 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2835
2836 /* The same for a double-integer type if it is still fast enough. */
2837 if (TYPE_PRECISION
2838 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2839 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2840 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2841 build_int_cst (long_integer_type_node, 1), true, NULL);
2842
2843 /* The same for a double-integer type if it is still fast enough. */
2844 if (TYPE_PRECISION
2845 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2846 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2847 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2848 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2849 }
2850
2851
2852 /* Adds candidates based on the old induction variable IV. */
2853
2854 static void
2855 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2856 {
2857 gimple phi;
2858 tree def;
2859 struct iv_cand *cand;
2860
2861 add_candidate (data, iv->base, iv->step, true, NULL);
2862
2863 /* The same, but with initial value zero. */
2864 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2865 add_candidate (data, size_int (0), iv->step, true, NULL);
2866 else
2867 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2868 iv->step, true, NULL);
2869
2870 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2871 if (gimple_code (phi) == GIMPLE_PHI)
2872 {
2873 /* Additionally record the possibility of leaving the original iv
2874 untouched. */
2875 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2876 /* Don't add candidate if it's from another PHI node because
2877 it's an affine iv appearing in the form of PEELED_CHREC. */
2878 phi = SSA_NAME_DEF_STMT (def);
2879 if (gimple_code (phi) != GIMPLE_PHI)
2880 {
2881 cand = add_candidate_1 (data,
2882 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2883 SSA_NAME_DEF_STMT (def));
2884 cand->var_before = iv->ssa_name;
2885 cand->var_after = def;
2886 }
2887 else
2888 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2889 }
2890 }
2891
2892 /* Adds candidates based on the old induction variables. */
2893
2894 static void
2895 add_old_ivs_candidates (struct ivopts_data *data)
2896 {
2897 unsigned i;
2898 struct iv *iv;
2899 bitmap_iterator bi;
2900
2901 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2902 {
2903 iv = ver_info (data, i)->iv;
2904 if (iv && iv->biv_p && !integer_zerop (iv->step))
2905 add_old_iv_candidates (data, iv);
2906 }
2907 }
2908
2909 /* Adds candidates based on the value of the induction variable IV and USE. */
2910
2911 static void
2912 add_iv_value_candidates (struct ivopts_data *data,
2913 struct iv *iv, struct iv_use *use)
2914 {
2915 unsigned HOST_WIDE_INT offset;
2916 tree base;
2917 tree basetype;
2918
2919 add_candidate (data, iv->base, iv->step, false, use);
2920
2921 /* The same, but with initial value zero. Make such a variable important,
2922 since it is generic enough that many uses may possibly be based
2923 on it. */
2924 basetype = TREE_TYPE (iv->base);
2925 if (POINTER_TYPE_P (basetype))
2926 basetype = sizetype;
2927 add_candidate (data, build_int_cst (basetype, 0),
2928 iv->step, true, use);
2929
2930 /* Third, try removing the constant offset. Make sure to even
2931 add a candidate for &a[0] vs. (T *)&a. */
2932 base = strip_offset (iv->base, &offset);
2933 if (offset
2934 || base != iv->base)
2935 add_candidate (data, base, iv->step, false, use);
2936 }
2937
2938 /* Adds candidates based on the uses. */
2939
2940 static void
2941 add_derived_ivs_candidates (struct ivopts_data *data)
2942 {
2943 unsigned i;
2944
2945 for (i = 0; i < n_iv_uses (data); i++)
2946 {
2947 struct iv_use *use = iv_use (data, i);
2948
2949 if (!use)
2950 continue;
2951
2952 switch (use->type)
2953 {
2954 case USE_NONLINEAR_EXPR:
2955 case USE_COMPARE:
2956 case USE_ADDRESS:
2957 /* Just add the ivs based on the value of the iv used here. */
2958 add_iv_value_candidates (data, use->iv, use);
2959 break;
2960
2961 default:
2962 gcc_unreachable ();
2963 }
2964 }
2965 }
2966
2967 /* Record important candidates and add them to related_cands bitmaps
2968 if needed. */
2969
2970 static void
2971 record_important_candidates (struct ivopts_data *data)
2972 {
2973 unsigned i;
2974 struct iv_use *use;
2975
2976 for (i = 0; i < n_iv_cands (data); i++)
2977 {
2978 struct iv_cand *cand = iv_cand (data, i);
2979
2980 if (cand->important)
2981 bitmap_set_bit (data->important_candidates, i);
2982 }
2983
2984 data->consider_all_candidates = (n_iv_cands (data)
2985 <= CONSIDER_ALL_CANDIDATES_BOUND);
2986
2987 if (data->consider_all_candidates)
2988 {
2989 /* We will not need "related_cands" bitmaps in this case,
2990 so release them to decrease peak memory consumption. */
2991 for (i = 0; i < n_iv_uses (data); i++)
2992 {
2993 use = iv_use (data, i);
2994 BITMAP_FREE (use->related_cands);
2995 }
2996 }
2997 else
2998 {
2999 /* Add important candidates to the related_cands bitmaps. */
3000 for (i = 0; i < n_iv_uses (data); i++)
3001 bitmap_ior_into (iv_use (data, i)->related_cands,
3002 data->important_candidates);
3003 }
3004 }
3005
3006 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3007 If consider_all_candidates is true, we use a two-dimensional array, otherwise
3008 we allocate a simple list for every use. */
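/* A hypothetical example: a use with 5 related candidates gets a map of
   size 8 (the next power of two), and a candidate is looked up starting at
   slot (cand->id & 7) with linear probing, as set_use_iv_cost does below.  */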
3009
3010 static void
3011 alloc_use_cost_map (struct ivopts_data *data)
3012 {
3013 unsigned i, size, s;
3014
3015 for (i = 0; i < n_iv_uses (data); i++)
3016 {
3017 struct iv_use *use = iv_use (data, i);
3018
3019 if (data->consider_all_candidates)
3020 size = n_iv_cands (data);
3021 else
3022 {
3023 s = bitmap_count_bits (use->related_cands);
3024
3025 /* Round up to a power of two, so that computing the modulo by it is fast. */
3026 size = s ? (1 << ceil_log2 (s)) : 1;
3027 }
3028
3029 use->n_map_members = size;
3030 use->cost_map = XCNEWVEC (struct cost_pair, size);
3031 }
3032 }
3033
3034 /* Returns a description of the computation cost of an expression whose
3035 runtime cost is RUNTIME and whose complexity corresponds to COMPLEXITY. */
3036
3037 static comp_cost
3038 new_cost (unsigned runtime, unsigned complexity)
3039 {
3040 comp_cost cost;
3041
3042 cost.cost = runtime;
3043 cost.complexity = complexity;
3044
3045 return cost;
3046 }
3047
3048 /* Returns true if COST is infinite. */
3049
3050 static bool
3051 infinite_cost_p (comp_cost cost)
3052 {
3053 return cost.cost == INFTY;
3054 }
3055
3056 /* Adds costs COST1 and COST2. */
3057
3058 static comp_cost
3059 add_costs (comp_cost cost1, comp_cost cost2)
3060 {
3061 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3062 return infinite_cost;
3063
3064 cost1.cost += cost2.cost;
3065 cost1.complexity += cost2.complexity;
3066
3067 return cost1;
3068 }
3069 /* Subtracts costs COST1 and COST2. */
3070
3071 static comp_cost
3072 sub_costs (comp_cost cost1, comp_cost cost2)
3073 {
3074 cost1.cost -= cost2.cost;
3075 cost1.complexity -= cost2.complexity;
3076
3077 return cost1;
3078 }
3079
3080 /* Returns a negative number if COST1 < COST2, a positive number if
3081 COST1 > COST2, and 0 if COST1 = COST2. */
3082
3083 static int
3084 compare_costs (comp_cost cost1, comp_cost cost2)
3085 {
3086 if (cost1.cost == cost2.cost)
3087 return cost1.complexity - cost2.complexity;
3088
3089 return cost1.cost - cost2.cost;
3090 }
3091
3092 /* Sets the cost of the (USE, CANDIDATE) pair to COST and records that it
3093 depends on invariants DEPENDS_ON, that the value used in expressing it
3094 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
3095
3096 static void
3097 set_use_iv_cost (struct ivopts_data *data,
3098 struct iv_use *use, struct iv_cand *cand,
3099 comp_cost cost, bitmap depends_on, tree value,
3100 enum tree_code comp, int inv_expr_id)
3101 {
3102 unsigned i, s;
3103
3104 if (infinite_cost_p (cost))
3105 {
3106 BITMAP_FREE (depends_on);
3107 return;
3108 }
3109
3110 if (data->consider_all_candidates)
3111 {
3112 use->cost_map[cand->id].cand = cand;
3113 use->cost_map[cand->id].cost = cost;
3114 use->cost_map[cand->id].depends_on = depends_on;
3115 use->cost_map[cand->id].value = value;
3116 use->cost_map[cand->id].comp = comp;
3117 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3118 return;
3119 }
3120
3121 /* n_map_members is a power of two, so this computes modulo. */
3122 s = cand->id & (use->n_map_members - 1);
3123 for (i = s; i < use->n_map_members; i++)
3124 if (!use->cost_map[i].cand)
3125 goto found;
3126 for (i = 0; i < s; i++)
3127 if (!use->cost_map[i].cand)
3128 goto found;
3129
3130 gcc_unreachable ();
3131
3132 found:
3133 use->cost_map[i].cand = cand;
3134 use->cost_map[i].cost = cost;
3135 use->cost_map[i].depends_on = depends_on;
3136 use->cost_map[i].value = value;
3137 use->cost_map[i].comp = comp;
3138 use->cost_map[i].inv_expr_id = inv_expr_id;
3139 }
3140
3141 /* Gets cost of (USE, CANDIDATE) pair. */
3142
3143 static struct cost_pair *
3144 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3145 struct iv_cand *cand)
3146 {
3147 unsigned i, s;
3148 struct cost_pair *ret;
3149
3150 if (!cand)
3151 return NULL;
3152
3153 if (data->consider_all_candidates)
3154 {
3155 ret = use->cost_map + cand->id;
3156 if (!ret->cand)
3157 return NULL;
3158
3159 return ret;
3160 }
3161
3162 /* n_map_members is a power of two, so this computes modulo. */
3163 s = cand->id & (use->n_map_members - 1);
3164 for (i = s; i < use->n_map_members; i++)
3165 if (use->cost_map[i].cand == cand)
3166 return use->cost_map + i;
3167 else if (use->cost_map[i].cand == NULL)
3168 return NULL;
3169 for (i = 0; i < s; i++)
3170 if (use->cost_map[i].cand == cand)
3171 return use->cost_map + i;
3172 else if (use->cost_map[i].cand == NULL)
3173 return NULL;
3174
3175 return NULL;
3176 }
3177
3178 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
3179 static rtx
3180 produce_memory_decl_rtl (tree obj, int *regno)
3181 {
3182 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3183 machine_mode address_mode = targetm.addr_space.address_mode (as);
3184 rtx x;
3185
3186 gcc_assert (obj);
3187 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3188 {
3189 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3190 x = gen_rtx_SYMBOL_REF (address_mode, name);
3191 SET_SYMBOL_REF_DECL (x, obj);
3192 x = gen_rtx_MEM (DECL_MODE (obj), x);
3193 set_mem_addr_space (x, as);
3194 targetm.encode_section_info (obj, x, true);
3195 }
3196 else
3197 {
3198 x = gen_raw_REG (address_mode, (*regno)++);
3199 x = gen_rtx_MEM (DECL_MODE (obj), x);
3200 set_mem_addr_space (x, as);
3201 }
3202
3203 return x;
3204 }
3205
3206 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3207 walk_tree. DATA contains the current fake register number. */
3208
3209 static tree
3210 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3211 {
3212 tree obj = NULL_TREE;
3213 rtx x = NULL_RTX;
3214 int *regno = (int *) data;
3215
3216 switch (TREE_CODE (*expr_p))
3217 {
3218 case ADDR_EXPR:
3219 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3220 handled_component_p (*expr_p);
3221 expr_p = &TREE_OPERAND (*expr_p, 0))
3222 continue;
3223 obj = *expr_p;
3224 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3225 x = produce_memory_decl_rtl (obj, regno);
3226 break;
3227
3228 case SSA_NAME:
3229 *ws = 0;
3230 obj = SSA_NAME_VAR (*expr_p);
3231 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3232 if (!obj)
3233 return NULL_TREE;
3234 if (!DECL_RTL_SET_P (obj))
3235 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3236 break;
3237
3238 case VAR_DECL:
3239 case PARM_DECL:
3240 case RESULT_DECL:
3241 *ws = 0;
3242 obj = *expr_p;
3243
3244 if (DECL_RTL_SET_P (obj))
3245 break;
3246
3247 if (DECL_MODE (obj) == BLKmode)
3248 x = produce_memory_decl_rtl (obj, regno);
3249 else
3250 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3251
3252 break;
3253
3254 default:
3255 break;
3256 }
3257
3258 if (x)
3259 {
3260 decl_rtl_to_reset.safe_push (obj);
3261 SET_DECL_RTL (obj, x);
3262 }
3263
3264 return NULL_TREE;
3265 }
3266
3267 /* Determines cost of the computation of EXPR. */
3268
3269 static unsigned
3270 computation_cost (tree expr, bool speed)
3271 {
3272 rtx_insn *seq;
3273 rtx rslt;
3274 tree type = TREE_TYPE (expr);
3275 unsigned cost;
3276 /* Avoid using hard regs in ways which may be unsupported. */
3277 int regno = LAST_VIRTUAL_REGISTER + 1;
3278 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3279 enum node_frequency real_frequency = node->frequency;
3280
3281 node->frequency = NODE_FREQUENCY_NORMAL;
3282 crtl->maybe_hot_insn_p = speed;
3283 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3284 start_sequence ();
3285 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3286 seq = get_insns ();
3287 end_sequence ();
3288 default_rtl_profile ();
3289 node->frequency = real_frequency;
3290
3291 cost = seq_cost (seq, speed);
3292 if (MEM_P (rslt))
3293 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3294 TYPE_ADDR_SPACE (type), speed);
3295 else if (!REG_P (rslt))
3296 cost += set_src_cost (rslt, speed);
3297
3298 return cost;
3299 }
3300
3301 /* Returns variable containing the value of candidate CAND at statement AT. */
3302
3303 static tree
3304 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3305 {
3306 if (stmt_after_increment (loop, cand, stmt))
3307 return cand->var_after;
3308 else
3309 return cand->var_before;
3310 }
3311
3312 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3313 same precision, which is at least as wide as the precision of TYPE, stores
3314 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3315 type of A and B. */
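/* A hypothetical example: if *A is (unsigned int) x and *B is
   (unsigned int) y, with x and y both of type long, this stores x and y
   back to *A and *B and returns long, so that the difference can be
   computed in the wider type.  */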
3316
3317 static tree
3318 determine_common_wider_type (tree *a, tree *b)
3319 {
3320 tree wider_type = NULL;
3321 tree suba, subb;
3322 tree atype = TREE_TYPE (*a);
3323
3324 if (CONVERT_EXPR_P (*a))
3325 {
3326 suba = TREE_OPERAND (*a, 0);
3327 wider_type = TREE_TYPE (suba);
3328 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3329 return atype;
3330 }
3331 else
3332 return atype;
3333
3334 if (CONVERT_EXPR_P (*b))
3335 {
3336 subb = TREE_OPERAND (*b, 0);
3337 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3338 return atype;
3339 }
3340 else
3341 return atype;
3342
3343 *a = suba;
3344 *b = subb;
3345 return wider_type;
3346 }
3347
3348 /* Determines the expression by which USE is expressed from induction variable
3349 CAND at statement AT in LOOP. The expression is stored in a decomposed
3350 form into AFF. Returns false if USE cannot be expressed using CAND. */
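/* A hypothetical example: for a use iv {base 4, step 4} and a candidate iv
   {base 0, step 1} whose value is VAR, the ratio is 4 and (before the
   increment) the use is expressed as 4 - 4 * 0 + 4 * VAR, i.e. 4 + 4 * VAR.  */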
3351
3352 static bool
3353 get_computation_aff (struct loop *loop,
3354 struct iv_use *use, struct iv_cand *cand, gimple at,
3355 struct aff_tree *aff)
3356 {
3357 tree ubase = use->iv->base;
3358 tree ustep = use->iv->step;
3359 tree cbase = cand->iv->base;
3360 tree cstep = cand->iv->step, cstep_common;
3361 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3362 tree common_type, var;
3363 tree uutype;
3364 aff_tree cbase_aff, var_aff;
3365 widest_int rat;
3366
3367 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3368 {
3369 /* We do not have enough precision to express the values of USE. */
3370 return false;
3371 }
3372
3373 var = var_at_stmt (loop, cand, at);
3374 uutype = unsigned_type_for (utype);
3375
3376 /* If the conversion is not noop, perform it. */
3377 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3378 {
3379 cstep = fold_convert (uutype, cstep);
3380 cbase = fold_convert (uutype, cbase);
3381 var = fold_convert (uutype, var);
3382 }
3383
3384 if (!constant_multiple_of (ustep, cstep, &rat))
3385 return false;
3386
3387 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3388 type, we achieve better folding by computing their difference in this
3389 wider type, and cast the result to UUTYPE. We do not need to worry about
3390 overflows, as all the arithmetics will in the end be performed in UUTYPE
3391 anyway. */
3392 common_type = determine_common_wider_type (&ubase, &cbase);
3393
3394 /* use = ubase - ratio * cbase + ratio * var. */
3395 tree_to_aff_combination (ubase, common_type, aff);
3396 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3397 tree_to_aff_combination (var, uutype, &var_aff);
3398
3399 /* We need to shift the value if we are after the increment. */
3400 if (stmt_after_increment (loop, cand, at))
3401 {
3402 aff_tree cstep_aff;
3403
3404 if (common_type != uutype)
3405 cstep_common = fold_convert (common_type, cstep);
3406 else
3407 cstep_common = cstep;
3408
3409 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3410 aff_combination_add (&cbase_aff, &cstep_aff);
3411 }
3412
3413 aff_combination_scale (&cbase_aff, -rat);
3414 aff_combination_add (aff, &cbase_aff);
3415 if (common_type != uutype)
3416 aff_combination_convert (aff, uutype);
3417
3418 aff_combination_scale (&var_aff, rat);
3419 aff_combination_add (aff, &var_aff);
3420
3421 return true;
3422 }
3423
3424 /* Return the type of USE. */
3425
3426 static tree
3427 get_use_type (struct iv_use *use)
3428 {
3429 tree base_type = TREE_TYPE (use->iv->base);
3430 tree type;
3431
3432 if (use->type == USE_ADDRESS)
3433 {
3434 /* The base_type may be a void pointer. Create a pointer type based on
3435 the mem_ref instead. */
3436 type = build_pointer_type (TREE_TYPE (*use->op_p));
3437 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3438 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3439 }
3440 else
3441 type = base_type;
3442
3443 return type;
3444 }
3445
3446 /* Determines the expression by which USE is expressed from induction variable
3447 CAND at statement AT in LOOP. The computation is unshared. */
3448
3449 static tree
3450 get_computation_at (struct loop *loop,
3451 struct iv_use *use, struct iv_cand *cand, gimple at)
3452 {
3453 aff_tree aff;
3454 tree type = get_use_type (use);
3455
3456 if (!get_computation_aff (loop, use, cand, at, &aff))
3457 return NULL_TREE;
3458 unshare_aff_combination (&aff);
3459 return fold_convert (type, aff_combination_to_tree (&aff));
3460 }
3461
3462 /* Determines the expression by which USE is expressed from induction variable
3463 CAND in LOOP. The computation is unshared. */
3464
3465 static tree
3466 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3467 {
3468 return get_computation_at (loop, use, cand, use->stmt);
3469 }
3470
3471 /* Adjust the cost COST for being in loop setup rather than loop body.
3472 If we're optimizing for space, the loop setup overhead is constant;
3473 if we're optimizing for speed, amortize it over the per-iteration cost. */
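/* A hypothetical example: with a setup cost of 8 and an average of 4
   iterations, the amortized cost is 2 when optimizing for speed; when
   optimizing for size the full 8 is kept.  */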
3474 static unsigned
3475 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3476 {
3477 if (cost == INFTY)
3478 return cost;
3479 else if (optimize_loop_for_speed_p (data->current_loop))
3480 return cost / avg_loop_niter (data->current_loop);
3481 else
3482 return cost;
3483 }
3484
3485 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3486 validity for a memory reference accessing memory of mode MODE in
3487 address space AS. */
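/* For illustration: on a typical x86-like target only the scales 1, 2, 4
   and 8 would be accepted here; the answers are cached per address space
   and mode in a bitmap covering ratios in [-MAX_RATIO, MAX_RATIO].  */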
3488
3489
3490 bool
3491 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3492 addr_space_t as)
3493 {
3494 #define MAX_RATIO 128
3495 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3496 static vec<sbitmap> valid_mult_list;
3497 sbitmap valid_mult;
3498
3499 if (data_index >= valid_mult_list.length ())
3500 valid_mult_list.safe_grow_cleared (data_index + 1);
3501
3502 valid_mult = valid_mult_list[data_index];
3503 if (!valid_mult)
3504 {
3505 machine_mode address_mode = targetm.addr_space.address_mode (as);
3506 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3507 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3508 rtx addr, scaled;
3509 HOST_WIDE_INT i;
3510
3511 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3512 bitmap_clear (valid_mult);
3513 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3514 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3515 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3516 {
3517 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3518 if (memory_address_addr_space_p (mode, addr, as)
3519 || memory_address_addr_space_p (mode, scaled, as))
3520 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3521 }
3522
3523 if (dump_file && (dump_flags & TDF_DETAILS))
3524 {
3525 fprintf (dump_file, " allowed multipliers:");
3526 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3527 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3528 fprintf (dump_file, " %d", (int) i);
3529 fprintf (dump_file, "\n");
3530 fprintf (dump_file, "\n");
3531 }
3532
3533 valid_mult_list[data_index] = valid_mult;
3534 }
3535
3536 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3537 return false;
3538
3539 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3540 }
3541
3542 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3543 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3544 variable is omitted. Compute the cost for a memory reference that accesses
3545 a memory location of mode MEM_MODE in address space AS.
3546
3547 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3548 size of MEM_MODE / RATIO) is available. To make this determination, we
3549 look at the size of the increment to be made, which is given in CSTEP.
3550 CSTEP may be zero if the step is unknown.
3551 STMT_AFTER_INC is true iff the statement we're looking at is after the
3552 increment of the original biv.
3553
3554 TODO -- there must be some better way. This all is quite crude. */
3555
3556 enum ainc_type
3557 {
3558 AINC_PRE_INC, /* Pre increment. */
3559 AINC_PRE_DEC, /* Pre decrement. */
3560 AINC_POST_INC, /* Post increment. */
3561 AINC_POST_DEC, /* Post decrement. */
3562 AINC_NONE /* Also the number of auto increment types. */
3563 };
3564
3565 typedef struct address_cost_data_s
3566 {
3567 HOST_WIDE_INT min_offset, max_offset;
3568 unsigned costs[2][2][2][2];
3569 unsigned ainc_costs[AINC_NONE];
3570 } *address_cost_data;
3571
3572
3573 static comp_cost
3574 get_address_cost (bool symbol_present, bool var_present,
3575 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3576 HOST_WIDE_INT cstep, machine_mode mem_mode,
3577 addr_space_t as, bool speed,
3578 bool stmt_after_inc, bool *may_autoinc)
3579 {
3580 machine_mode address_mode = targetm.addr_space.address_mode (as);
3581 static vec<address_cost_data> address_cost_data_list;
3582 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3583 address_cost_data data;
3584 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3585 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3586 unsigned cost, acost, complexity;
3587 enum ainc_type autoinc_type;
3588 bool offset_p, ratio_p, autoinc;
3589 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3590 unsigned HOST_WIDE_INT mask;
3591 unsigned bits;
3592
3593 if (data_index >= address_cost_data_list.length ())
3594 address_cost_data_list.safe_grow_cleared (data_index + 1);
3595
3596 data = address_cost_data_list[data_index];
3597 if (!data)
3598 {
3599 HOST_WIDE_INT i;
3600 HOST_WIDE_INT rat, off = 0;
3601 int old_cse_not_expected, width;
3602 unsigned sym_p, var_p, off_p, rat_p, add_c;
3603 rtx_insn *seq;
3604 rtx addr, base;
3605 rtx reg0, reg1;
3606
3607 data = (address_cost_data) xcalloc (1, sizeof (*data));
3608
3609 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3610
3611 width = GET_MODE_BITSIZE (address_mode) - 1;
3612 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3613 width = HOST_BITS_PER_WIDE_INT - 1;
3614 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3615
3616 for (i = width; i >= 0; i--)
3617 {
3618 off = -((unsigned HOST_WIDE_INT) 1 << i);
3619 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3620 if (memory_address_addr_space_p (mem_mode, addr, as))
3621 break;
3622 }
3623 data->min_offset = (i == -1? 0 : off);
3624
3625 for (i = width; i >= 0; i--)
3626 {
3627 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3628 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3629 if (memory_address_addr_space_p (mem_mode, addr, as))
3630 break;
3631 /* For some strict-alignment targets, the offset must be naturally
3632 aligned. Try an aligned offset if mem_mode is not QImode. */
3633 off = mem_mode != QImode
3634 ? ((unsigned HOST_WIDE_INT) 1 << i)
3635 - GET_MODE_SIZE (mem_mode)
3636 : 0;
3637 if (off > 0)
3638 {
3639 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3640 if (memory_address_addr_space_p (mem_mode, addr, as))
3641 break;
3642 }
3643 }
3644 if (i == -1)
3645 off = 0;
3646 data->max_offset = off;
3647
3648 if (dump_file && (dump_flags & TDF_DETAILS))
3649 {
3650 fprintf (dump_file, "get_address_cost:\n");
3651 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3652 GET_MODE_NAME (mem_mode),
3653 data->min_offset);
3654 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3655 GET_MODE_NAME (mem_mode),
3656 data->max_offset);
3657 }
3658
3659 rat = 1;
3660 for (i = 2; i <= MAX_RATIO; i++)
3661 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3662 {
3663 rat = i;
3664 break;
3665 }
3666
3667 /* Compute the cost of various addressing modes. */
3668 acost = 0;
3669 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3670 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3671
3672 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3673 || USE_STORE_PRE_DECREMENT (mem_mode))
3674 {
3675 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3676 has_predec[mem_mode]
3677 = memory_address_addr_space_p (mem_mode, addr, as);
3678
3679 if (has_predec[mem_mode])
3680 data->ainc_costs[AINC_PRE_DEC]
3681 = address_cost (addr, mem_mode, as, speed);
3682 }
3683 if (USE_LOAD_POST_DECREMENT (mem_mode)
3684 || USE_STORE_POST_DECREMENT (mem_mode))
3685 {
3686 addr = gen_rtx_POST_DEC (address_mode, reg0);
3687 has_postdec[mem_mode]
3688 = memory_address_addr_space_p (mem_mode, addr, as);
3689
3690 if (has_postdec[mem_mode])
3691 data->ainc_costs[AINC_POST_DEC]
3692 = address_cost (addr, mem_mode, as, speed);
3693 }
3694 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3695 || USE_STORE_PRE_INCREMENT (mem_mode))
3696 {
3697 addr = gen_rtx_PRE_INC (address_mode, reg0);
3698 has_preinc[mem_mode]
3699 = memory_address_addr_space_p (mem_mode, addr, as);
3700
3701 if (has_preinc[mem_mode])
3702 data->ainc_costs[AINC_PRE_INC]
3703 = address_cost (addr, mem_mode, as, speed);
3704 }
3705 if (USE_LOAD_POST_INCREMENT (mem_mode)
3706 || USE_STORE_POST_INCREMENT (mem_mode))
3707 {
3708 addr = gen_rtx_POST_INC (address_mode, reg0);
3709 has_postinc[mem_mode]
3710 = memory_address_addr_space_p (mem_mode, addr, as);
3711
3712 if (has_postinc[mem_mode])
3713 data->ainc_costs[AINC_POST_INC]
3714 = address_cost (addr, mem_mode, as, speed);
3715 }
3716 for (i = 0; i < 16; i++)
3717 {
3718 sym_p = i & 1;
3719 var_p = (i >> 1) & 1;
3720 off_p = (i >> 2) & 1;
3721 rat_p = (i >> 3) & 1;
3722
3723 addr = reg0;
3724 if (rat_p)
3725 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3726 gen_int_mode (rat, address_mode));
3727
3728 if (var_p)
3729 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3730
3731 if (sym_p)
3732 {
3733 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3734 /* ??? We can run into trouble with some backends by presenting
3735 it with symbols which haven't been properly passed through
3736 targetm.encode_section_info. By setting the local bit, we
3737 enhance the probability of things working. */
3738 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3739
3740 if (off_p)
3741 base = gen_rtx_fmt_e (CONST, address_mode,
3742 gen_rtx_fmt_ee
3743 (PLUS, address_mode, base,
3744 gen_int_mode (off, address_mode)));
3745 }
3746 else if (off_p)
3747 base = gen_int_mode (off, address_mode);
3748 else
3749 base = NULL_RTX;
3750
3751 if (base)
3752 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3753
3754 start_sequence ();
3755 /* To avoid splitting addressing modes, pretend that no cse will
3756 follow. */
3757 old_cse_not_expected = cse_not_expected;
3758 cse_not_expected = true;
3759 addr = memory_address_addr_space (mem_mode, addr, as);
3760 cse_not_expected = old_cse_not_expected;
3761 seq = get_insns ();
3762 end_sequence ();
3763
3764 acost = seq_cost (seq, speed);
3765 acost += address_cost (addr, mem_mode, as, speed);
3766
3767 if (!acost)
3768 acost = 1;
3769 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3770 }
3771
3772 /* On some targets, it is quite expensive to load a symbol into a register,
3773 which makes addresses that contain symbols look much more expensive.
3774 However, the symbol will have to be loaded in any case before the
3775 loop (and quite likely we have it in a register already), so it does not
3776 make much sense to penalize such addresses too heavily. So make some final
3777 tweaks for the SYMBOL_PRESENT modes:
3778
3779 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3780 var is cheaper, use this mode with small penalty.
3781 If VAR_PRESENT is true, try whether the mode with
3782 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3783 if this is the case, use it. */
3784 add_c = add_cost (speed, address_mode);
3785 for (i = 0; i < 8; i++)
3786 {
3787 var_p = i & 1;
3788 off_p = (i >> 1) & 1;
3789 rat_p = (i >> 2) & 1;
3790
3791 acost = data->costs[0][1][off_p][rat_p] + 1;
3792 if (var_p)
3793 acost += add_c;
3794
3795 if (acost < data->costs[1][var_p][off_p][rat_p])
3796 data->costs[1][var_p][off_p][rat_p] = acost;
3797 }
3798
3799 if (dump_file && (dump_flags & TDF_DETAILS))
3800 {
3801 fprintf (dump_file, "Address costs:\n");
3802
3803 for (i = 0; i < 16; i++)
3804 {
3805 sym_p = i & 1;
3806 var_p = (i >> 1) & 1;
3807 off_p = (i >> 2) & 1;
3808 rat_p = (i >> 3) & 1;
3809
3810 fprintf (dump_file, " ");
3811 if (sym_p)
3812 fprintf (dump_file, "sym + ");
3813 if (var_p)
3814 fprintf (dump_file, "var + ");
3815 if (off_p)
3816 fprintf (dump_file, "cst + ");
3817 if (rat_p)
3818 fprintf (dump_file, "rat * ");
3819
3820 acost = data->costs[sym_p][var_p][off_p][rat_p];
3821 fprintf (dump_file, "index costs %d\n", acost);
3822 }
3823 if (has_predec[mem_mode] || has_postdec[mem_mode]
3824 || has_preinc[mem_mode] || has_postinc[mem_mode])
3825 fprintf (dump_file, " May include autoinc/dec\n");
3826 fprintf (dump_file, "\n");
3827 }
3828
3829 address_cost_data_list[data_index] = data;
3830 }
3831
3832 bits = GET_MODE_BITSIZE (address_mode);
3833 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3834 offset &= mask;
3835 if ((offset >> (bits - 1) & 1))
3836 offset |= ~mask;
3837 s_offset = offset;
3838
3839 autoinc = false;
3840 autoinc_type = AINC_NONE;
3841 msize = GET_MODE_SIZE (mem_mode);
3842 autoinc_offset = offset;
3843 if (stmt_after_inc)
3844 autoinc_offset += ratio * cstep;
3845 if (symbol_present || var_present || ratio != 1)
3846 autoinc = false;
3847 else
3848 {
3849 if (has_postinc[mem_mode] && autoinc_offset == 0
3850 && msize == cstep)
3851 autoinc_type = AINC_POST_INC;
3852 else if (has_postdec[mem_mode] && autoinc_offset == 0
3853 && msize == -cstep)
3854 autoinc_type = AINC_POST_DEC;
3855 else if (has_preinc[mem_mode] && autoinc_offset == msize
3856 && msize == cstep)
3857 autoinc_type = AINC_PRE_INC;
3858 else if (has_predec[mem_mode] && autoinc_offset == -msize
3859 && msize == -cstep)
3860 autoinc_type = AINC_PRE_DEC;
3861
3862 if (autoinc_type != AINC_NONE)
3863 autoinc = true;
3864 }
3865
3866 cost = 0;
3867 offset_p = (s_offset != 0
3868 && data->min_offset <= s_offset
3869 && s_offset <= data->max_offset);
3870 ratio_p = (ratio != 1
3871 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3872
3873 if (ratio != 1 && !ratio_p)
3874 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3875
3876 if (s_offset && !offset_p && !symbol_present)
3877 cost += add_cost (speed, address_mode);
3878
3879 if (may_autoinc)
3880 *may_autoinc = autoinc;
3881 if (autoinc)
3882 acost = data->ainc_costs[autoinc_type];
3883 else
3884 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3885 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3886 return new_cost (cost + acost, complexity);
3887 }
3888
3889 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3890 EXPR operand holding the shift. COST0 and COST1 are the costs for
3891 calculating the operands of EXPR. Returns true if successful, and returns
3892 the cost in COST. */
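/* A hypothetical example: for EXPR = x + y * 8, MULT is y * 8 and m = 3;
   the result is the cheaper of (shift + add) and a single shift-and-add
   instruction, plus the cost of the other, non-multiplied operand.  */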
3893
3894 static bool
3895 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3896 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3897 {
3898 comp_cost res;
3899 tree op1 = TREE_OPERAND (expr, 1);
3900 tree cst = TREE_OPERAND (mult, 1);
3901 tree multop = TREE_OPERAND (mult, 0);
3902 int m = exact_log2 (int_cst_value (cst));
3903 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3904 int as_cost, sa_cost;
3905 bool mult_in_op1;
3906
3907 if (!(m >= 0 && m < maxm))
3908 return false;
3909
3910 mult_in_op1 = operand_equal_p (op1, mult, 0);
3911
3912 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3913
3914 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3915 use that in preference to a shift insn followed by an add insn. */
3916 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3917 ? shiftadd_cost (speed, mode, m)
3918 : (mult_in_op1
3919 ? shiftsub1_cost (speed, mode, m)
3920 : shiftsub0_cost (speed, mode, m)));
3921
3922 res = new_cost (MIN (as_cost, sa_cost), 0);
3923 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3924
3925 STRIP_NOPS (multop);
3926 if (!is_gimple_val (multop))
3927 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3928
3929 *cost = res;
3930 return true;
3931 }
3932
3933 /* Estimates cost of forcing expression EXPR into a variable. */
3934
3935 static comp_cost
3936 force_expr_to_var_cost (tree expr, bool speed)
3937 {
3938 static bool costs_initialized = false;
3939 static unsigned integer_cost [2];
3940 static unsigned symbol_cost [2];
3941 static unsigned address_cost [2];
3942 tree op0, op1;
3943 comp_cost cost0, cost1, cost;
3944 machine_mode mode;
3945
3946 if (!costs_initialized)
3947 {
3948 tree type = build_pointer_type (integer_type_node);
3949 tree var, addr;
3950 rtx x;
3951 int i;
3952
3953 var = create_tmp_var_raw (integer_type_node, "test_var");
3954 TREE_STATIC (var) = 1;
3955 x = produce_memory_decl_rtl (var, NULL);
3956 SET_DECL_RTL (var, x);
3957
3958 addr = build1 (ADDR_EXPR, type, var);
3959
3960
3961 for (i = 0; i < 2; i++)
3962 {
3963 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3964 2000), i);
3965
3966 symbol_cost[i] = computation_cost (addr, i) + 1;
3967
3968 address_cost[i]
3969 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3970 if (dump_file && (dump_flags & TDF_DETAILS))
3971 {
3972 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3973 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3974 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3975 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3976 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3977 fprintf (dump_file, "\n");
3978 }
3979 }
3980
3981 costs_initialized = true;
3982 }
3983
3984 STRIP_NOPS (expr);
3985
3986 if (SSA_VAR_P (expr))
3987 return no_cost;
3988
3989 if (is_gimple_min_invariant (expr))
3990 {
3991 if (TREE_CODE (expr) == INTEGER_CST)
3992 return new_cost (integer_cost [speed], 0);
3993
3994 if (TREE_CODE (expr) == ADDR_EXPR)
3995 {
3996 tree obj = TREE_OPERAND (expr, 0);
3997
3998 if (TREE_CODE (obj) == VAR_DECL
3999 || TREE_CODE (obj) == PARM_DECL
4000 || TREE_CODE (obj) == RESULT_DECL)
4001 return new_cost (symbol_cost [speed], 0);
4002 }
4003
4004 return new_cost (address_cost [speed], 0);
4005 }
4006
4007 switch (TREE_CODE (expr))
4008 {
4009 case POINTER_PLUS_EXPR:
4010 case PLUS_EXPR:
4011 case MINUS_EXPR:
4012 case MULT_EXPR:
4013 op0 = TREE_OPERAND (expr, 0);
4014 op1 = TREE_OPERAND (expr, 1);
4015 STRIP_NOPS (op0);
4016 STRIP_NOPS (op1);
4017 break;
4018
4019 CASE_CONVERT:
4020 case NEGATE_EXPR:
4021 op0 = TREE_OPERAND (expr, 0);
4022 STRIP_NOPS (op0);
4023 op1 = NULL_TREE;
4024 break;
4025
4026 default:
4027 /* Just an arbitrary value, FIXME. */
4028 return new_cost (target_spill_cost[speed], 0);
4029 }
4030
4031 if (op0 == NULL_TREE
4032 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4033 cost0 = no_cost;
4034 else
4035 cost0 = force_expr_to_var_cost (op0, speed);
4036
4037 if (op1 == NULL_TREE
4038 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4039 cost1 = no_cost;
4040 else
4041 cost1 = force_expr_to_var_cost (op1, speed);
4042
4043 mode = TYPE_MODE (TREE_TYPE (expr));
4044 switch (TREE_CODE (expr))
4045 {
4046 case POINTER_PLUS_EXPR:
4047 case PLUS_EXPR:
4048 case MINUS_EXPR:
4049 case NEGATE_EXPR:
4050 cost = new_cost (add_cost (speed, mode), 0);
4051 if (TREE_CODE (expr) != NEGATE_EXPR)
4052 {
4053 tree mult = NULL_TREE;
4054 comp_cost sa_cost;
4055 if (TREE_CODE (op1) == MULT_EXPR)
4056 mult = op1;
4057 else if (TREE_CODE (op0) == MULT_EXPR)
4058 mult = op0;
4059
4060 if (mult != NULL_TREE
4061 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4062 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4063 speed, &sa_cost))
4064 return sa_cost;
4065 }
4066 break;
4067
4068 CASE_CONVERT:
4069 {
4070 tree inner_mode, outer_mode;
4071 outer_mode = TREE_TYPE (expr);
4072 inner_mode = TREE_TYPE (op0);
4073 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4074 TYPE_MODE (inner_mode), speed), 0);
4075 }
4076 break;
4077
4078 case MULT_EXPR:
4079 if (cst_and_fits_in_hwi (op0))
4080 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4081 mode, speed), 0);
4082 else if (cst_and_fits_in_hwi (op1))
4083 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4084 mode, speed), 0);
4085 else
4086 return new_cost (target_spill_cost [speed], 0);
4087 break;
4088
4089 default:
4090 gcc_unreachable ();
4091 }
4092
4093 cost = add_costs (cost, cost0);
4094 cost = add_costs (cost, cost1);
4095
4096 /* Bound the cost by target_spill_cost. The parts of complicated
4097 computations often are either loop invariant or at least can
4098 be shared between several iv uses, so letting this grow without
4099 limits would not give reasonable results. */
4100 if (cost.cost > (int) target_spill_cost [speed])
4101 cost.cost = target_spill_cost [speed];
4102
4103 return cost;
4104 }
4105
4106 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4107 invariants the computation depends on. */
4108
4109 static comp_cost
4110 force_var_cost (struct ivopts_data *data,
4111 tree expr, bitmap *depends_on)
4112 {
4113 if (depends_on)
4114 {
4115 fd_ivopts_data = data;
4116 walk_tree (&expr, find_depends, depends_on, NULL);
4117 }
4118
4119 return force_expr_to_var_cost (expr, data->speed);
4120 }
4121
4122 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4123 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
4124 set to false if the corresponding part is missing. DEPENDS_ON is a set of the
4125 invariants the computation depends on. */
4126
4127 static comp_cost
4128 split_address_cost (struct ivopts_data *data,
4129 tree addr, bool *symbol_present, bool *var_present,
4130 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4131 {
4132 tree core;
4133 HOST_WIDE_INT bitsize;
4134 HOST_WIDE_INT bitpos;
4135 tree toffset;
4136 machine_mode mode;
4137 int unsignedp, volatilep;
4138
4139 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4140 &unsignedp, &volatilep, false);
4141
4142 if (toffset != 0
4143 || bitpos % BITS_PER_UNIT != 0
4144 || TREE_CODE (core) != VAR_DECL)
4145 {
4146 *symbol_present = false;
4147 *var_present = true;
4148 fd_ivopts_data = data;
4149 walk_tree (&addr, find_depends, depends_on, NULL);
4150 return new_cost (target_spill_cost[data->speed], 0);
4151 }
4152
4153 *offset += bitpos / BITS_PER_UNIT;
4154 if (TREE_STATIC (core)
4155 || DECL_EXTERNAL (core))
4156 {
4157 *symbol_present = true;
4158 *var_present = false;
4159 return no_cost;
4160 }
4161
4162 *symbol_present = false;
4163 *var_present = true;
4164 return no_cost;
4165 }
4166
4167 /* Estimates cost of expressing difference of addresses E1 - E2 as
4168 var + symbol + offset. The value of the offset is added to OFFSET;
4169 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4170 part is missing. DEPENDS_ON is a set of the invariants the computation
4171 depends on. */
4172
4173 static comp_cost
4174 ptr_difference_cost (struct ivopts_data *data,
4175 tree e1, tree e2, bool *symbol_present, bool *var_present,
4176 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4177 {
4178 HOST_WIDE_INT diff = 0;
4179 aff_tree aff_e1, aff_e2;
4180 tree type;
4181
4182 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4183
4184 if (ptr_difference_const (e1, e2, &diff))
4185 {
4186 *offset += diff;
4187 *symbol_present = false;
4188 *var_present = false;
4189 return no_cost;
4190 }
4191
4192 if (integer_zerop (e2))
4193 return split_address_cost (data, TREE_OPERAND (e1, 0),
4194 symbol_present, var_present, offset, depends_on);
4195
4196 *symbol_present = false;
4197 *var_present = true;
4198
4199 type = signed_type_for (TREE_TYPE (e1));
4200 tree_to_aff_combination (e1, type, &aff_e1);
4201 tree_to_aff_combination (e2, type, &aff_e2);
4202 aff_combination_scale (&aff_e2, -1);
4203 aff_combination_add (&aff_e1, &aff_e2);
4204
4205 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4206 }
4207
4208 /* Estimates cost of expressing difference E1 - E2 as
4209 var + symbol + offset. The value of the offset is added to OFFSET;
4210 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4211 part is missing. DEPENDS_ON is a set of the invariants the computation
4212 depends on. */
4213
4214 static comp_cost
4215 difference_cost (struct ivopts_data *data,
4216 tree e1, tree e2, bool *symbol_present, bool *var_present,
4217 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4218 {
4219 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4220 unsigned HOST_WIDE_INT off1, off2;
4221 aff_tree aff_e1, aff_e2;
4222 tree type;
4223
4224 e1 = strip_offset (e1, &off1);
4225 e2 = strip_offset (e2, &off2);
4226 *offset += off1 - off2;
4227
4228 STRIP_NOPS (e1);
4229 STRIP_NOPS (e2);
4230
4231 if (TREE_CODE (e1) == ADDR_EXPR)
4232 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4233 offset, depends_on);
4234 *symbol_present = false;
4235
4236 if (operand_equal_p (e1, e2, 0))
4237 {
4238 *var_present = false;
4239 return no_cost;
4240 }
4241
4242 *var_present = true;
4243
4244 if (integer_zerop (e2))
4245 return force_var_cost (data, e1, depends_on);
4246
4247 if (integer_zerop (e1))
4248 {
4249 comp_cost cost = force_var_cost (data, e2, depends_on);
4250 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4251 return cost;
4252 }
4253
4254 type = signed_type_for (TREE_TYPE (e1));
4255 tree_to_aff_combination (e1, type, &aff_e1);
4256 tree_to_aff_combination (e2, type, &aff_e2);
4257 aff_combination_scale (&aff_e2, -1);
4258 aff_combination_add (&aff_e1, &aff_e2);
4259
4260 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4261 }
4262
4263 /* Returns true if AFF1 and AFF2 are identical. */
4264
4265 static bool
4266 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4267 {
4268 unsigned i;
4269
4270 if (aff1->n != aff2->n)
4271 return false;
4272
4273 for (i = 0; i < aff1->n; i++)
4274 {
4275 if (aff1->elts[i].coef != aff2->elts[i].coef)
4276 return false;
4277
4278 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4279 return false;
4280 }
4281 return true;
4282 }
4283
4284 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4285
4286 static int
4287 get_expr_id (struct ivopts_data *data, tree expr)
4288 {
4289 struct iv_inv_expr_ent ent;
4290 struct iv_inv_expr_ent **slot;
4291
4292 ent.expr = expr;
4293 ent.hash = iterative_hash_expr (expr, 0);
4294 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4295 if (*slot)
4296 return (*slot)->id;
4297
4298 *slot = XNEW (struct iv_inv_expr_ent);
4299 (*slot)->expr = expr;
4300 (*slot)->hash = ent.hash;
4301 (*slot)->id = data->inv_expr_id++;
4302 return (*slot)->id;
4303 }
4304
4305 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4306 requires a new compiler generated temporary. Returns -1 otherwise.
4307 ADDRESS_P is a flag indicating if the expression is for address
4308 computation. */
4309
4310 static int
4311 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4312 tree cbase, HOST_WIDE_INT ratio,
4313 bool address_p)
4314 {
4315 aff_tree ubase_aff, cbase_aff;
4316 tree expr, ub, cb;
4317
4318 STRIP_NOPS (ubase);
4319 STRIP_NOPS (cbase);
4320 ub = ubase;
4321 cb = cbase;
4322
4323 if ((TREE_CODE (ubase) == INTEGER_CST)
4324 && (TREE_CODE (cbase) == INTEGER_CST))
4325 return -1;
4326
4327 /* Strips the constant part. */
4328 if (TREE_CODE (ubase) == PLUS_EXPR
4329 || TREE_CODE (ubase) == MINUS_EXPR
4330 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4331 {
4332 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4333 ubase = TREE_OPERAND (ubase, 0);
4334 }
4335
4336 /* Strips the constant part. */
4337 if (TREE_CODE (cbase) == PLUS_EXPR
4338 || TREE_CODE (cbase) == MINUS_EXPR
4339 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4340 {
4341 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4342 cbase = TREE_OPERAND (cbase, 0);
4343 }
4344
4345 if (address_p)
4346 {
4347 if (((TREE_CODE (ubase) == SSA_NAME)
4348 || (TREE_CODE (ubase) == ADDR_EXPR
4349 && is_gimple_min_invariant (ubase)))
4350 && (TREE_CODE (cbase) == INTEGER_CST))
4351 return -1;
4352
4353 if (((TREE_CODE (cbase) == SSA_NAME)
4354 || (TREE_CODE (cbase) == ADDR_EXPR
4355 && is_gimple_min_invariant (cbase)))
4356 && (TREE_CODE (ubase) == INTEGER_CST))
4357 return -1;
4358 }
4359
4360 if (ratio == 1)
4361 {
4362 if (operand_equal_p (ubase, cbase, 0))
4363 return -1;
4364
4365 if (TREE_CODE (ubase) == ADDR_EXPR
4366 && TREE_CODE (cbase) == ADDR_EXPR)
4367 {
4368 tree usym, csym;
4369
4370 usym = TREE_OPERAND (ubase, 0);
4371 csym = TREE_OPERAND (cbase, 0);
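/* If a symbol is an array reference with a constant zero index, strip
   the reference so that, e.g., &a[0] and &a compare equal below.  */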
4372 if (TREE_CODE (usym) == ARRAY_REF)
4373 {
4374 tree ind = TREE_OPERAND (usym, 1);
4375 if (TREE_CODE (ind) == INTEGER_CST
4376 && tree_fits_shwi_p (ind)
4377 && tree_to_shwi (ind) == 0)
4378 usym = TREE_OPERAND (usym, 0);
4379 }
4380 if (TREE_CODE (csym) == ARRAY_REF)
4381 {
4382 tree ind = TREE_OPERAND (csym, 1);
4383 if (TREE_CODE (ind) == INTEGER_CST
4384 && tree_fits_shwi_p (ind)
4385 && tree_to_shwi (ind) == 0)
4386 csym = TREE_OPERAND (csym, 0);
4387 }
4388 if (operand_equal_p (usym, csym, 0))
4389 return -1;
4390 }
4391 /* Now do a more complex comparison. */
4392 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4393 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4394 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4395 return -1;
4396 }
4397
4398 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4399 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4400
4401 aff_combination_scale (&cbase_aff, -1 * ratio);
4402 aff_combination_add (&ubase_aff, &cbase_aff);
4403 expr = aff_combination_to_tree (&ubase_aff);
4404 return get_expr_id (data, expr);
4405 }
4406
4407
4408
4409 /* Determines the cost of the computation by which USE is expressed
4410 from induction variable CAND. If ADDRESS_P is true, we just need
4411 to create an address from it, otherwise we want to get it into
4412 a register. A set of invariants we depend on is stored in
4413 DEPENDS_ON. AT is the statement at which the value is computed.
4414 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4415 addressing is likely. */
4416
4417 static comp_cost
4418 get_computation_cost_at (struct ivopts_data *data,
4419 struct iv_use *use, struct iv_cand *cand,
4420 bool address_p, bitmap *depends_on, gimple at,
4421 bool *can_autoinc,
4422 int *inv_expr_id)
4423 {
4424 tree ubase = use->iv->base, ustep = use->iv->step;
4425 tree cbase, cstep;
4426 tree utype = TREE_TYPE (ubase), ctype;
4427 unsigned HOST_WIDE_INT cstepi, offset = 0;
4428 HOST_WIDE_INT ratio, aratio;
4429 bool var_present, symbol_present, stmt_is_after_inc;
4430 comp_cost cost;
4431 widest_int rat;
4432 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4433 machine_mode mem_mode = (address_p
4434 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4435 : VOIDmode);
4436
4437 *depends_on = NULL;
4438
4439 /* Only consider real candidates. */
4440 if (!cand->iv)
4441 return infinite_cost;
4442
4443 cbase = cand->iv->base;
4444 cstep = cand->iv->step;
4445 ctype = TREE_TYPE (cbase);
4446
4447 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4448 {
4449 /* We do not have a precision to express the values of use. */
4450 return infinite_cost;
4451 }
4452
4453 if (address_p
4454 || (use->iv->base_object
4455 && cand->iv->base_object
4456 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4457 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4458 {
4459 /* Do not try to express the address of an object with a computation
4460 based on the address of a different object. This may cause problems
4461 in RTL-level alias analysis (which does not expect this to happen,
4462 as it is illegal in C), and would be unlikely to be useful
4463 anyway. */
4464 if (use->iv->base_object
4465 && cand->iv->base_object
4466 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4467 return infinite_cost;
4468 }
4469
4470 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4471 {
4472 /* TODO -- add direct handling of this case. */
4473 goto fallback;
4474 }
4475
4476 /* CSTEPI is removed from the offset in case the statement is after the
4477 increment. If the step is not constant, we use zero instead.
4478 This is a bit imprecise (there is the extra addition), but
4479 redundancy elimination is likely to transform the code so that
4480 it uses the value of the variable before the increment anyway,
4481 so this is not too unrealistic. */
4482 if (cst_and_fits_in_hwi (cstep))
4483 cstepi = int_cst_value (cstep);
4484 else
4485 cstepi = 0;
4486
4487 if (!constant_multiple_of (ustep, cstep, &rat))
4488 return infinite_cost;
4489
4490 if (wi::fits_shwi_p (rat))
4491 ratio = rat.to_shwi ();
4492 else
4493 return infinite_cost;
4494
4495 STRIP_NOPS (cbase);
4496 ctype = TREE_TYPE (cbase);
4497
4498 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4499
4500 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4501 or ratio == 1, it is better to handle this like
4502
4503 ubase - ratio * cbase + ratio * var
4504
4505 (this also holds in the case ratio == -1, TODO). */
4506
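/* For instance (illustrative): with ratio == 2 and a constant cbase == 3,
   the cost computed below is for (ubase - 6) + 2 * var rather than
   ubase + 2 * (var - 3).  */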
4507 if (cst_and_fits_in_hwi (cbase))
4508 {
4509 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4510 cost = difference_cost (data,
4511 ubase, build_int_cst (utype, 0),
4512 &symbol_present, &var_present, &offset,
4513 depends_on);
4514 cost.cost /= avg_loop_niter (data->current_loop);
4515 }
4516 else if (ratio == 1)
4517 {
4518 tree real_cbase = cbase;
4519
4520 /* Check to see if any adjustment is needed. */
4521 if (cstepi == 0 && stmt_is_after_inc)
4522 {
4523 aff_tree real_cbase_aff;
4524 aff_tree cstep_aff;
4525
4526 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4527 &real_cbase_aff);
4528 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4529
4530 aff_combination_add (&real_cbase_aff, &cstep_aff);
4531 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4532 }
4533
4534 cost = difference_cost (data,
4535 ubase, real_cbase,
4536 &symbol_present, &var_present, &offset,
4537 depends_on);
4538 cost.cost /= avg_loop_niter (data->current_loop);
4539 }
4540 else if (address_p
4541 && !POINTER_TYPE_P (ctype)
4542 && multiplier_allowed_in_address_p
4543 (ratio, mem_mode,
4544 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4545 {
4546 cbase
4547 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4548 cost = difference_cost (data,
4549 ubase, cbase,
4550 &symbol_present, &var_present, &offset,
4551 depends_on);
4552 cost.cost /= avg_loop_niter (data->current_loop);
4553 }
4554 else
4555 {
4556 cost = force_var_cost (data, cbase, depends_on);
4557 cost = add_costs (cost,
4558 difference_cost (data,
4559 ubase, build_int_cst (utype, 0),
4560 &symbol_present, &var_present,
4561 &offset, depends_on));
4562 cost.cost /= avg_loop_niter (data->current_loop);
4563 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4564 }
4565
4566 /* The set of invariants depended on by this sub-use has already been
4567 computed for the first use in the group. */
4568 if (use->sub_id)
4569 {
4570 cost.cost = 0;
4571 if (depends_on && *depends_on)
4572 bitmap_clear (*depends_on);
4573 }
4574 else if (inv_expr_id)
4575 {
4576 *inv_expr_id =
4577 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4578 /* Clear depends on. */
4579 if (*inv_expr_id != -1 && depends_on && *depends_on)
4580 bitmap_clear (*depends_on);
4581 }
4582
4583 /* If we are after the increment, the value of the candidate is higher by
4584 one iteration. */
4585 if (stmt_is_after_inc)
4586 offset -= ratio * cstepi;
4587
4588 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4589 (symbol/var1/const parts may be omitted). If we are looking for an
4590 address, find the cost of addressing this. */
4591 if (address_p)
4592 return add_costs (cost,
4593 get_address_cost (symbol_present, var_present,
4594 offset, ratio, cstepi,
4595 mem_mode,
4596 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4597 speed, stmt_is_after_inc,
4598 can_autoinc));
4599
4600 /* Otherwise estimate the costs for computing the expression. */
4601 if (!symbol_present && !var_present && !offset)
4602 {
4603 if (ratio != 1)
4604 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4605 return cost;
4606 }
4607
4608 /* Symbol + offset should be compile-time computable so consider that they
4609 are added once to the variable, if present. */
4610 if (var_present && (symbol_present || offset))
4611 cost.cost += adjust_setup_cost (data,
4612 add_cost (speed, TYPE_MODE (ctype)));
4613
4614 /* Having offset does not affect runtime cost in case it is added to
4615 symbol, but it increases complexity. */
4616 if (offset)
4617 cost.complexity++;
4618
4619 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4620
4621 aratio = ratio > 0 ? ratio : -ratio;
4622 if (aratio != 1)
4623 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4624 return cost;
4625
4626 fallback:
4627 if (can_autoinc)
4628 *can_autoinc = false;
4629
4630 {
4631 /* Just get the expression, expand it and measure the cost. */
4632 tree comp = get_computation_at (data->current_loop, use, cand, at);
4633
4634 if (!comp)
4635 return infinite_cost;
4636
4637 if (address_p)
4638 comp = build_simple_mem_ref (comp);
4639
4640 return new_cost (computation_cost (comp, speed), 0);
4641 }
4642 }
4643
4644 /* Determines the cost of the computation by which USE is expressed
4645 from induction variable CAND. If ADDRESS_P is true, we just need
4646 to create an address from it, otherwise we want to get it into
4647 a register. A set of invariants we depend on is stored in
4648 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4649 autoinc addressing is likely. */
4650
4651 static comp_cost
4652 get_computation_cost (struct ivopts_data *data,
4653 struct iv_use *use, struct iv_cand *cand,
4654 bool address_p, bitmap *depends_on,
4655 bool *can_autoinc, int *inv_expr_id)
4656 {
4657 return get_computation_cost_at (data,
4658 use, cand, address_p, depends_on, use->stmt,
4659 can_autoinc, inv_expr_id);
4660 }
4661
4662 /* Determines cost of basing replacement of USE on CAND in a generic
4663 expression. */
4664
4665 static bool
4666 determine_use_iv_cost_generic (struct ivopts_data *data,
4667 struct iv_use *use, struct iv_cand *cand)
4668 {
4669 bitmap depends_on;
4670 comp_cost cost;
4671 int inv_expr_id = -1;
4672
4673 /* The simple case first -- if we need to express value of the preserved
4674 original biv, the cost is 0. This also prevents us from counting the
4675 cost of increment twice -- once at this use and once in the cost of
4676 the candidate. */
4677 if (cand->pos == IP_ORIGINAL
4678 && cand->incremented_at == use->stmt)
4679 {
4680 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4681 ERROR_MARK, -1);
4682 return true;
4683 }
4684
4685 cost = get_computation_cost (data, use, cand, false, &depends_on,
4686 NULL, &inv_expr_id);
4687
4688 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4689 inv_expr_id);
4690
4691 return !infinite_cost_p (cost);
4692 }
4693
4694 /* Determines cost of basing replacement of USE on CAND in an address. */
4695
4696 static bool
4697 determine_use_iv_cost_address (struct ivopts_data *data,
4698 struct iv_use *use, struct iv_cand *cand)
4699 {
4700 bitmap depends_on;
4701 bool can_autoinc;
4702 int inv_expr_id = -1;
4703 struct iv_use *sub_use;
4704 comp_cost sub_cost;
4705 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4706 &can_autoinc, &inv_expr_id);
4707
4708 if (cand->ainc_use == use)
4709 {
4710 if (can_autoinc)
4711 cost.cost -= cand->cost_step;
4712 /* If we generated the candidate solely for exploiting autoincrement
4713 opportunities, and it turns out it can't be used, set the cost to
4714 infinity to make sure we ignore it. */
4715 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4716 cost = infinite_cost;
4717 }
4718 for (sub_use = use->next;
4719 sub_use && !infinite_cost_p (cost);
4720 sub_use = sub_use->next)
4721 {
4722 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4723 &can_autoinc, &inv_expr_id);
4724 cost = add_costs (cost, sub_cost);
4725 }
4726
4727 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4728 inv_expr_id);
4729
4730 return !infinite_cost_p (cost);
4731 }
4732
4733 /* Computes value of candidate CAND at position AT in iteration NITER, and
4734 stores it to VAL. */
4735
4736 static void
4737 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4738 aff_tree *val)
4739 {
4740 aff_tree step, delta, nit;
4741 struct iv *iv = cand->iv;
4742 tree type = TREE_TYPE (iv->base);
4743 tree steptype = type;
4744 if (POINTER_TYPE_P (type))
4745 steptype = sizetype;
4746 steptype = unsigned_type_for (type);
4747
4748 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4749 aff_combination_convert (&step, steptype);
4750 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4751 aff_combination_convert (&nit, steptype);
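/* DELTA = NITER * STEP; if the use is after the increment, one more
   STEP is added below.  */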
4752 aff_combination_mult (&nit, &step, &delta);
4753 if (stmt_after_increment (loop, cand, at))
4754 aff_combination_add (&delta, &step);
4755
4756 tree_to_aff_combination (iv->base, type, val);
4757 if (!POINTER_TYPE_P (type))
4758 aff_combination_convert (val, steptype);
4759 aff_combination_add (val, &delta);
4760 }
4761
4762 /* Returns the period of induction variable IV. */
4763
4764 static tree
4765 iv_period (struct iv *iv)
4766 {
4767 tree step = iv->step, period, type;
4768 tree pow2div;
4769
4770 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4771
4772 type = unsigned_type_for (TREE_TYPE (step));
4773 /* The period of the iv is lcm (step, type_range)/step - 1,
4774 i.e., N*type_range/step - 1. Since the type range is a power
4775 of two, N == step >> num_of_ending_zeros_binary (step),
4776 so the final result is
4777
4778 (type_range >> num_of_ending_zeros_binary (step)) - 1
4779
4780 */
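/* Illustrative example: for a 32-bit unsigned iv with step 4,
   num_ending_zeros (step) == 2, so the period is
   (2^32 >> 2) - 1 == 0x3fffffff.  */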
4781 pow2div = num_ending_zeros (step);
4782
4783 period = build_low_bits_mask (type,
4784 (TYPE_PRECISION (type)
4785 - tree_to_uhwi (pow2div)));
4786
4787 return period;
4788 }
4789
4790 /* Returns the comparison operator used when eliminating the iv USE. */
4791
4792 static enum tree_code
4793 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4794 {
4795 struct loop *loop = data->current_loop;
4796 basic_block ex_bb;
4797 edge exit;
4798
4799 ex_bb = gimple_bb (use->stmt);
4800 exit = EDGE_SUCC (ex_bb, 0);
4801 if (flow_bb_inside_loop_p (loop, exit->dest))
4802 exit = EDGE_SUCC (ex_bb, 1);
4803
4804 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4805 }
4806
4807 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4808 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4809 calculation is performed in non-wrapping type.
4810
4811 TODO: More generally, we could test for the situation that
4812 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4813 This would require knowing the sign of OFFSET. */
4814
4815 static bool
4816 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4817 {
4818 enum tree_code code;
4819 tree e1, e2;
4820 aff_tree aff_e1, aff_e2, aff_offset;
4821
4822 if (!nowrap_type_p (TREE_TYPE (base)))
4823 return false;
4824
4825 base = expand_simple_operations (base);
4826
4827 if (TREE_CODE (base) == SSA_NAME)
4828 {
4829 gimple stmt = SSA_NAME_DEF_STMT (base);
4830
4831 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4832 return false;
4833
4834 code = gimple_assign_rhs_code (stmt);
4835 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4836 return false;
4837
4838 e1 = gimple_assign_rhs1 (stmt);
4839 e2 = gimple_assign_rhs2 (stmt);
4840 }
4841 else
4842 {
4843 code = TREE_CODE (base);
4844 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4845 return false;
4846 e1 = TREE_OPERAND (base, 0);
4847 e2 = TREE_OPERAND (base, 1);
4848 }
4849
4850 /* Use affine expansion as deeper inspection to prove the equality. */
4851 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4852 &aff_e2, &data->name_expansion_cache);
4853 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4854 &aff_offset, &data->name_expansion_cache);
4855 aff_combination_scale (&aff_offset, -1);
4856 switch (code)
4857 {
4858 case PLUS_EXPR:
4859 aff_combination_add (&aff_e2, &aff_offset);
4860 if (aff_combination_zero_p (&aff_e2))
4861 return true;
4862
4863 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4864 &aff_e1, &data->name_expansion_cache);
4865 aff_combination_add (&aff_e1, &aff_offset);
4866 return aff_combination_zero_p (&aff_e1);
4867
4868 case POINTER_PLUS_EXPR:
4869 aff_combination_add (&aff_e2, &aff_offset);
4870 return aff_combination_zero_p (&aff_e2);
4871
4872 default:
4873 return false;
4874 }
4875 }
4876
4877 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4878 comparison with CAND. NITER describes the number of iterations of
4879 the loops. If successful, the comparison in COMP_P is altered accordingly.
4880
4881 We aim to handle the following situation:
4882
4883 sometype *base, *p;
4884 int a, b, i;
4885
4886 i = a;
4887 p = p_0 = base + a;
4888
4889 do
4890 {
4891 bla (*p);
4892 p++;
4893 i++;
4894 }
4895 while (i < b);
4896
4897 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4898 We aim to optimize this to
4899
4900 p = p_0 = base + a;
4901 do
4902 {
4903 bla (*p);
4904 p++;
4905 }
4906 while (p < p_0 - a + b);
4907
4908 This preserves correctness, since the pointer arithmetic does not
4909 overflow. More precisely:
4910
4911 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4912 overflow in computing it or the values of p.
4913 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4914 overflow. To prove this, we use the fact that p_0 = base + a. */
4915
4916 static bool
4917 iv_elimination_compare_lt (struct ivopts_data *data,
4918 struct iv_cand *cand, enum tree_code *comp_p,
4919 struct tree_niter_desc *niter)
4920 {
4921 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4922 struct aff_tree nit, tmpa, tmpb;
4923 enum tree_code comp;
4924 HOST_WIDE_INT step;
4925
4926 /* We need to know that the candidate induction variable does not overflow.
4927 While more complex analysis may be used to prove this, for now just
4928 check that the variable appears in the original program and that it
4929 is computed in a type that guarantees no overflows. */
4930 cand_type = TREE_TYPE (cand->iv->base);
4931 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4932 return false;
4933
4934 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4935 the calculation of the BOUND could overflow, making the comparison
4936 invalid. */
4937 if (!data->loop_single_exit_p)
4938 return false;
4939
4940 /* We need to be able to decide whether candidate is increasing or decreasing
4941 in order to choose the right comparison operator. */
4942 if (!cst_and_fits_in_hwi (cand->iv->step))
4943 return false;
4944 step = int_cst_value (cand->iv->step);
4945
4946 /* Check that the number of iterations matches the expected pattern:
4947 a + 1 > b ? 0 : b - a - 1. */
4948 mbz = niter->may_be_zero;
4949 if (TREE_CODE (mbz) == GT_EXPR)
4950 {
4951 /* Handle a + 1 > b. */
4952 tree op0 = TREE_OPERAND (mbz, 0);
4953 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4954 {
4955 a = TREE_OPERAND (op0, 0);
4956 b = TREE_OPERAND (mbz, 1);
4957 }
4958 else
4959 return false;
4960 }
4961 else if (TREE_CODE (mbz) == LT_EXPR)
4962 {
4963 tree op1 = TREE_OPERAND (mbz, 1);
4964
4965 /* Handle b < a + 1. */
4966 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4967 {
4968 a = TREE_OPERAND (op1, 0);
4969 b = TREE_OPERAND (mbz, 0);
4970 }
4971 else
4972 return false;
4973 }
4974 else
4975 return false;
4976
4977 /* Expected number of iterations is B - A - 1. Check that it matches
4978 the actual number, i.e., that B - A - NITER = 1. */
4979 tree_to_aff_combination (niter->niter, nit_type, &nit);
4980 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4981 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4982 aff_combination_scale (&nit, -1);
4983 aff_combination_scale (&tmpa, -1);
4984 aff_combination_add (&tmpb, &tmpa);
4985 aff_combination_add (&tmpb, &nit);
4986 if (tmpb.n != 0 || tmpb.offset != 1)
4987 return false;
4988
4989 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4990 overflow. */
4991 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4992 cand->iv->step,
4993 fold_convert (TREE_TYPE (cand->iv->step), a));
4994 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4995 return false;
4996
4997 /* Determine the new comparison operator. */
4998 comp = step < 0 ? GT_EXPR : LT_EXPR;
4999 if (*comp_p == NE_EXPR)
5000 *comp_p = comp;
5001 else if (*comp_p == EQ_EXPR)
5002 *comp_p = invert_tree_comparison (comp, false);
5003 else
5004 gcc_unreachable ();
5005
5006 return true;
5007 }
5008
5009 /* Check whether it is possible to express the condition in USE by comparison
5010 of candidate CAND. If so, store the value compared with to BOUND, and the
5011 comparison operator to COMP. */
5012
5013 static bool
5014 may_eliminate_iv (struct ivopts_data *data,
5015 struct iv_use *use, struct iv_cand *cand, tree *bound,
5016 enum tree_code *comp)
5017 {
5018 basic_block ex_bb;
5019 edge exit;
5020 tree period;
5021 struct loop *loop = data->current_loop;
5022 aff_tree bnd;
5023 struct tree_niter_desc *desc = NULL;
5024
5025 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5026 return false;
5027
5028 /* For now works only for exits that dominate the loop latch.
5029 TODO: extend to other conditions inside loop body. */
5030 ex_bb = gimple_bb (use->stmt);
5031 if (use->stmt != last_stmt (ex_bb)
5032 || gimple_code (use->stmt) != GIMPLE_COND
5033 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5034 return false;
5035
5036 exit = EDGE_SUCC (ex_bb, 0);
5037 if (flow_bb_inside_loop_p (loop, exit->dest))
5038 exit = EDGE_SUCC (ex_bb, 1);
5039 if (flow_bb_inside_loop_p (loop, exit->dest))
5040 return false;
5041
5042 desc = niter_for_exit (data, exit);
5043 if (!desc)
5044 return false;
5045
5046 /* Determine whether we can use the variable to test the exit condition.
5047 This is the case iff the period of the induction variable is greater
5048 than the number of iterations for which the exit condition is true. */
5049 period = iv_period (cand->iv);
5050
5051 /* If the number of iterations is constant, compare against it directly. */
5052 if (TREE_CODE (desc->niter) == INTEGER_CST)
5053 {
5054 /* See cand_value_at. */
5055 if (stmt_after_increment (loop, cand, use->stmt))
5056 {
5057 if (!tree_int_cst_lt (desc->niter, period))
5058 return false;
5059 }
5060 else
5061 {
5062 if (tree_int_cst_lt (period, desc->niter))
5063 return false;
5064 }
5065 }
5066
5067 /* If not, and if this is the only possible exit of the loop, see whether
5068 we can get a conservative estimate on the number of iterations of the
5069 entire loop and compare against that instead. */
5070 else
5071 {
5072 widest_int period_value, max_niter;
5073
5074 max_niter = desc->max;
5075 if (stmt_after_increment (loop, cand, use->stmt))
5076 max_niter += 1;
5077 period_value = wi::to_widest (period);
5078 if (wi::gtu_p (max_niter, period_value))
5079 {
5080 /* See if we can take advantage of inferred loop bound information. */
5081 if (data->loop_single_exit_p)
5082 {
5083 if (!max_loop_iterations (loop, &max_niter))
5084 return false;
5085 /* The loop bound is already adjusted by adding 1. */
5086 if (wi::gtu_p (max_niter, period_value))
5087 return false;
5088 }
5089 else
5090 return false;
5091 }
5092 }
5093
5094 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5095
5096 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5097 aff_combination_to_tree (&bnd));
5098 *comp = iv_elimination_compare (data, use);
5099
5100 /* It is unlikely that computing the number of iterations using division
5101 would be more profitable than keeping the original induction variable. */
5102 if (expression_expensive_p (*bound))
5103 return false;
5104
5105 /* Sometimes, it is possible to handle the situation that the number of
5106 iterations may be zero unless additional assumptions hold, by using <
5107 instead of != in the exit condition.
5108
5109 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5110 base the exit condition on it. However, that is often too
5111 expensive. */
5112 if (!integer_zerop (desc->may_be_zero))
5113 return iv_elimination_compare_lt (data, cand, comp, desc);
5114
5115 return true;
5116 }
5117
5118 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5119 be copied if it is used in the loop body and DATA->body_includes_call. */
5120
5121 static int
5122 parm_decl_cost (struct ivopts_data *data, tree bound)
5123 {
5124 tree sbound = bound;
5125 STRIP_NOPS (sbound);
5126
5127 if (TREE_CODE (sbound) == SSA_NAME
5128 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5129 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5130 && data->body_includes_call)
5131 return COSTS_N_INSNS (1);
5132
5133 return 0;
5134 }
5135
5136 /* Determines cost of basing replacement of USE on CAND in a condition. */
5137
5138 static bool
5139 determine_use_iv_cost_condition (struct ivopts_data *data,
5140 struct iv_use *use, struct iv_cand *cand)
5141 {
5142 tree bound = NULL_TREE;
5143 struct iv *cmp_iv;
5144 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5145 comp_cost elim_cost, express_cost, cost, bound_cost;
5146 bool ok;
5147 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5148 tree *control_var, *bound_cst;
5149 enum tree_code comp = ERROR_MARK;
5150
5151 /* Only consider real candidates. */
5152 if (!cand->iv)
5153 {
5154 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5155 ERROR_MARK, -1);
5156 return false;
5157 }
5158
5159 /* Try iv elimination. */
5160 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5161 {
5162 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5163 if (elim_cost.cost == 0)
5164 elim_cost.cost = parm_decl_cost (data, bound);
5165 else if (TREE_CODE (bound) == INTEGER_CST)
5166 elim_cost.cost = 0;
5167 /* If we replace a loop condition 'i < n' with 'p < base + n',
5168 depends_on_elim will have 'base' and 'n' set, which implies
5169 that both 'base' and 'n' will be live during the loop. More likely,
5170 'base + n' will be loop invariant, resulting in only one live value
5171 during the loop. So in that case we clear depends_on_elim and set
5172 elim_inv_expr_id instead. */
5173 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5174 {
5175 elim_inv_expr_id = get_expr_id (data, bound);
5176 bitmap_clear (depends_on_elim);
5177 }
5178 /* The bound is a loop invariant, so it will be only computed
5179 once. */
5180 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5181 }
5182 else
5183 elim_cost = infinite_cost;
5184
5185 /* Try expressing the original giv. If it is compared with an invariant,
5186 note that we cannot get rid of it. */
5187 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5188 NULL, &cmp_iv);
5189 gcc_assert (ok);
5190
5191 /* When the condition is a comparison of the candidate IV against
5192 zero, prefer this IV.
5193
5194 TODO: The constant that we're subtracting from the cost should
5195 be target-dependent. This information should be added to the
5196 target costs for each backend. */
5197 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5198 && integer_zerop (*bound_cst)
5199 && (operand_equal_p (*control_var, cand->var_after, 0)
5200 || operand_equal_p (*control_var, cand->var_before, 0)))
5201 elim_cost.cost -= 1;
5202
5203 express_cost = get_computation_cost (data, use, cand, false,
5204 &depends_on_express, NULL,
5205 &express_inv_expr_id);
5206 fd_ivopts_data = data;
5207 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5208
5209 /* Count the cost of the original bound as well. */
5210 bound_cost = force_var_cost (data, *bound_cst, NULL);
5211 if (bound_cost.cost == 0)
5212 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5213 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5214 bound_cost.cost = 0;
5215 express_cost.cost += bound_cost.cost;
5216
5217 /* Choose the better approach, preferring the eliminated IV. */
5218 if (compare_costs (elim_cost, express_cost) <= 0)
5219 {
5220 cost = elim_cost;
5221 depends_on = depends_on_elim;
5222 depends_on_elim = NULL;
5223 inv_expr_id = elim_inv_expr_id;
5224 }
5225 else
5226 {
5227 cost = express_cost;
5228 depends_on = depends_on_express;
5229 depends_on_express = NULL;
5230 bound = NULL_TREE;
5231 comp = ERROR_MARK;
5232 inv_expr_id = express_inv_expr_id;
5233 }
5234
5235 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5236
5237 if (depends_on_elim)
5238 BITMAP_FREE (depends_on_elim);
5239 if (depends_on_express)
5240 BITMAP_FREE (depends_on_express);
5241
5242 return !infinite_cost_p (cost);
5243 }
5244
5245 /* Determines cost of basing replacement of USE on CAND. Returns false
5246 if USE cannot be based on CAND. */
5247
5248 static bool
5249 determine_use_iv_cost (struct ivopts_data *data,
5250 struct iv_use *use, struct iv_cand *cand)
5251 {
5252 switch (use->type)
5253 {
5254 case USE_NONLINEAR_EXPR:
5255 return determine_use_iv_cost_generic (data, use, cand);
5256
5257 case USE_ADDRESS:
5258 return determine_use_iv_cost_address (data, use, cand);
5259
5260 case USE_COMPARE:
5261 return determine_use_iv_cost_condition (data, use, cand);
5262
5263 default:
5264 gcc_unreachable ();
5265 }
5266 }
5267
5268 /* Return true if get_computation_cost indicates that autoincrement is
5269 a possibility for the pair of USE and CAND, false otherwise. */
5270
5271 static bool
5272 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5273 struct iv_cand *cand)
5274 {
5275 bitmap depends_on;
5276 bool can_autoinc;
5277 comp_cost cost;
5278
5279 if (use->type != USE_ADDRESS)
5280 return false;
5281
5282 cost = get_computation_cost (data, use, cand, true, &depends_on,
5283 &can_autoinc, NULL);
5284
5285 BITMAP_FREE (depends_on);
5286
5287 return !infinite_cost_p (cost) && can_autoinc;
5288 }
5289
5290 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5291 use that allows autoincrement, and set their AINC_USE if possible. */
5292
5293 static void
5294 set_autoinc_for_original_candidates (struct ivopts_data *data)
5295 {
5296 unsigned i, j;
5297
5298 for (i = 0; i < n_iv_cands (data); i++)
5299 {
5300 struct iv_cand *cand = iv_cand (data, i);
5301 struct iv_use *closest_before = NULL;
5302 struct iv_use *closest_after = NULL;
5303 if (cand->pos != IP_ORIGINAL)
5304 continue;
5305
5306 for (j = 0; j < n_iv_uses (data); j++)
5307 {
5308 struct iv_use *use = iv_use (data, j);
5309 unsigned uid = gimple_uid (use->stmt);
5310
5311 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5312 continue;
5313
5314 if (uid < gimple_uid (cand->incremented_at)
5315 && (closest_before == NULL
5316 || uid > gimple_uid (closest_before->stmt)))
5317 closest_before = use;
5318
5319 if (uid > gimple_uid (cand->incremented_at)
5320 && (closest_after == NULL
5321 || uid < gimple_uid (closest_after->stmt)))
5322 closest_after = use;
5323 }
5324
5325 if (closest_before != NULL
5326 && autoinc_possible_for_pair (data, closest_before, cand))
5327 cand->ainc_use = closest_before;
5328 else if (closest_after != NULL
5329 && autoinc_possible_for_pair (data, closest_after, cand))
5330 cand->ainc_use = closest_after;
5331 }
5332 }
5333
5334 /* Finds the candidates for the induction variables. */
5335
5336 static void
5337 find_iv_candidates (struct ivopts_data *data)
5338 {
5339 /* Add commonly used ivs. */
5340 add_standard_iv_candidates (data);
5341
5342 /* Add old induction variables. */
5343 add_old_ivs_candidates (data);
5344
5345 /* Add induction variables derived from uses. */
5346 add_derived_ivs_candidates (data);
5347
5348 set_autoinc_for_original_candidates (data);
5349
5350 /* Record the important candidates. */
5351 record_important_candidates (data);
5352 }
5353
5354 /* Determines costs of basing the use of the iv on an iv candidate. */
5355
5356 static void
5357 determine_use_iv_costs (struct ivopts_data *data)
5358 {
5359 unsigned i, j;
5360 struct iv_use *use;
5361 struct iv_cand *cand;
5362 bitmap to_clear = BITMAP_ALLOC (NULL);
5363
5364 alloc_use_cost_map (data);
5365
5366 for (i = 0; i < n_iv_uses (data); i++)
5367 {
5368 use = iv_use (data, i);
5369
5370 if (data->consider_all_candidates)
5371 {
5372 for (j = 0; j < n_iv_cands (data); j++)
5373 {
5374 cand = iv_cand (data, j);
5375 determine_use_iv_cost (data, use, cand);
5376 }
5377 }
5378 else
5379 {
5380 bitmap_iterator bi;
5381
5382 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5383 {
5384 cand = iv_cand (data, j);
5385 if (!determine_use_iv_cost (data, use, cand))
5386 bitmap_set_bit (to_clear, j);
5387 }
5388
5389 /* Remove the candidates for which the cost is infinite from
5390 the list of related candidates. */
5391 bitmap_and_compl_into (use->related_cands, to_clear);
5392 bitmap_clear (to_clear);
5393 }
5394 }
5395
5396 BITMAP_FREE (to_clear);
5397
5398 if (dump_file && (dump_flags & TDF_DETAILS))
5399 {
5400 fprintf (dump_file, "Use-candidate costs:\n");
5401
5402 for (i = 0; i < n_iv_uses (data); i++)
5403 {
5404 use = iv_use (data, i);
5405
5406 fprintf (dump_file, "Use %d:\n", i);
5407 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5408 for (j = 0; j < use->n_map_members; j++)
5409 {
5410 if (!use->cost_map[j].cand
5411 || infinite_cost_p (use->cost_map[j].cost))
5412 continue;
5413
5414 fprintf (dump_file, " %d\t%d\t%d\t",
5415 use->cost_map[j].cand->id,
5416 use->cost_map[j].cost.cost,
5417 use->cost_map[j].cost.complexity);
5418 if (use->cost_map[j].depends_on)
5419 bitmap_print (dump_file,
5420 use->cost_map[j].depends_on, "","");
5421 if (use->cost_map[j].inv_expr_id != -1)
5422 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5423 fprintf (dump_file, "\n");
5424 }
5425
5426 fprintf (dump_file, "\n");
5427 }
5428 fprintf (dump_file, "\n");
5429 }
5430 }
5431
5432 /* Determines cost of the candidate CAND. */
5433
5434 static void
5435 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5436 {
5437 comp_cost cost_base;
5438 unsigned cost, cost_step;
5439 tree base;
5440
5441 if (!cand->iv)
5442 {
5443 cand->cost = 0;
5444 return;
5445 }
5446
5447 /* There are two costs associated with the candidate -- its increment
5448 and its initialization. The second is almost negligible for any loop
5449 that rolls enough, so we give it only very little weight. */
5450
5451 base = cand->iv->base;
5452 cost_base = force_var_cost (data, base, NULL);
5453 /* It will be exceptional that the iv register happens to be initialized with
5454 the proper value at no cost. In general, there will at least be a regcopy
5455 or a const set. */
5456 if (cost_base.cost == 0)
5457 cost_base.cost = COSTS_N_INSNS (1);
5458 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5459
5460 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5461
5462 /* Prefer the original ivs unless we may gain something by replacing it.
5463 The reason is to make debugging simpler; so this is not relevant for
5464 artificial ivs created by other optimization passes. */
5465 if (cand->pos != IP_ORIGINAL
5466 || !SSA_NAME_VAR (cand->var_before)
5467 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5468 cost++;
5469
5470 /* Prefer not to insert statements into latch unless there are some
5471 already (so that we do not create unnecessary jumps). */
5472 if (cand->pos == IP_END
5473 && empty_block_p (ip_end_pos (data->current_loop)))
5474 cost++;
5475
5476 cand->cost = cost;
5477 cand->cost_step = cost_step;
5478 }
5479
5480 /* Determines costs of computation of the candidates. */
5481
5482 static void
5483 determine_iv_costs (struct ivopts_data *data)
5484 {
5485 unsigned i;
5486
5487 if (dump_file && (dump_flags & TDF_DETAILS))
5488 {
5489 fprintf (dump_file, "Candidate costs:\n");
5490 fprintf (dump_file, " cand\tcost\n");
5491 }
5492
5493 for (i = 0; i < n_iv_cands (data); i++)
5494 {
5495 struct iv_cand *cand = iv_cand (data, i);
5496
5497 determine_iv_cost (data, cand);
5498
5499 if (dump_file && (dump_flags & TDF_DETAILS))
5500 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5501 }
5502
5503 if (dump_file && (dump_flags & TDF_DETAILS))
5504 fprintf (dump_file, "\n");
5505 }
5506
5507 /* Calculates cost for having SIZE induction variables. */
5508
5509 static unsigned
5510 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5511 {
5512 /* We add size to the cost, so that we prefer eliminating ivs
5513 if possible. */
5514 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5515 data->body_includes_call);
5516 }
5517
5518 /* For each size of the induction variable set determine the penalty. */
5519
5520 static void
5521 determine_set_costs (struct ivopts_data *data)
5522 {
5523 unsigned j, n;
5524 gphi *phi;
5525 gphi_iterator psi;
5526 tree op;
5527 struct loop *loop = data->current_loop;
5528 bitmap_iterator bi;
5529
5530 if (dump_file && (dump_flags & TDF_DETAILS))
5531 {
5532 fprintf (dump_file, "Global costs:\n");
5533 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5534 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5535 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5536 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5537 }
5538
5539 n = 0;
5540 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5541 {
5542 phi = psi.phi ();
5543 op = PHI_RESULT (phi);
5544
5545 if (virtual_operand_p (op))
5546 continue;
5547
5548 if (get_iv (data, op))
5549 continue;
5550
5551 n++;
5552 }
5553
5554 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5555 {
5556 struct version_info *info = ver_info (data, j);
5557
5558 if (info->inv_id && info->has_nonlin_use)
5559 n++;
5560 }
5561
5562 data->regs_used = n;
5563 if (dump_file && (dump_flags & TDF_DETAILS))
5564 fprintf (dump_file, " regs_used %d\n", n);
5565
5566 if (dump_file && (dump_flags & TDF_DETAILS))
5567 {
5568 fprintf (dump_file, " cost for size:\n");
5569 fprintf (dump_file, " ivs\tcost\n");
5570 for (j = 0; j <= 2 * target_avail_regs; j++)
5571 fprintf (dump_file, " %d\t%d\n", j,
5572 ivopts_global_cost_for_size (data, j));
5573 fprintf (dump_file, "\n");
5574 }
5575 }
5576
5577 /* Returns true if A is a cheaper cost pair than B. */
5578
5579 static bool
5580 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5581 {
5582 int cmp;
5583
5584 if (!a)
5585 return false;
5586
5587 if (!b)
5588 return true;
5589
5590 cmp = compare_costs (a->cost, b->cost);
5591 if (cmp < 0)
5592 return true;
5593
5594 if (cmp > 0)
5595 return false;
5596
5597 /* In case the costs are the same, prefer the cheaper candidate. */
5598 if (a->cand->cost < b->cand->cost)
5599 return true;
5600
5601 return false;
5602 }
5603
5604
5605 /* Returns the candidate by which USE is expressed in IVS. */
5606
5607 static struct cost_pair *
5608 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5609 {
5610 return ivs->cand_for_use[use->id];
5611 }
5612
5613 /* Computes the cost field of IVS structure. */
5614
5615 static void
5616 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5617 {
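/* The total cost is the sum of the per-use costs, the candidate increment
   costs, and the register-pressure cost of keeping the candidate ivs and
   the used invariant expressions live.  */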
5618 comp_cost cost = ivs->cand_use_cost;
5619
5620 cost.cost += ivs->cand_cost;
5621
5622 cost.cost += ivopts_global_cost_for_size (data,
5623 ivs->n_regs + ivs->num_used_inv_expr);
5624
5625 ivs->cost = cost;
5626 }
5627
5628 /* Remove invariants in set INVS from set IVS. */
5629
5630 static void
5631 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5632 {
5633 bitmap_iterator bi;
5634 unsigned iid;
5635
5636 if (!invs)
5637 return;
5638
5639 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5640 {
5641 ivs->n_invariant_uses[iid]--;
5642 if (ivs->n_invariant_uses[iid] == 0)
5643 ivs->n_regs--;
5644 }
5645 }
5646
5647 /* Set USE not to be expressed by any candidate in IVS. */
5648
5649 static void
5650 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5651 struct iv_use *use)
5652 {
5653 unsigned uid = use->id, cid;
5654 struct cost_pair *cp;
5655
5656 cp = ivs->cand_for_use[uid];
5657 if (!cp)
5658 return;
5659 cid = cp->cand->id;
5660
5661 ivs->bad_uses++;
5662 ivs->cand_for_use[uid] = NULL;
5663 ivs->n_cand_uses[cid]--;
5664
5665 if (ivs->n_cand_uses[cid] == 0)
5666 {
5667 bitmap_clear_bit (ivs->cands, cid);
5668 /* Do not count the pseudocandidates. */
5669 if (cp->cand->iv)
5670 ivs->n_regs--;
5671 ivs->n_cands--;
5672 ivs->cand_cost -= cp->cand->cost;
5673
5674 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5675 }
5676
5677 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5678
5679 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5680
5681 if (cp->inv_expr_id != -1)
5682 {
5683 ivs->used_inv_expr[cp->inv_expr_id]--;
5684 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5685 ivs->num_used_inv_expr--;
5686 }
5687 iv_ca_recount_cost (data, ivs);
5688 }
5689
5690 /* Add invariants in set INVS to set IVS. */
5691
5692 static void
5693 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5694 {
5695 bitmap_iterator bi;
5696 unsigned iid;
5697
5698 if (!invs)
5699 return;
5700
5701 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5702 {
5703 ivs->n_invariant_uses[iid]++;
5704 if (ivs->n_invariant_uses[iid] == 1)
5705 ivs->n_regs++;
5706 }
5707 }
5708
5709 /* Set cost pair for USE in set IVS to CP. */
5710
5711 static void
5712 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5713 struct iv_use *use, struct cost_pair *cp)
5714 {
5715 unsigned uid = use->id, cid;
5716
5717 if (ivs->cand_for_use[uid] == cp)
5718 return;
5719
5720 if (ivs->cand_for_use[uid])
5721 iv_ca_set_no_cp (data, ivs, use);
5722
5723 if (cp)
5724 {
5725 cid = cp->cand->id;
5726
5727 ivs->bad_uses--;
5728 ivs->cand_for_use[uid] = cp;
5729 ivs->n_cand_uses[cid]++;
5730 if (ivs->n_cand_uses[cid] == 1)
5731 {
5732 bitmap_set_bit (ivs->cands, cid);
5733 /* Do not count the pseudocandidates. */
5734 if (cp->cand->iv)
5735 ivs->n_regs++;
5736 ivs->n_cands++;
5737 ivs->cand_cost += cp->cand->cost;
5738
5739 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5740 }
5741
5742 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5743 iv_ca_set_add_invariants (ivs, cp->depends_on);
5744
5745 if (cp->inv_expr_id != -1)
5746 {
5747 ivs->used_inv_expr[cp->inv_expr_id]++;
5748 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5749 ivs->num_used_inv_expr++;
5750 }
5751 iv_ca_recount_cost (data, ivs);
5752 }
5753 }
5754
5755 /* Extend set IVS by expressing USE by some of the candidates in it
5756 if possible. Consider all important candidates if candidates in
5757 set IVS don't give any result. */
5758
5759 static void
5760 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5761 struct iv_use *use)
5762 {
5763 struct cost_pair *best_cp = NULL, *cp;
5764 bitmap_iterator bi;
5765 unsigned i;
5766 struct iv_cand *cand;
5767
5768 gcc_assert (ivs->upto >= use->id);
5769 ivs->upto++;
5770 ivs->bad_uses++;
5771
5772 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5773 {
5774 cand = iv_cand (data, i);
5775 cp = get_use_iv_cost (data, use, cand);
5776 if (cheaper_cost_pair (cp, best_cp))
5777 best_cp = cp;
5778 }
5779
5780 if (best_cp == NULL)
5781 {
5782 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5783 {
5784 cand = iv_cand (data, i);
5785 cp = get_use_iv_cost (data, use, cand);
5786 if (cheaper_cost_pair (cp, best_cp))
5787 best_cp = cp;
5788 }
5789 }
5790
5791 iv_ca_set_cp (data, ivs, use, best_cp);
5792 }
5793
5794 /* Get cost for assignment IVS. */
5795
5796 static comp_cost
5797 iv_ca_cost (struct iv_ca *ivs)
5798 {
5799 /* This was a conditional expression but it triggered a bug in
5800 Sun C 5.5. */
5801 if (ivs->bad_uses)
5802 return infinite_cost;
5803 else
5804 return ivs->cost;
5805 }
5806
5807 /* Returns true if all dependences of CP are among invariants in IVS. */
5808
5809 static bool
5810 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5811 {
5812 unsigned i;
5813 bitmap_iterator bi;
5814
5815 if (!cp->depends_on)
5816 return true;
5817
5818 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5819 {
5820 if (ivs->n_invariant_uses[i] == 0)
5821 return false;
5822 }
5823
5824 return true;
5825 }
5826
5827 /* Creates a change that expresses USE by NEW_CP instead of OLD_CP and chains
5828 it before NEXT_CHANGE. */
5829
5830 static struct iv_ca_delta *
5831 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5832 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5833 {
5834 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5835
5836 change->use = use;
5837 change->old_cp = old_cp;
5838 change->new_cp = new_cp;
5839 change->next_change = next_change;
5840
5841 return change;
5842 }
5843
5844 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5845 are rewritten. */
5846
5847 static struct iv_ca_delta *
5848 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5849 {
5850 struct iv_ca_delta *last;
5851
5852 if (!l2)
5853 return l1;
5854
5855 if (!l1)
5856 return l2;
5857
5858 for (last = l1; last->next_change; last = last->next_change)
5859 continue;
5860 last->next_change = l2;
5861
5862 return l1;
5863 }
5864
5865 /* Reverse the list of changes DELTA, forming the inverse to it. */
5866
5867 static struct iv_ca_delta *
5868 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5869 {
5870 struct iv_ca_delta *act, *next, *prev = NULL;
5871
5872 for (act = delta; act; act = next)
5873 {
5874 next = act->next_change;
5875 act->next_change = prev;
5876 prev = act;
5877
5878 std::swap (act->old_cp, act->new_cp);
5879 }
5880
5881 return prev;
5882 }
5883
5884 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5885 reverted instead. */
5886
5887 static void
5888 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5889 struct iv_ca_delta *delta, bool forward)
5890 {
5891 struct cost_pair *from, *to;
5892 struct iv_ca_delta *act;
5893
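/* When reverting, reverse DELTA first: iv_ca_delta_reverse also swaps
the old and new cost pairs of each entry, so the loop below then
undoes the recorded changes.  DELTA is reversed back at the end so
the caller sees it unmodified.  */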
5894 if (!forward)
5895 delta = iv_ca_delta_reverse (delta);
5896
5897 for (act = delta; act; act = act->next_change)
5898 {
5899 from = act->old_cp;
5900 to = act->new_cp;
5901 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5902 iv_ca_set_cp (data, ivs, act->use, to);
5903 }
5904
5905 if (!forward)
5906 iv_ca_delta_reverse (delta);
5907 }
5908
5909 /* Returns true if CAND is used in IVS. */
5910
5911 static bool
5912 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5913 {
5914 return ivs->n_cand_uses[cand->id] > 0;
5915 }
5916
5917 /* Returns number of induction variable candidates in the set IVS. */
5918
5919 static unsigned
5920 iv_ca_n_cands (struct iv_ca *ivs)
5921 {
5922 return ivs->n_cands;
5923 }
5924
5925 /* Free the list of changes DELTA. */
5926
5927 static void
5928 iv_ca_delta_free (struct iv_ca_delta **delta)
5929 {
5930 struct iv_ca_delta *act, *next;
5931
5932 for (act = *delta; act; act = next)
5933 {
5934 next = act->next_change;
5935 free (act);
5936 }
5937
5938 *delta = NULL;
5939 }
5940
5941 /* Allocates a new iv candidate assignment. */
5942
5943 static struct iv_ca *
5944 iv_ca_new (struct ivopts_data *data)
5945 {
5946 struct iv_ca *nw = XNEW (struct iv_ca);
5947
5948 nw->upto = 0;
5949 nw->bad_uses = 0;
5950 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5951 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5952 nw->cands = BITMAP_ALLOC (NULL);
5953 nw->n_cands = 0;
5954 nw->n_regs = 0;
5955 nw->cand_use_cost = no_cost;
5956 nw->cand_cost = 0;
5957 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5958 nw->cost = no_cost;
5959 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5960 nw->num_used_inv_expr = 0;
5961
5962 return nw;
5963 }
5964
5965 /* Free memory occupied by the set IVS. */
5966
5967 static void
5968 iv_ca_free (struct iv_ca **ivs)
5969 {
5970 free ((*ivs)->cand_for_use);
5971 free ((*ivs)->n_cand_uses);
5972 BITMAP_FREE ((*ivs)->cands);
5973 free ((*ivs)->n_invariant_uses);
5974 free ((*ivs)->used_inv_expr);
5975 free (*ivs);
5976 *ivs = NULL;
5977 }
5978
5979 /* Dumps IVS to FILE. */
5980
5981 static void
5982 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5983 {
5984 const char *pref = " invariants ";
5985 unsigned i;
5986 comp_cost cost = iv_ca_cost (ivs);
5987
5988 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5989 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5990 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5991 bitmap_print (file, ivs->cands, " candidates: ","\n");
5992
5993 for (i = 0; i < ivs->upto; i++)
5994 {
5995 struct iv_use *use = iv_use (data, i);
5996 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5997 if (cp)
5998 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5999 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
6000 else
6001 fprintf (file, " use:%d --> ??\n", use->id);
6002 }
6003
6004 for (i = 1; i <= data->max_inv_id; i++)
6005 if (ivs->n_invariant_uses[i])
6006 {
6007 fprintf (file, "%s%d", pref, i);
6008 pref = ", ";
6009 }
6010 fprintf (file, "\n\n");
6011 }
6012
6013 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6014 of the new set, and store the differences in DELTA. The number of induction
6015 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when true,
6016 the function tries to find a solution with minimal iv candidates. */
6017
6018 static comp_cost
6019 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6020 struct iv_cand *cand, struct iv_ca_delta **delta,
6021 unsigned *n_ivs, bool min_ncand)
6022 {
6023 unsigned i;
6024 comp_cost cost;
6025 struct iv_use *use;
6026 struct cost_pair *old_cp, *new_cp;
6027
6028 *delta = NULL;
6029 for (i = 0; i < ivs->upto; i++)
6030 {
6031 use = iv_use (data, i);
6032 old_cp = iv_ca_cand_for_use (ivs, use);
6033
6034 if (old_cp
6035 && old_cp->cand == cand)
6036 continue;
6037
6038 new_cp = get_use_iv_cost (data, use, cand);
6039 if (!new_cp)
6040 continue;
6041
6042 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6043 continue;
6044
6045 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6046 continue;
6047
6048 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6049 }
6050
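/* Temporarily commit the accumulated changes to measure the cost (and
size) of the extended set, then revert them; the caller decides
whether to commit DELTA for real.  */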
6051 iv_ca_delta_commit (data, ivs, *delta, true);
6052 cost = iv_ca_cost (ivs);
6053 if (n_ivs)
6054 *n_ivs = iv_ca_n_cands (ivs);
6055 iv_ca_delta_commit (data, ivs, *delta, false);
6056
6057 return cost;
6058 }
6059
6060 /* Try narrowing set IVS by removing CAND. Return the cost of
6061 the new set and store the differences in DELTA. START is
6062 the candidate with which we start narrowing. */
6063
6064 static comp_cost
6065 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6066 struct iv_cand *cand, struct iv_cand *start,
6067 struct iv_ca_delta **delta)
6068 {
6069 unsigned i, ci;
6070 struct iv_use *use;
6071 struct cost_pair *old_cp, *new_cp, *cp;
6072 bitmap_iterator bi;
6073 struct iv_cand *cnd;
6074 comp_cost cost, best_cost, acost;
6075
6076 *delta = NULL;
6077 for (i = 0; i < n_iv_uses (data); i++)
6078 {
6079 use = iv_use (data, i);
6080
6081 old_cp = iv_ca_cand_for_use (ivs, use);
6082 if (old_cp->cand != cand)
6083 continue;
6084
6085 best_cost = iv_ca_cost (ivs);
6086 /* Start narrowing with START. */
6087 new_cp = get_use_iv_cost (data, use, start);
6088
6089 if (data->consider_all_candidates)
6090 {
6091 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6092 {
6093 if (ci == cand->id || (start && ci == start->id))
6094 continue;
6095
6096 cnd = iv_cand (data, ci);
6097
6098 cp = get_use_iv_cost (data, use, cnd);
6099 if (!cp)
6100 continue;
6101
6102 iv_ca_set_cp (data, ivs, use, cp);
6103 acost = iv_ca_cost (ivs);
6104
6105 if (compare_costs (acost, best_cost) < 0)
6106 {
6107 best_cost = acost;
6108 new_cp = cp;
6109 }
6110 }
6111 }
6112 else
6113 {
6114 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6115 {
6116 if (ci == cand->id || (start && ci == start->id))
6117 continue;
6118
6119 cnd = iv_cand (data, ci);
6120
6121 cp = get_use_iv_cost (data, use, cnd);
6122 if (!cp)
6123 continue;
6124
6125 iv_ca_set_cp (data, ivs, use, cp);
6126 acost = iv_ca_cost (ivs);
6127
6128 if (compare_costs (acost, best_cost) < 0)
6129 {
6130 best_cost = acost;
6131 new_cp = cp;
6132 }
6133 }
6134 }
6135 /* Restore to old cp for use. */
6136 iv_ca_set_cp (data, ivs, use, old_cp);
6137
6138 if (!new_cp)
6139 {
6140 iv_ca_delta_free (delta);
6141 return infinite_cost;
6142 }
6143
6144 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6145 }
6146
6147 iv_ca_delta_commit (data, ivs, *delta, true);
6148 cost = iv_ca_cost (ivs);
6149 iv_ca_delta_commit (data, ivs, *delta, false);
6150
6151 return cost;
6152 }
6153
6154 /* Try optimizing the set of candidates IVS by removing candidates other
6155 than EXCEPT_CAND from it. Return the cost of the new set, and store
6156 differences in DELTA. */
6157
6158 static comp_cost
6159 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6160 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6161 {
6162 bitmap_iterator bi;
6163 struct iv_ca_delta *act_delta, *best_delta;
6164 unsigned i;
6165 comp_cost best_cost, acost;
6166 struct iv_cand *cand;
6167
6168 best_delta = NULL;
6169 best_cost = iv_ca_cost (ivs);
6170
6171 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6172 {
6173 cand = iv_cand (data, i);
6174
6175 if (cand == except_cand)
6176 continue;
6177
6178 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6179
6180 if (compare_costs (acost, best_cost) < 0)
6181 {
6182 best_cost = acost;
6183 iv_ca_delta_free (&best_delta);
6184 best_delta = act_delta;
6185 }
6186 else
6187 iv_ca_delta_free (&act_delta);
6188 }
6189
6190 if (!best_delta)
6191 {
6192 *delta = NULL;
6193 return best_cost;
6194 }
6195
6196 /* Recurse to possibly remove other unnecessary ivs. */
6197 iv_ca_delta_commit (data, ivs, best_delta, true);
6198 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6199 iv_ca_delta_commit (data, ivs, best_delta, false);
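/* Put the changes found at this level in front of those from the
recursive call, so that committing *DELTA applies them in the order
in which they were discovered.  */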
6200 *delta = iv_ca_delta_join (best_delta, *delta);
6201 return best_cost;
6202 }
6203
6204 /* Check if CAND_IDX is a candidate other than OLD_CAND that has a
6205 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6206 the corresponding cost_pair, otherwise just return BEST_CP. */
6207
6208 static struct cost_pair*
6209 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6210 unsigned int cand_idx, struct iv_cand *old_cand,
6211 struct cost_pair *best_cp)
6212 {
6213 struct iv_cand *cand;
6214 struct cost_pair *cp;
6215
6216 gcc_assert (old_cand != NULL && best_cp != NULL);
6217 if (cand_idx == old_cand->id)
6218 return best_cp;
6219
6220 cand = iv_cand (data, cand_idx);
6221 cp = get_use_iv_cost (data, use, cand);
6222 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6223 return cp;
6224
6225 return best_cp;
6226 }
6227
6228 /* Try breaking the locally optimal fixed point for IVS by replacing candidates
6229 that are used by more than one iv use. For each of those candidates,
6230 this function tries to represent the iv uses under that candidate using
6231 other ones with lower local cost, then tries to prune the new set.
6232 If the new set has lower cost, it returns the new cost after recording
6233 the candidate replacement in the list DELTA. */
6234
6235 static comp_cost
6236 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6237 struct iv_ca_delta **delta)
6238 {
6239 bitmap_iterator bi, bj;
6240 unsigned int i, j, k;
6241 struct iv_use *use;
6242 struct iv_cand *cand;
6243 comp_cost orig_cost, acost;
6244 struct iv_ca_delta *act_delta, *tmp_delta;
6245 struct cost_pair *old_cp, *best_cp = NULL;
6246
6247 *delta = NULL;
6248 orig_cost = iv_ca_cost (ivs);
6249
6250 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6251 {
6252 if (ivs->n_cand_uses[i] == 1
6253 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6254 continue;
6255
6256 cand = iv_cand (data, i);
6257
6258 act_delta = NULL;
6259 /* Represent uses under current candidate using other ones with
6260 lower local cost. */
6261 for (j = 0; j < ivs->upto; j++)
6262 {
6263 use = iv_use (data, j);
6264 old_cp = iv_ca_cand_for_use (ivs, use);
6265
6266 if (old_cp->cand != cand)
6267 continue;
6268
6269 best_cp = old_cp;
6270 if (data->consider_all_candidates)
6271 for (k = 0; k < n_iv_cands (data); k++)
6272 best_cp = cheaper_cost_with_cand (data, use, k,
6273 old_cp->cand, best_cp);
6274 else
6275 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6276 best_cp = cheaper_cost_with_cand (data, use, k,
6277 old_cp->cand, best_cp);
6278
6279 if (best_cp == old_cp)
6280 continue;
6281
6282 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6283 }
6284 /* No need for further pruning. */
6285 if (!act_delta)
6286 continue;
6287
6288 /* Prune the new candidate set. */
6289 iv_ca_delta_commit (data, ivs, act_delta, true);
6290 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6291 iv_ca_delta_commit (data, ivs, act_delta, false);
6292 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6293
6294 if (compare_costs (acost, orig_cost) < 0)
6295 {
6296 *delta = act_delta;
6297 return acost;
6298 }
6299 else
6300 iv_ca_delta_free (&act_delta);
6301 }
6302
6303 return orig_cost;
6304 }
6305
6306 /* Tries to extend the set IVS in the best possible way in order
6307 to express the USE. If ORIGINALP is true, prefer candidates from
6308 the original set of IVs, otherwise favor important candidates not
6309 based on any memory object. */
6310
6311 static bool
6312 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6313 struct iv_use *use, bool originalp)
6314 {
6315 comp_cost best_cost, act_cost;
6316 unsigned i;
6317 bitmap_iterator bi;
6318 struct iv_cand *cand;
6319 struct iv_ca_delta *best_delta = NULL, *act_delta;
6320 struct cost_pair *cp;
6321
6322 iv_ca_add_use (data, ivs, use);
6323 best_cost = iv_ca_cost (ivs);
6324 cp = iv_ca_cand_for_use (ivs, use);
6325 if (cp)
6326 {
6327 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6328 iv_ca_set_no_cp (data, ivs, use);
6329 }
6330
6331 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6332 first try important candidates not based on any memory object. Only if
6333 this fails, try the specific ones. Rationale -- in loops with many
6334 variables the best choice often is to use just one generic biv. If we
6335 added here many ivs specific to the uses, the optimization algorithm later
6336 would be likely to get stuck in a local minimum, thus causing us to create
6337 too many ivs. The approach from few ivs to more seems more likely to be
6338 successful -- starting from few ivs, replacing an expensive use by a
6339 specific iv should always be a win. */
6340 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6341 {
6342 cand = iv_cand (data, i);
6343
6344 if (originalp && cand->pos != IP_ORIGINAL)
6345 continue;
6346
6347 if (!originalp && cand->iv->base_object != NULL_TREE)
6348 continue;
6349
6350 if (iv_ca_cand_used_p (ivs, cand))
6351 continue;
6352
6353 cp = get_use_iv_cost (data, use, cand);
6354 if (!cp)
6355 continue;
6356
6357 iv_ca_set_cp (data, ivs, use, cp);
6358 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6359 true);
6360 iv_ca_set_no_cp (data, ivs, use);
6361 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6362
6363 if (compare_costs (act_cost, best_cost) < 0)
6364 {
6365 best_cost = act_cost;
6366
6367 iv_ca_delta_free (&best_delta);
6368 best_delta = act_delta;
6369 }
6370 else
6371 iv_ca_delta_free (&act_delta);
6372 }
6373
6374 if (infinite_cost_p (best_cost))
6375 {
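/* The important candidates did not yield a finite-cost solution;
try every candidate that has a cost recorded for this use.  */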
6376 for (i = 0; i < use->n_map_members; i++)
6377 {
6378 cp = use->cost_map + i;
6379 cand = cp->cand;
6380 if (!cand)
6381 continue;
6382
6383 /* Already tried this. */
6384 if (cand->important)
6385 {
6386 if (originalp && cand->pos == IP_ORIGINAL)
6387 continue;
6388 if (!originalp && cand->iv->base_object == NULL_TREE)
6389 continue;
6390 }
6391
6392 if (iv_ca_cand_used_p (ivs, cand))
6393 continue;
6394
6395 act_delta = NULL;
6396 iv_ca_set_cp (data, ivs, use, cp);
6397 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6398 iv_ca_set_no_cp (data, ivs, use);
6399 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6400 cp, act_delta);
6401
6402 if (compare_costs (act_cost, best_cost) < 0)
6403 {
6404 best_cost = act_cost;
6405
6406 if (best_delta)
6407 iv_ca_delta_free (&best_delta);
6408 best_delta = act_delta;
6409 }
6410 else
6411 iv_ca_delta_free (&act_delta);
6412 }
6413 }
6414
6415 iv_ca_delta_commit (data, ivs, best_delta, true);
6416 iv_ca_delta_free (&best_delta);
6417
6418 return !infinite_cost_p (best_cost);
6419 }
6420
6421 /* Finds an initial assignment of candidates to uses. */
6422
6423 static struct iv_ca *
6424 get_initial_solution (struct ivopts_data *data, bool originalp)
6425 {
6426 struct iv_ca *ivs = iv_ca_new (data);
6427 unsigned i;
6428
6429 for (i = 0; i < n_iv_uses (data); i++)
6430 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6431 {
6432 iv_ca_free (&ivs);
6433 return NULL;
6434 }
6435
6436 return ivs;
6437 }
6438
6439 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6440 points to a bool variable; if it is true, this function tries to break
6441 a locally optimal fixed point by replacing candidates in IVS. */
6442
6443 static bool
6444 try_improve_iv_set (struct ivopts_data *data,
6445 struct iv_ca *ivs, bool *try_replace_p)
6446 {
6447 unsigned i, n_ivs;
6448 comp_cost acost, best_cost = iv_ca_cost (ivs);
6449 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6450 struct iv_cand *cand;
6451
6452 /* Try extending the set of induction variables by one. */
6453 for (i = 0; i < n_iv_cands (data); i++)
6454 {
6455 cand = iv_cand (data, i);
6456
6457 if (iv_ca_cand_used_p (ivs, cand))
6458 continue;
6459
6460 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6461 if (!act_delta)
6462 continue;
6463
6464 /* If we successfully added the candidate and the set is small enough,
6465 try optimizing it by removing other candidates. */
6466 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6467 {
6468 iv_ca_delta_commit (data, ivs, act_delta, true);
6469 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6470 iv_ca_delta_commit (data, ivs, act_delta, false);
6471 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6472 }
6473
6474 if (compare_costs (acost, best_cost) < 0)
6475 {
6476 best_cost = acost;
6477 iv_ca_delta_free (&best_delta);
6478 best_delta = act_delta;
6479 }
6480 else
6481 iv_ca_delta_free (&act_delta);
6482 }
6483
6484 if (!best_delta)
6485 {
6486 /* Try removing the candidates from the set instead. */
6487 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6488
6489 if (!best_delta && *try_replace_p)
6490 {
6491 *try_replace_p = false;
6492 /* So far the candidate selection algorithm tends to choose fewer IVs,
6493 which handles loops that have many variables but for which the best
6494 choice is often a single general biv. One weakness is that it cannot
6495 handle the opposite case, in which different candidates should be
6496 chosen for different uses. To solve this problem, we replace
6497 candidates in the manner described in the comment of iv_ca_replace,
6498 thus giving the general algorithm a chance to break the locally
6499 optimal fixed point in these cases. */
6500 best_cost = iv_ca_replace (data, ivs, &best_delta);
6501 }
6502
6503 if (!best_delta)
6504 return false;
6505 }
6506
6507 iv_ca_delta_commit (data, ivs, best_delta, true);
6508 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6509 iv_ca_delta_free (&best_delta);
6510 return true;
6511 }
6512
6513 /* Attempts to find the optimal set of induction variables. We use a simple
6514 greedy heuristic -- we try to replace at most one candidate in the selected
6515 solution and remove the unused ivs while this improves the cost. */
6516
6517 static struct iv_ca *
6518 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6519 {
6520 struct iv_ca *set;
6521 bool try_replace_p = true;
6522
6523 /* Get the initial solution. */
6524 set = get_initial_solution (data, originalp);
6525 if (!set)
6526 {
6527 if (dump_file && (dump_flags & TDF_DETAILS))
6528 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6529 return NULL;
6530 }
6531
6532 if (dump_file && (dump_flags & TDF_DETAILS))
6533 {
6534 fprintf (dump_file, "Initial set of candidates:\n");
6535 iv_ca_dump (data, dump_file, set);
6536 }
6537
6538 while (try_improve_iv_set (data, set, &try_replace_p))
6539 {
6540 if (dump_file && (dump_flags & TDF_DETAILS))
6541 {
6542 fprintf (dump_file, "Improved to:\n");
6543 iv_ca_dump (data, dump_file, set);
6544 }
6545 }
6546
6547 return set;
6548 }
6549
6550 static struct iv_ca *
6551 find_optimal_iv_set (struct ivopts_data *data)
6552 {
6553 unsigned i;
6554 struct iv_ca *set, *origset;
6555 struct iv_use *use;
6556 comp_cost cost, origcost;
6557
6558 /* Determine the cost based on a strategy that starts with the original IVs,
6559 and try again using a strategy that prefers candidates not based
6560 on any memory object. */
6561 origset = find_optimal_iv_set_1 (data, true);
6562 set = find_optimal_iv_set_1 (data, false);
6563
6564 if (!origset && !set)
6565 return NULL;
6566
6567 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6568 cost = set ? iv_ca_cost (set) : infinite_cost;
6569
6570 if (dump_file && (dump_flags & TDF_DETAILS))
6571 {
6572 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6573 origcost.cost, origcost.complexity);
6574 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6575 cost.cost, cost.complexity);
6576 }
6577
6578 /* Choose the one with the best cost. */
6579 if (compare_costs (origcost, cost) <= 0)
6580 {
6581 if (set)
6582 iv_ca_free (&set);
6583 set = origset;
6584 }
6585 else if (origset)
6586 iv_ca_free (&origset);
6587
6588 for (i = 0; i < n_iv_uses (data); i++)
6589 {
6590 use = iv_use (data, i);
6591 use->selected = iv_ca_cand_for_use (set, use)->cand;
6592 }
6593
6594 return set;
6595 }
6596
6597 /* Creates a new induction variable corresponding to CAND. */
6598
6599 static void
6600 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6601 {
6602 gimple_stmt_iterator incr_pos;
6603 tree base;
6604 bool after = false;
6605
6606 if (!cand->iv)
6607 return;
6608
6609 switch (cand->pos)
6610 {
6611 case IP_NORMAL:
6612 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6613 break;
6614
6615 case IP_END:
6616 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6617 after = true;
6618 break;
6619
6620 case IP_AFTER_USE:
6621 after = true;
6622 /* fall through */
6623 case IP_BEFORE_USE:
6624 incr_pos = gsi_for_stmt (cand->incremented_at);
6625 break;
6626
6627 case IP_ORIGINAL:
6628 /* Mark that the iv is preserved. */
6629 name_info (data, cand->var_before)->preserve_biv = true;
6630 name_info (data, cand->var_after)->preserve_biv = true;
6631
6632 /* Rewrite the increment so that it uses var_before directly. */
6633 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6634 return;
6635 }
6636
6637 gimple_add_tmp_var (cand->var_before);
6638
6639 base = unshare_expr (cand->iv->base);
6640
6641 create_iv (base, unshare_expr (cand->iv->step),
6642 cand->var_before, data->current_loop,
6643 &incr_pos, after, &cand->var_before, &cand->var_after);
6644 }
6645
6646 /* Creates new induction variables described in SET. */
6647
6648 static void
6649 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6650 {
6651 unsigned i;
6652 struct iv_cand *cand;
6653 bitmap_iterator bi;
6654
6655 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6656 {
6657 cand = iv_cand (data, i);
6658 create_new_iv (data, cand);
6659 }
6660
6661 if (dump_file && (dump_flags & TDF_DETAILS))
6662 {
6663 fprintf (dump_file, "Selected IV set for loop %d",
6664 data->current_loop->num);
6665 if (data->loop_loc != UNKNOWN_LOCATION)
6666 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6667 LOCATION_LINE (data->loop_loc));
6668 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6669 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6670 {
6671 cand = iv_cand (data, i);
6672 dump_cand (dump_file, cand);
6673 }
6674 fprintf (dump_file, "\n");
6675 }
6676 }
6677
6678 /* Rewrites USE (definition of iv used in a nonlinear expression)
6679 using candidate CAND. */
6680
6681 static void
6682 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6683 struct iv_use *use, struct iv_cand *cand)
6684 {
6685 tree comp;
6686 tree op, tgt;
6687 gassign *ass;
6688 gimple_stmt_iterator bsi;
6689
6690 /* An important special case -- if we are asked to express value of
6691 the original iv by itself, just exit; there is no need to
6692 introduce a new computation (that might also need casting the
6693 variable to unsigned and back). */
6694 if (cand->pos == IP_ORIGINAL
6695 && cand->incremented_at == use->stmt)
6696 {
6697 enum tree_code stmt_code;
6698
6699 gcc_assert (is_gimple_assign (use->stmt));
6700 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6701
6702 /* Check whether we may leave the computation unchanged.
6703 This is the case only if it does not rely on other
6704 computations in the loop -- otherwise, the computation
6705 we rely upon may be removed in remove_unused_ivs,
6706 thus leading to ICE. */
6707 stmt_code = gimple_assign_rhs_code (use->stmt);
6708 if (stmt_code == PLUS_EXPR
6709 || stmt_code == MINUS_EXPR
6710 || stmt_code == POINTER_PLUS_EXPR)
6711 {
6712 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6713 op = gimple_assign_rhs2 (use->stmt);
6714 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6715 op = gimple_assign_rhs1 (use->stmt);
6716 else
6717 op = NULL_TREE;
6718 }
6719 else
6720 op = NULL_TREE;
6721
6722 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6723 return;
6724 }
6725
6726 comp = get_computation (data->current_loop, use, cand);
6727 gcc_assert (comp != NULL_TREE);
6728
6729 switch (gimple_code (use->stmt))
6730 {
6731 case GIMPLE_PHI:
6732 tgt = PHI_RESULT (use->stmt);
6733
6734 /* If we should keep the biv, do not replace it. */
6735 if (name_info (data, tgt)->preserve_biv)
6736 return;
6737
6738 bsi = gsi_after_labels (gimple_bb (use->stmt));
6739 break;
6740
6741 case GIMPLE_ASSIGN:
6742 tgt = gimple_assign_lhs (use->stmt);
6743 bsi = gsi_for_stmt (use->stmt);
6744 break;
6745
6746 default:
6747 gcc_unreachable ();
6748 }
6749
6750 if (!valid_gimple_rhs_p (comp)
6751 || (gimple_code (use->stmt) != GIMPLE_PHI
6752 /* We can't allow re-allocating the stmt as it might be pointed
6753 to still. */
6754 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6755 >= gimple_num_ops (gsi_stmt (bsi)))))
6756 {
6757 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6758 true, GSI_SAME_STMT);
6759 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6760 {
6761 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6762 /* As this isn't a plain copy we have to reset alignment
6763 information. */
6764 if (SSA_NAME_PTR_INFO (comp))
6765 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6766 }
6767 }
6768
6769 if (gimple_code (use->stmt) == GIMPLE_PHI)
6770 {
6771 ass = gimple_build_assign (tgt, comp);
6772 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6773
6774 bsi = gsi_for_stmt (use->stmt);
6775 remove_phi_node (&bsi, false);
6776 }
6777 else
6778 {
6779 gimple_assign_set_rhs_from_tree (&bsi, comp);
6780 use->stmt = gsi_stmt (bsi);
6781 }
6782 }
6783
6784 /* Performs a peephole optimization to reorder the iv update statement with
6785 a mem ref to enable instruction combining in later phases. The mem ref uses
6786 the iv value before the update, so the reordering transformation requires
6787 adjustment of the offset. CAND is the selected IV_CAND.
6788
6789 Example:
6790
6791 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6792 iv2 = iv1 + 1;
6793
6794 if (t < val) (1)
6795 goto L;
6796 goto Head;
6797
6798
6799 directly propagating t over to (1) will introduce an overlapping live range,
6800 thus increasing register pressure. This peephole transforms it into:
6801
6802
6803 iv2 = iv1 + 1;
6804 t = MEM_REF (base, iv2, 8, 8);
6805 if (t < val)
6806 goto L;
6807 goto Head;
6808 */
6809
6810 static void
6811 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6812 {
6813 tree var_after;
6814 gimple iv_update, stmt;
6815 basic_block bb;
6816 gimple_stmt_iterator gsi, gsi_iv;
6817
6818 if (cand->pos != IP_NORMAL)
6819 return;
6820
6821 var_after = cand->var_after;
6822 iv_update = SSA_NAME_DEF_STMT (var_after);
6823
6824 bb = gimple_bb (iv_update);
6825 gsi = gsi_last_nondebug_bb (bb);
6826 stmt = gsi_stmt (gsi);
6827
6828 /* Only handle a conditional statement for now. */
6829 if (gimple_code (stmt) != GIMPLE_COND)
6830 return;
6831
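/* Now check that the iv update immediately precedes the condition,
and that the use statement immediately precedes the iv update.  */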
6832 gsi_prev_nondebug (&gsi);
6833 stmt = gsi_stmt (gsi);
6834 if (stmt != iv_update)
6835 return;
6836
6837 gsi_prev_nondebug (&gsi);
6838 if (gsi_end_p (gsi))
6839 return;
6840
6841 stmt = gsi_stmt (gsi);
6842 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6843 return;
6844
6845 if (stmt != use->stmt)
6846 return;
6847
6848 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6849 return;
6850
6851 if (dump_file && (dump_flags & TDF_DETAILS))
6852 {
6853 fprintf (dump_file, "Reordering \n");
6854 print_gimple_stmt (dump_file, iv_update, 0, 0);
6855 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6856 fprintf (dump_file, "\n");
6857 }
6858
6859 gsi = gsi_for_stmt (use->stmt);
6860 gsi_iv = gsi_for_stmt (iv_update);
6861 gsi_move_before (&gsi_iv, &gsi);
6862
6863 cand->pos = IP_BEFORE_USE;
6864 cand->incremented_at = use->stmt;
6865 }
6866
6867 /* Rewrites USE (address that is an iv) using candidate CAND. */
6868
6869 static void
6870 rewrite_use_address_1 (struct ivopts_data *data,
6871 struct iv_use *use, struct iv_cand *cand)
6872 {
6873 aff_tree aff;
6874 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6875 tree base_hint = NULL_TREE;
6876 tree ref, iv;
6877 bool ok;
6878
6879 adjust_iv_update_pos (cand, use);
6880 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6881 gcc_assert (ok);
6882 unshare_aff_combination (&aff);
6883
6884 /* To avoid undefined overflow problems, all IV candidates use unsigned
6885 integer types. The drawback is that this makes it impossible for
6886 create_mem_ref to distinguish an IV that is based on a memory object
6887 from one that represents simply an offset.
6888
6889 To work around this problem, we pass a hint to create_mem_ref that
6890 indicates which variable (if any) in aff is an IV based on a memory
6891 object. Note that we only consider the candidate. If this is not
6892 based on an object, the base of the reference is in some subexpression
6893 of the use -- but these will use pointer types, so they are recognized
6894 by the create_mem_ref heuristics anyway. */
6895 if (cand->iv->base_object)
6896 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6897
6898 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6899 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6900 reference_alias_ptr_type (*use->op_p),
6901 iv, base_hint, data->speed);
6902 copy_ref_info (ref, *use->op_p);
6903 *use->op_p = ref;
6904 }
6905
6906 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6907 first use of a group, rewrites sub uses in the group too. */
6908
6909 static void
6910 rewrite_use_address (struct ivopts_data *data,
6911 struct iv_use *use, struct iv_cand *cand)
6912 {
6913 struct iv_use *next;
6914
6915 gcc_assert (use->sub_id == 0);
6916 rewrite_use_address_1 (data, use, cand);
6917 update_stmt (use->stmt);
6918
6919 for (next = use->next; next != NULL; next = next->next)
6920 {
6921 rewrite_use_address_1 (data, next, cand);
6922 update_stmt (next->stmt);
6923 }
6924
6925 return;
6926 }
6927
6928 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6929 candidate CAND. */
6930
6931 static void
6932 rewrite_use_compare (struct ivopts_data *data,
6933 struct iv_use *use, struct iv_cand *cand)
6934 {
6935 tree comp, *var_p, op, bound;
6936 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6937 enum tree_code compare;
6938 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6939 bool ok;
6940
6941 bound = cp->value;
6942 if (bound)
6943 {
6944 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6945 tree var_type = TREE_TYPE (var);
6946 gimple_seq stmts;
6947
6948 if (dump_file && (dump_flags & TDF_DETAILS))
6949 {
6950 fprintf (dump_file, "Replacing exit test: ");
6951 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6952 }
6953 compare = cp->comp;
6954 bound = unshare_expr (fold_convert (var_type, bound));
6955 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6956 if (stmts)
6957 gsi_insert_seq_on_edge_immediate (
6958 loop_preheader_edge (data->current_loop),
6959 stmts);
6960
6961 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6962 gimple_cond_set_lhs (cond_stmt, var);
6963 gimple_cond_set_code (cond_stmt, compare);
6964 gimple_cond_set_rhs (cond_stmt, op);
6965 return;
6966 }
6967
6968 /* The induction variable elimination failed; just express the original
6969 giv. */
6970 comp = get_computation (data->current_loop, use, cand);
6971 gcc_assert (comp != NULL_TREE);
6972
6973 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6974 gcc_assert (ok);
6975
6976 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6977 true, GSI_SAME_STMT);
6978 }
6979
6980 /* Rewrites USE using candidate CAND. */
6981
6982 static void
6983 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6984 {
6985 switch (use->type)
6986 {
6987 case USE_NONLINEAR_EXPR:
6988 rewrite_use_nonlinear_expr (data, use, cand);
6989 break;
6990
6991 case USE_ADDRESS:
6992 rewrite_use_address (data, use, cand);
6993 break;
6994
6995 case USE_COMPARE:
6996 rewrite_use_compare (data, use, cand);
6997 break;
6998
6999 default:
7000 gcc_unreachable ();
7001 }
7002
7003 update_stmt (use->stmt);
7004 }
7005
7006 /* Rewrite the uses using the selected induction variables. */
7007
7008 static void
7009 rewrite_uses (struct ivopts_data *data)
7010 {
7011 unsigned i;
7012 struct iv_cand *cand;
7013 struct iv_use *use;
7014
7015 for (i = 0; i < n_iv_uses (data); i++)
7016 {
7017 use = iv_use (data, i);
7018 cand = use->selected;
7019 gcc_assert (cand);
7020
7021 rewrite_use (data, use, cand);
7022 }
7023 }
7024
7025 /* Removes the ivs that are not used after rewriting. */
7026
7027 static void
7028 remove_unused_ivs (struct ivopts_data *data)
7029 {
7030 unsigned j;
7031 bitmap_iterator bi;
7032 bitmap toremove = BITMAP_ALLOC (NULL);
7033
7034 /* Figure out an order in which to release SSA DEFs so that we don't
7035 release something that we'd have to propagate into a debug stmt
7036 afterwards. */
7037 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7038 {
7039 struct version_info *info;
7040
7041 info = ver_info (data, j);
7042 if (info->iv
7043 && !integer_zerop (info->iv->step)
7044 && !info->inv_id
7045 && !info->iv->have_use_for
7046 && !info->preserve_biv)
7047 {
7048 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7049
7050 tree def = info->iv->ssa_name;
7051
7052 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7053 {
7054 imm_use_iterator imm_iter;
7055 use_operand_p use_p;
7056 gimple stmt;
7057 int count = 0;
7058
7059 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7060 {
7061 if (!gimple_debug_bind_p (stmt))
7062 continue;
7063
7064 /* We just want to determine whether to do nothing
7065 (count == 0), to substitute the computed
7066 expression into a single use of the SSA DEF by
7067 itself (count == 1), or to use a debug temp
7068 because the SSA DEF is used multiple times or as
7069 part of a larger expression (count > 1). */
7070 count++;
7071 if (gimple_debug_bind_get_value (stmt) != def)
7072 count++;
7073
7074 if (count > 1)
7075 BREAK_FROM_IMM_USE_STMT (imm_iter);
7076 }
7077
7078 if (!count)
7079 continue;
7080
7081 struct iv_use dummy_use;
7082 struct iv_cand *best_cand = NULL, *cand;
7083 unsigned i, best_pref = 0, cand_pref;
7084
7085 memset (&dummy_use, 0, sizeof (dummy_use));
7086 dummy_use.iv = info->iv;
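/* Among the selected candidates, prefer one that matches the removed
iv as closely as possible for the debug bind: an equal step is
weighted highest (4), an equal base mode next (2), and a constant
base lowest (1).  */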
7087 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7088 {
7089 cand = iv_use (data, i)->selected;
7090 if (cand == best_cand)
7091 continue;
7092 cand_pref = operand_equal_p (cand->iv->step,
7093 info->iv->step, 0)
7094 ? 4 : 0;
7095 cand_pref
7096 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7097 == TYPE_MODE (TREE_TYPE (info->iv->base))
7098 ? 2 : 0;
7099 cand_pref
7100 += TREE_CODE (cand->iv->base) == INTEGER_CST
7101 ? 1 : 0;
7102 if (best_cand == NULL || best_pref < cand_pref)
7103 {
7104 best_cand = cand;
7105 best_pref = cand_pref;
7106 }
7107 }
7108
7109 if (!best_cand)
7110 continue;
7111
7112 tree comp = get_computation_at (data->current_loop,
7113 &dummy_use, best_cand,
7114 SSA_NAME_DEF_STMT (def));
7115 if (!comp)
7116 continue;
7117
7118 if (count > 1)
7119 {
7120 tree vexpr = make_node (DEBUG_EXPR_DECL);
7121 DECL_ARTIFICIAL (vexpr) = 1;
7122 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7123 if (SSA_NAME_VAR (def))
7124 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7125 else
7126 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7127 gdebug *def_temp
7128 = gimple_build_debug_bind (vexpr, comp, NULL);
7129 gimple_stmt_iterator gsi;
7130
7131 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7132 gsi = gsi_after_labels (gimple_bb
7133 (SSA_NAME_DEF_STMT (def)));
7134 else
7135 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7136
7137 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7138 comp = vexpr;
7139 }
7140
7141 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7142 {
7143 if (!gimple_debug_bind_p (stmt))
7144 continue;
7145
7146 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7147 SET_USE (use_p, comp);
7148
7149 update_stmt (stmt);
7150 }
7151 }
7152 }
7153 }
7154
7155 release_defs_bitset (toremove);
7156
7157 BITMAP_FREE (toremove);
7158 }
7159
7160 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7161 for hash_map::traverse. */
7162
7163 bool
7164 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7165 {
7166 free (value);
7167 return true;
7168 }
7169
7170 /* Frees data allocated by the optimization of a single loop. */
7171
7172 static void
7173 free_loop_data (struct ivopts_data *data)
7174 {
7175 unsigned i, j;
7176 bitmap_iterator bi;
7177 tree obj;
7178
7179 if (data->niters)
7180 {
7181 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7182 delete data->niters;
7183 data->niters = NULL;
7184 }
7185
7186 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7187 {
7188 struct version_info *info;
7189
7190 info = ver_info (data, i);
7191 free (info->iv);
7192 info->iv = NULL;
7193 info->has_nonlin_use = false;
7194 info->preserve_biv = false;
7195 info->inv_id = 0;
7196 }
7197 bitmap_clear (data->relevant);
7198 bitmap_clear (data->important_candidates);
7199
7200 for (i = 0; i < n_iv_uses (data); i++)
7201 {
7202 struct iv_use *use = iv_use (data, i);
7203 struct iv_use *pre = use, *sub = use->next;
7204
7205 while (sub)
7206 {
7207 gcc_assert (sub->related_cands == NULL);
7208 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7209
7210 free (sub->iv);
7211 pre = sub;
7212 sub = sub->next;
7213 free (pre);
7214 }
7215
7216 free (use->iv);
7217 BITMAP_FREE (use->related_cands);
7218 for (j = 0; j < use->n_map_members; j++)
7219 if (use->cost_map[j].depends_on)
7220 BITMAP_FREE (use->cost_map[j].depends_on);
7221 free (use->cost_map);
7222 free (use);
7223 }
7224 data->iv_uses.truncate (0);
7225
7226 for (i = 0; i < n_iv_cands (data); i++)
7227 {
7228 struct iv_cand *cand = iv_cand (data, i);
7229
7230 free (cand->iv);
7231 if (cand->depends_on)
7232 BITMAP_FREE (cand->depends_on);
7233 free (cand);
7234 }
7235 data->iv_candidates.truncate (0);
7236
7237 if (data->version_info_size < num_ssa_names)
7238 {
7239 data->version_info_size = 2 * num_ssa_names;
7240 free (data->version_info);
7241 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7242 }
7243
7244 data->max_inv_id = 0;
7245
7246 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7247 SET_DECL_RTL (obj, NULL_RTX);
7248
7249 decl_rtl_to_reset.truncate (0);
7250
7251 data->inv_expr_tab->empty ();
7252 data->inv_expr_id = 0;
7253 }
7254
7255 /* Finalizes data structures used by the iv optimization pass and
7256 releases the memory allocated for DATA's members. */
7257
7258 static void
7259 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7260 {
7261 free_loop_data (data);
7262 free (data->version_info);
7263 BITMAP_FREE (data->relevant);
7264 BITMAP_FREE (data->important_candidates);
7265
7266 decl_rtl_to_reset.release ();
7267 data->iv_uses.release ();
7268 data->iv_candidates.release ();
7269 delete data->inv_expr_tab;
7270 data->inv_expr_tab = NULL;
7271 free_affine_expand_cache (&data->name_expansion_cache);
7272 }
7273
7274 /* Returns true if the loop body BODY includes any function calls. */
7275
7276 static bool
7277 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7278 {
7279 gimple_stmt_iterator gsi;
7280 unsigned i;
7281
7282 for (i = 0; i < num_nodes; i++)
7283 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7284 {
7285 gimple stmt = gsi_stmt (gsi);
7286 if (is_gimple_call (stmt)
7287 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7288 return true;
7289 }
7290 return false;
7291 }
7292
7293 /* Optimizes the LOOP. Returns true if anything changed. */
7294
7295 static bool
7296 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7297 {
7298 bool changed = false;
7299 struct iv_ca *iv_ca;
7300 edge exit = single_dom_exit (loop);
7301 basic_block *body;
7302
7303 gcc_assert (!data->niters);
7304 data->current_loop = loop;
7305 data->loop_loc = find_loop_location (loop);
7306 data->speed = optimize_loop_for_speed_p (loop);
7307
7308 if (dump_file && (dump_flags & TDF_DETAILS))
7309 {
7310 fprintf (dump_file, "Processing loop %d", loop->num);
7311 if (data->loop_loc != UNKNOWN_LOCATION)
7312 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7313 LOCATION_LINE (data->loop_loc));
7314 fprintf (dump_file, "\n");
7315
7316 if (exit)
7317 {
7318 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7319 exit->src->index, exit->dest->index);
7320 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7321 fprintf (dump_file, "\n");
7322 }
7323
7324 fprintf (dump_file, "\n");
7325 }
7326
7327 body = get_loop_body (loop);
7328 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7329 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7330 free (body);
7331
7332 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7333
7334 /* For each ssa name determines whether it behaves as an induction variable
7335 in some loop. */
7336 if (!find_induction_variables (data))
7337 goto finish;
7338
7339 /* Finds interesting uses (item 1). */
7340 find_interesting_uses (data);
7341 group_address_uses (data);
7342 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7343 goto finish;
7344
7345 /* Finds candidates for the induction variables (item 2). */
7346 find_iv_candidates (data);
7347
7348 /* Calculates the costs (item 3, part 1). */
7349 determine_iv_costs (data);
7350 determine_use_iv_costs (data);
7351 determine_set_costs (data);
7352
7353 /* Find the optimal set of induction variables (item 3, part 2). */
7354 iv_ca = find_optimal_iv_set (data);
7355 if (!iv_ca)
7356 goto finish;
7357 changed = true;
7358
7359 /* Create the new induction variables (item 4, part 1). */
7360 create_new_ivs (data, iv_ca);
7361 iv_ca_free (&iv_ca);
7362
7363 /* Rewrite the uses (item 4, part 2). */
7364 rewrite_uses (data);
7365
7366 /* Remove the ivs that are unused after rewriting. */
7367 remove_unused_ivs (data);
7368
7369 /* We have changed the structure of induction variables; it might happen
7370 that definitions in the scev database refer to some of them that were
7371 eliminated. */
7372 scev_reset ();
7373
7374 finish:
7375 free_loop_data (data);
7376
7377 return changed;
7378 }
7379
7380 /* Main entry point. Optimizes induction variables in loops. */
7381
7382 void
7383 tree_ssa_iv_optimize (void)
7384 {
7385 struct loop *loop;
7386 struct ivopts_data data;
7387
7388 tree_ssa_iv_optimize_init (&data);
7389
7390 /* Optimize the loops starting with the innermost ones. */
7391 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7392 {
7393 if (dump_file && (dump_flags & TDF_DETAILS))
7394 flow_loop_dump (loop, dump_file, NULL, 1);
7395
7396 tree_ssa_iv_optimize_loop (&data, loop);
7397 }
7398
7399 tree_ssa_iv_optimize_finalize (&data);
7400 }