/* Induction variable optimizations.
   Copyright (C) 2003-2015 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does so in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best
         induction variable in the set and adds its cost to the sum.  The cost
         reflects the time spent on modifying the induction variable's value
         to be usable for the given purpose (adding base and offset for
         arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible; it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
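
/* As an illustrative sketch (not taken from any particular testcase),
   consider a loop like

     for (i = 0; i < n; i++)
       sum += a[i];

   With 4-byte elements, the body computes the address a + 4*i from the
   counter i on every iteration.  With strength reduction, the pass may
   instead select a pointer induction variable, roughly

     for (p = &a[0]; p != &a[n]; p++)
       sum += *p;

   eliminating the multiplication and, if i has no other uses, the original
   counter as well.  Whether this is profitable is decided by the cost
   model described above.  */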

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "alias.h"
#include "symtab.h"
#include "tree.h"
#include "fold-const.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "predict.h"
#include "hard-reg-set.h"
#include "function.h"
#include "dominance.h"
#include "cfg.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "rtl.h"
#include "flags.h"
#include "insn-config.h"
#include "expmed.h"
#include "dojump.h"
#include "explow.h"
#include "calls.h"
#include "emit-rtl.h"
#include "varasm.h"
#include "stmt.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "tree-ssa-address.h"
#include "builtins.h"
#include "tree-vectorizer.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;		/* Initial value of the iv.  */
  tree base_object;	/* A memory object to which the induction
			   variable points.  */
  tree step;		/* Step of the iv (constant only).  */
  tree ssa_name;	/* The ssa name with the value.  */
  unsigned use_id;	/* The identifier of the use recorded for it,
			   if any (see have_use_for).  */
  bool biv_p;		/* Is it a biv?  */
  bool have_use_for;	/* Do we already have a use for it?  */
  bool no_overflow;	/* True if the iv doesn't overflow.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;		/* The ssa name.  */
  struct iv *iv;	/* Induction variable description.  */
  bool has_nonlin_use;	/* For a loop-level invariant, whether it is used in
			   an expression that is not an induction variable.  */
  bool preserve_biv;	/* For the original biv, whether to preserve it.  */
  unsigned inv_id;	/* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,	/* Use in a nonlinear expression.  */
  USE_ADDRESS,		/* Use in an address.  */
  USE_COMPARE		/* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;		/* The runtime cost.  */
  unsigned complexity;	/* The estimate of the complexity of the code for
			   the computation (in no concrete units --
			   complexity field should be larger for more
			   complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand;	/* The candidate.  */
  comp_cost cost;	/* The cost.  */
  bitmap depends_on;	/* The list of invariants that have to be
			   preserved.  */
  tree value;		/* For final value elimination, the expression for
			   the final value of the iv.  For iv elimination,
			   the new bound to compare with.  */
  enum tree_code comp;	/* For iv elimination, the comparison.  */
  int inv_expr_id;	/* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;		/* The id of the use.  */
  unsigned sub_id;	/* The id of the sub use.  */
  enum use_type type;	/* Type of the use.  */
  struct iv *iv;	/* The induction variable it is based on.  */
  gimple stmt;		/* Statement in which it occurs.  */
  tree *op_p;		/* The place where it occurs.  */
  bitmap related_cands;	/* The set of "related" iv candidates, plus the common
			   important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
			/* The costs w.r.t. the iv candidates.  */

  struct iv_cand *selected;
			/* The selected candidate.  */

  struct iv_use *next;	/* The next sub use.  */
  tree addr_base;	/* Base address with const offset stripped.  */
  unsigned HOST_WIDE_INT addr_offset;
			/* Const offset stripped from base address.  */
};
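
/* A hedged illustration of the sub-use list (the loop is hypothetical; the
   field names are the ones defined above): for two accesses a[i] and
   a[i + 1] in one loop, both address uses have the same stripped base
   &a[i] and the same step, so the second is chained to the first through
   NEXT as a sub use, with ADDR_OFFSET equal to sizeof (a[0]).  The chain
   is kept sorted by ADDR_OFFSET, which lets related addresses be costed
   against a single candidate.  */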

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,		/* At the end, just before the exit condition.  */
  IP_END,		/* At the end of the latch block.  */
  IP_BEFORE_USE,	/* Immediately before a specific use.  */
  IP_AFTER_USE,		/* Immediately after a specific use.  */
  IP_ORIGINAL		/* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;		/* The number of the candidate.  */
  bool important;	/* Whether this is an "important" candidate, i.e. such
			   that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;	/* Where it is computed.  */
  gimple incremented_at; /* For original biv, the statement where it is
			    incremented.  */
  tree var_before;	/* The variable used for it before increment.  */
  tree var_after;	/* The variable used for it after increment.  */
  struct iv *iv;	/* The value of the candidate.  NULL for
			   "pseudocandidate" used to indicate the possibility
			   to replace the final value of an iv by direct
			   computation of the value.  */
  unsigned cost;	/* Cost of the candidate.  */
  unsigned cost_step;	/* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
			      where it is incremented.  */
  bitmap depends_on;	/* The list of invariants that are used in step of the
			   biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
{
  static inline hashval_t hash (const iv_inv_expr_ent *);
  static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
			   const iv_inv_expr_ent *expr2)
{
  return expr1->hash == expr2->hash
	 && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;
  source_location loop_loc;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* Cache used by tree_to_aff_combination_expand.  */
  hash_map<tree, name_expansion *> *name_expansion_cache;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via a single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on the number of candidates below which all candidates are
   considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv, bool dump_name)
{
  if (iv->ssa_name && dump_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d", use->id);
  if (use->sub_id)
    fprintf (file, ".%d", use->sub_id);

  fprintf (file, "\n");

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv, false);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      do
	{
	  dump_use (file, use);
	  use = use->next;
	}
      while (use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
	   cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv, false);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
				  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
	return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
	return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
			    idx_contains_abnormal_ssa_name_p,
			    NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
	   || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
	return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
	return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
	 names that appear in phi nodes on abnormal edges, so that we do not
	 create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
				      exit, desc, true)
	  || contains_abnormal_ssa_name_p (desc->niter))
	{
	  XDELETE (desc);
	  desc = NULL;
	}
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  data->name_expansion_cache = NULL;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer cast to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
	return expr;

      if (TREE_CODE (base) == MEM_REF)
	return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
			   build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
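
/* For illustration (with hypothetical declarations int a[10], *p):
   determine_base_object maps &a[i] to (void *) &a via the ADDR_EXPR path,
   p + 4 to p itself via the POINTER_PLUS_EXPR and default cases, and a
   null pointer constant to NULL_TREE, since an integer constant points
   to no object.  */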

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
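
/* E.g. (illustrative, for an array int a[10] and a pointer p): this
   returns true for &a[0] (the ADDR_EXPR operand is an ARRAY_REF, not a
   DECL), and false both for &a (the operand is the DECL itself) and for
   p + 4 (no ADDR_EXPR is involved at all).  */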

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  NO_OVERFLOW implies the iv doesn't overflow.  */

static struct iv *
alloc_iv (tree base, tree step, bool no_overflow = false)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in base, except ones with DECL_P as operand.
     By doing this:
       1) More accurate cost can be computed for address expressions;
       2) Duplicate candidates won't be created for bases in different
	  forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;
  iv->no_overflow = no_overflow;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  NO_OVERFLOW implies the IV
   doesn't overflow.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
	bool no_overflow)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step, no_overflow);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
	  || !flow_bb_inside_loop_p (data->current_loop, bb))
	set_iv (data, var, var, build_int_cst (type, 0), true);
    }

  return name_info (data, var)->iv;
}

/* Return the first non-invariant ssa var found in EXPR.  */

static tree
extract_single_var_from_expr (tree expr)
{
  int i, n;
  tree tmp;
  enum tree_code code;

  if (!expr || is_gimple_min_invariant (expr))
    return NULL;

  code = TREE_CODE (expr);
  if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
    {
      n = TREE_OPERAND_LENGTH (expr);
      for (i = 0; i < n; i++)
	{
	  tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));

	  if (tmp)
	    return tmp;
	}
    }
  return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
}
/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gphi *phi;
  affine_iv iv;
  tree step, type, base, stop;
  bool found = false;
  struct loop *loop = data->current_loop;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
	continue;

      if (virtual_operand_p (PHI_RESULT (phi)))
	continue;

      if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
	continue;

      if (integer_zerop (iv.step))
	continue;

      step = iv.step;
      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      /* Stop expanding the iv base at the first ssa var referred to by the
	 iv step.  Ideally we should stop at every such var; since that is
	 expensive and unlikely to matter, we just do it for the first one.

	 See PR64705 for the rationale.  */
      stop = extract_single_var_from_expr (step);
      base = expand_simple_operations (base, stop);
      if (contains_abnormal_ssa_name_p (base)
	  || contains_abnormal_ssa_name_p (step))
	continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
	{
	  if (POINTER_TYPE_P (type))
	    step = convert_to_ptrofftype (step);
	  else
	    step = fold_convert (type, step);
	}

      set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
      found = true;
    }

  return found;
}
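
/* A hedged example of what find_bivs records (the SSA names are made up):
   for a loop header containing

     i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 1

   simple_iv describes i_1 as {0, +, 1}, so i_1 gets an iv with base 0 and
   step 1; a pointer biv would instead get its step converted to
   ptrofftype.  */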

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gphi *phi;
  gimple def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gphi_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = psi.phi ();

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
	continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv peeled from another one as a biv.  */
      if (def
	  && gimple_code (def) == GIMPLE_PHI
	  && gimple_bb (def) == loop->header)
	continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
	continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
	  || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
	continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs, stop;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;

  /* Stop expanding the iv base at the first ssa var referred to by the
     iv step.  Ideally we should stop at every such var; since that is
     expensive and unlikely to matter, we just do it for the first one.

     See PR64705 for the rationale.  */
  stop = extract_single_var_from_expr (iv->step);
  iv->base = expand_simple_operations (iv->base, stop);
  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}
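
/* Illustratively (made-up SSA names): if i_1 is a biv {0, +, 1}, then for

     off_3 = i_1 * 4;

   this function describes off_3 as the affine iv {0, +, 4}, which
   find_givs_in_stmt below then records; a statement that could throw is
   skipped.  */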

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
	{
	  fprintf (dump_file, "  number of iterations ");
	  print_generic_expr (dump_file, niter->niter, TDF_SLIM);
	  if (!integer_zerop (niter->may_be_zero))
	    {
	      fprintf (dump_file, "; zero if ");
	      print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
	    }
	  fprintf (dump_file, "\n\n");
	}

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
	{
	  if (ver_info (data, i)->iv)
	    dump_iv (dump_file, ver_info (data, i)->iv, true);
	}
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
   is the const offset stripped from the IV base.  For uses of other types,
   ADDR_BASE and ADDR_OFFSET are zero by default.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
	    gimple stmt, enum use_type use_type, tree addr_base = NULL,
	    unsigned HOST_WIDE_INT addr_offset = 0)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);
  use->next = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  data->iv_uses.safe_push (use);

  return use;
}

/* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
   The sub use is recorded under the one whose use id is ID_GROUP.  */

static struct iv_use *
record_sub_use (struct ivopts_data *data, tree *use_p,
		struct iv *iv, gimple stmt, enum use_type use_type,
		tree addr_base, unsigned HOST_WIDE_INT addr_offset,
		unsigned int id_group)
{
  struct iv_use *use = XCNEW (struct iv_use);
  struct iv_use *group = iv_use (data, id_group);

  use->id = group->id;
  use->sub_id = 0;
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = NULL;
  use->addr_base = addr_base;
  use->addr_offset = addr_offset;

  /* The sub use list is maintained in ascending order of offset.  */
  if (addr_offset <= group->addr_offset)
    {
      use->related_cands = group->related_cands;
      group->related_cands = NULL;
      use->next = group;
      data->iv_uses[id_group] = use;
    }
  else
    {
      struct iv_use *pre;
      do
	{
	  pre = group;
	  group = group->next;
	}
      while (group && addr_offset > group->addr_offset);
      use->next = pre->next;
      pre->next = use;
    }

  /* Clear the ssa name so that it is not shown in the dumps, in case the
     caller did not reset it already.  */
  iv->ssa_name = NULL_TREE;

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
	      || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
		       tree **control_var, tree **bound,
		       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      gcond *cond_stmt = as_a <gcond *> (stmt);
      op0 = gimple_cond_lhs_ptr (cond_stmt);
      op1 = gimple_cond_rhs_ptr (cond_stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      std::swap (op0, op1);
      std::swap (iv0, iv1);
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
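
/* As a sketch (made-up SSA names): for a GIMPLE_COND like

     if (i_1 < n_2)

   where i_1 is an iv with nonzero step and n_2 is loop invariant, this
   returns true with *CONTROL_VAR pointing at i_1 and *BOUND at n_2,
   swapping the operands first if the iv is on the right-hand side.  */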

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
	{
	  if (flow_bb_inside_loop_p (loop, def_bb))
	    return NULL;
	  return superloop_at_depth (loop,
				     loop_depth (def_bb->loop_father) + 1);
	}

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
	continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
	return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
	  && flow_bb_inside_loop_p (loop, def_bb))
	return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
	&& !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Cumulates the steps of indices into DATA and replaces their values with the
   initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  bool use_overflow_semantics = false;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both the lower bound
     and the step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
	  && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
	return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
	  || !expr_invariant_in_loop_p (loop, lbound))
	return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
	  *&x[0], which is not folded and does not trigger the
	  ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
	return false;
    }
  else
    /* The step for pointer arithmetic is already 1 byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
    use_overflow_semantics = true;

  if (!convert_affine_scev (dta->ivopts_data->current_loop,
			    sizetype, &iv_base, &iv_step, dta->stmt,
			    use_overflow_semantics))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
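
/* A worked sketch (hypothetical types): for a reference a[i_1] where a is
   an array of 4-byte ints and i_1 is an iv with step 1, the index is
   replaced by its base value and DTA->step grows by
   array_ref_element_size (4) times the iv step (1), i.e. by 4; for a
   dereference of a pointer iv with step 4, the element step is
   size_one_node, so DTA->step likewise grows by 4.  */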

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
		void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
	return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
	return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
	  || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
	return false;

      if (code == MINUS_EXPR)
	p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
	return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
	return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
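
/* Two illustrative invocations (i stands for any tree): with
   TOP = i * 4 + i * 8 and BOT = i, the PLUS_EXPR and MULT_EXPR cases
   combine to *MUL = 12; with TOP = 14 and BOT = 4, the INTEGER_CST case
   fails because the remainder of the truncating division is nonzero.  */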

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
	 target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
	     || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
	 and make them look addressable.  After some processing the
	 non-addressability may be uncovered again, causing ADDR_EXPRs
	 of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
	  || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
	return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

static tree
strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);

/* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
   If there is an existing use which has the same stripped iv base and
   step, this function records this one as a sub use to that; otherwise
   it records it as a normal one.  */

static struct iv_use *
record_group_use (struct ivopts_data *data, tree *use_p,
		  struct iv *iv, gimple stmt, enum use_type use_type)
{
  unsigned int i;
  struct iv_use *use;
  tree addr_base;
  unsigned HOST_WIDE_INT addr_offset;

  /* Only support sub uses for address type uses, that is, those with a
     base object.  */
  if (!iv->base_object)
    return record_use (data, use_p, iv, stmt, use_type);

  addr_base = strip_offset (iv->base, &addr_offset);
  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);
      if (use->type != USE_ADDRESS || !use->iv->base_object)
	continue;

      /* Check if it has the same stripped base and step.  */
      if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
	  && operand_equal_p (iv->step, use->iv->step, 0)
	  && operand_equal_p (addr_base, use->addr_base, 0))
	break;
    }

  if (i == n_iv_uses (data))
    return record_use (data, use_p, iv, stmt,
		       use_type, addr_base, addr_offset);
  else
    return record_sub_use (data, use_p, iv, stmt,
			   use_type, addr_base, addr_offset, i);
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
	  && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_BASE (base));
	  if (!civ)
	    goto fail;

	  TMR_BASE (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX2 (base)
	  && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX2 (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX2 (base) = civ->base;
	  step = civ->step;
	}
      if (TMR_INDEX (base)
	  && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
	{
	  civ = get_iv (data, TMR_INDEX (base));
	  if (!civ)
	    goto fail;

	  TMR_INDEX (base) = civ->base;
	  astep = civ->step;

	  if (astep)
	    {
	      if (TMR_STEP (base))
		astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

	      step = fold_build2 (PLUS_EXPR, type, step, astep);
	    }
	}

      if (integer_zerop (step))
	goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
	  || integer_zerop (ifs_ivopts_data.step))
	goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
	 to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
	goto fail;

      /* Moreover, on strict alignment platforms, check that it is
	 sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
	goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
	 have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
	{
	  tree *ref = &TREE_OPERAND (base, 0);
	  while (handled_component_p (*ref))
	    ref = &TREE_OPERAND (*ref, 0);
	  if (TREE_CODE (*ref) == MEM_REF)
	    {
	      tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
				      TREE_OPERAND (*ref, 0),
				      TREE_OPERAND (*ref, 1));
	      if (tem)
		*ref = tem;
	    }
	}
    }

  civ = alloc_iv (base, step);
  record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
	{
	  /* If the statement defines an induction variable, the uses are not
	     interesting by themselves.  */

	  iv = get_iv (data, *lhs);

	  if (iv && !integer_zerop (iv->step))
	    return;
	}

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
	  && (REFERENCE_CLASS_P (*rhs)
	      || is_gimple_val (*rhs)))
	{
	  if (REFERENCE_CLASS_P (*rhs))
	    find_interesting_uses_address (data, stmt, rhs);
	  else
	    find_interesting_uses_op (data, *rhs);

	  if (REFERENCE_CLASS_P (*lhs))
	    find_interesting_uses_address (data, stmt, lhs);
	  return;
	}
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
	{
	  find_interesting_uses_cond (data, stmt);
	  return;
	}

      /* TODO -- we should also handle address uses of type

	 memory = call (whatever);

	 and

	 call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
	return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
	continue;

      iv = get_iv (data, op);
      if (!iv)
	continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gphi *phi;
  gphi_iterator psi;
  tree def;
2158
2159 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2160 {
2161 phi = psi.phi ();
2162 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2163 if (!virtual_operand_p (def))
2164 find_interesting_uses_op (data, def);
2165 }
2166 }
2167
2168 /* Finds uses of the induction variables that are interesting. */
2169
2170 static void
2171 find_interesting_uses (struct ivopts_data *data)
2172 {
2173 basic_block bb;
2174 gimple_stmt_iterator bsi;
2175 basic_block *body = get_loop_body (data->current_loop);
2176 unsigned i;
2177 struct version_info *info;
2178 edge e;
2179
2180 if (dump_file && (dump_flags & TDF_DETAILS))
2181 fprintf (dump_file, "Uses:\n\n");
2182
2183 for (i = 0; i < data->current_loop->num_nodes; i++)
2184 {
2185 edge_iterator ei;
2186 bb = body[i];
2187
2188 FOR_EACH_EDGE (e, ei, bb->succs)
2189 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2190 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2191 find_interesting_uses_outside (data, e);
2192
2193 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2194 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2195 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2196 if (!is_gimple_debug (gsi_stmt (bsi)))
2197 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2198 }
2199
2200 if (dump_file && (dump_flags & TDF_DETAILS))
2201 {
2202 bitmap_iterator bi;
2203
2204 fprintf (dump_file, "\n");
2205
2206 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2207 {
2208 info = ver_info (data, i);
2209 if (info->inv_id)
2210 {
2211 fprintf (dump_file, " ");
2212 print_generic_expr (dump_file, info->name, TDF_SLIM);
2213 fprintf (dump_file, " is invariant (%d)%s\n",
2214 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2215 }
2216 }
2217
2218 fprintf (dump_file, "\n");
2219 }
2220
2221 free (body);
2222 }
2223
2224 /* Compute the maximum offset of the [base + offset] addressing mode
2225 for the memory reference represented by USE. */
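/* For example, on a target whose [base + offset] form accepts 16-bit
signed offsets, the probing loop below would typically stop at i == 15
and cache (1 << 15) - 1 == 32767 as the maximum offset for this mode and
address space. */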
2226
2227 static HOST_WIDE_INT
2228 compute_max_addr_offset (struct iv_use *use)
2229 {
2230 int width;
2231 rtx reg, addr;
2232 HOST_WIDE_INT i, off;
2233 unsigned list_index, num;
2234 addr_space_t as;
2235 machine_mode mem_mode, addr_mode;
2236 static vec<HOST_WIDE_INT> max_offset_list;
2237
2238 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2239 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2240
2241 num = max_offset_list.length ();
2242 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2243 if (list_index >= num)
2244 {
2245 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2246 for (; num < max_offset_list.length (); num++)
2247 max_offset_list[num] = -1;
2248 }
2249
2250 off = max_offset_list[list_index];
2251 if (off != -1)
2252 return off;
2253
2254 addr_mode = targetm.addr_space.address_mode (as);
2255 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2256 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2257
2258 width = GET_MODE_BITSIZE (addr_mode) - 1;
2259 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2260 width = HOST_BITS_PER_WIDE_INT - 1;
2261
2262 for (i = width; i > 0; i--)
2263 {
2264 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2265 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2266 if (memory_address_addr_space_p (mem_mode, addr, as))
2267 break;
2268
2269 /* For some strict-alignment targets, the offset must be naturally
2270 aligned. Try an aligned offset if mem_mode is not QImode. */
2271 off = ((unsigned HOST_WIDE_INT) 1 << i);
2272 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2273 {
2274 off -= GET_MODE_SIZE (mem_mode);
2275 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2276 if (memory_address_addr_space_p (mem_mode, addr, as))
2277 break;
2278 }
2279 }
2280 if (i == 0)
2281 off = 0;
2282
2283 max_offset_list[list_index] = off;
2284 return off;
2285 }
2286
2287 /* Check if all small groups should be split. Return true if and
2288 only if:
2289
2290 1) At least one group contains exactly two distinct offsets.
2291 2) No group contains more than two distinct offsets.
2292
2293 Return false otherwise. We want to split such groups because:
2294
2295 1) Small groups don't have much benefit and may interfere with
2296 general candidate selection.
2297 2) A problem consisting only of small groups is usually small, and
2298 the general algorithm can handle it well.
2299
2300 TODO -- The above claim may not hold when auto increment is supported. */
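/* For instance, a group with sorted address offsets {0, 4, 4} has two
distinct offsets and so votes for splitting; a group with offsets
{0, 4, 8} has three and prevents the splitting altogether. */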
2301
2302 static bool
2303 split_all_small_groups (struct ivopts_data *data)
2304 {
2305 bool split_p = false;
2306 unsigned int i, n, distinct;
2307 struct iv_use *pre, *use;
2308
2309 n = n_iv_uses (data);
2310 for (i = 0; i < n; i++)
2311 {
2312 use = iv_use (data, i);
2313 if (!use->next)
2314 continue;
2315
2316 distinct = 1;
2317 gcc_assert (use->type == USE_ADDRESS);
2318 for (pre = use, use = use->next; use; pre = use, use = use->next)
2319 {
2320 if (pre->addr_offset != use->addr_offset)
2321 distinct++;
2322
2323 if (distinct > 2)
2324 return false;
2325 }
2326 if (distinct == 2)
2327 split_p = true;
2328 }
2329
2330 return split_p;
2331 }
2332
2333 /* For each group of address type uses, this function further groups
2334 these uses according to the maximum offset supported by the target's
2335 [base + offset] addressing mode. */
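/* For example, if the addressing mode supports offsets up to 4095, uses at
offsets {0, 8, 4096} from the same stripped base end up as two groups:
{0, 8} stay together, while the use at 4096 starts a new group that is
appended to the use list. */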
2336
2337 static void
2338 group_address_uses (struct ivopts_data *data)
2339 {
2340 HOST_WIDE_INT max_offset = -1;
2341 unsigned int i, n, sub_id;
2342 struct iv_use *pre, *use;
2343 unsigned HOST_WIDE_INT addr_offset_first;
2344
2345 /* Reset max offset to split all small groups. */
2346 if (split_all_small_groups (data))
2347 max_offset = 0;
2348
2349 n = n_iv_uses (data);
2350 for (i = 0; i < n; i++)
2351 {
2352 use = iv_use (data, i);
2353 if (!use->next)
2354 continue;
2355
2356 gcc_assert (use->type == USE_ADDRESS);
2357 if (max_offset != 0)
2358 max_offset = compute_max_addr_offset (use);
2359
2360 while (use)
2361 {
2362 sub_id = 0;
2363 addr_offset_first = use->addr_offset;
2364 /* Only uses whose offset relative to the first use fits within
2365 max_offset can be grouped together. */
2366 for (pre = use, use = use->next;
2367 use && (use->addr_offset - addr_offset_first
2368 <= (unsigned HOST_WIDE_INT) max_offset);
2369 pre = use, use = use->next)
2370 {
2371 use->id = pre->id;
2372 use->sub_id = ++sub_id;
2373 }
2374
2375 /* Break the list and create new group. */
2376 if (use)
2377 {
2378 pre->next = NULL;
2379 use->id = n_iv_uses (data);
2380 use->related_cands = BITMAP_ALLOC (NULL);
2381 data->iv_uses.safe_push (use);
2382 }
2383 }
2384 }
2385
2386 if (dump_file && (dump_flags & TDF_DETAILS))
2387 dump_uses (dump_file, data);
2388 }
2389
2390 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2391 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2392 we are at the top-level of the processed address. */
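/* As an example, applied to &a[i + 3] with 4-byte elements this yields
&a[i] with *OFFSET set to 12; applied to i * 8 + 20 it yields i * 8 with
*OFFSET set to 20. */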
2393
2394 static tree
2395 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2396 HOST_WIDE_INT *offset)
2397 {
2398 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2399 enum tree_code code;
2400 tree type, orig_type = TREE_TYPE (expr);
2401 HOST_WIDE_INT off0, off1, st;
2402 tree orig_expr = expr;
2403
2404 STRIP_NOPS (expr);
2405
2406 type = TREE_TYPE (expr);
2407 code = TREE_CODE (expr);
2408 *offset = 0;
2409
2410 switch (code)
2411 {
2412 case INTEGER_CST:
2413 if (!cst_and_fits_in_hwi (expr)
2414 || integer_zerop (expr))
2415 return orig_expr;
2416
2417 *offset = int_cst_value (expr);
2418 return build_int_cst (orig_type, 0);
2419
2420 case POINTER_PLUS_EXPR:
2421 case PLUS_EXPR:
2422 case MINUS_EXPR:
2423 op0 = TREE_OPERAND (expr, 0);
2424 op1 = TREE_OPERAND (expr, 1);
2425
2426 op0 = strip_offset_1 (op0, false, false, &off0);
2427 op1 = strip_offset_1 (op1, false, false, &off1);
2428
2429 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2430 if (op0 == TREE_OPERAND (expr, 0)
2431 && op1 == TREE_OPERAND (expr, 1))
2432 return orig_expr;
2433
2434 if (integer_zerop (op1))
2435 expr = op0;
2436 else if (integer_zerop (op0))
2437 {
2438 if (code == MINUS_EXPR)
2439 expr = fold_build1 (NEGATE_EXPR, type, op1);
2440 else
2441 expr = op1;
2442 }
2443 else
2444 expr = fold_build2 (code, type, op0, op1);
2445
2446 return fold_convert (orig_type, expr);
2447
2448 case MULT_EXPR:
2449 op1 = TREE_OPERAND (expr, 1);
2450 if (!cst_and_fits_in_hwi (op1))
2451 return orig_expr;
2452
2453 op0 = TREE_OPERAND (expr, 0);
2454 op0 = strip_offset_1 (op0, false, false, &off0);
2455 if (op0 == TREE_OPERAND (expr, 0))
2456 return orig_expr;
2457
2458 *offset = off0 * int_cst_value (op1);
2459 if (integer_zerop (op0))
2460 expr = op0;
2461 else
2462 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2463
2464 return fold_convert (orig_type, expr);
2465
2466 case ARRAY_REF:
2467 case ARRAY_RANGE_REF:
2468 if (!inside_addr)
2469 return orig_expr;
2470
2471 step = array_ref_element_size (expr);
2472 if (!cst_and_fits_in_hwi (step))
2473 break;
2474
2475 st = int_cst_value (step);
2476 op1 = TREE_OPERAND (expr, 1);
2477 op1 = strip_offset_1 (op1, false, false, &off1);
2478 *offset = off1 * st;
2479
2480 if (top_compref
2481 && integer_zerop (op1))
2482 {
2483 /* Strip the component reference completely. */
2484 op0 = TREE_OPERAND (expr, 0);
2485 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2486 *offset += off0;
2487 return op0;
2488 }
2489 break;
2490
2491 case COMPONENT_REF:
2492 {
2493 tree field;
2494
2495 if (!inside_addr)
2496 return orig_expr;
2497
2498 tmp = component_ref_field_offset (expr);
2499 field = TREE_OPERAND (expr, 1);
2500 if (top_compref
2501 && cst_and_fits_in_hwi (tmp)
2502 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2503 {
2504 HOST_WIDE_INT boffset, abs_off;
2505
2506 /* Strip the component reference completely. */
2507 op0 = TREE_OPERAND (expr, 0);
2508 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2509 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2510 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2511 if (boffset < 0)
2512 abs_off = -abs_off;
2513
2514 *offset = off0 + int_cst_value (tmp) + abs_off;
2515 return op0;
2516 }
2517 }
2518 break;
2519
2520 case ADDR_EXPR:
2521 op0 = TREE_OPERAND (expr, 0);
2522 op0 = strip_offset_1 (op0, true, true, &off0);
2523 *offset += off0;
2524
2525 if (op0 == TREE_OPERAND (expr, 0))
2526 return orig_expr;
2527
2528 expr = build_fold_addr_expr (op0);
2529 return fold_convert (orig_type, expr);
2530
2531 case MEM_REF:
2532 /* ??? Offset operand? */
2533 inside_addr = false;
2534 break;
2535
2536 default:
2537 return orig_expr;
2538 }
2539
2540 /* Default handling of expressions for which we want to recurse into
2541 the first operand. */
2542 op0 = TREE_OPERAND (expr, 0);
2543 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2544 *offset += off0;
2545
2546 if (op0 == TREE_OPERAND (expr, 0)
2547 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2548 return orig_expr;
2549
2550 expr = copy_node (expr);
2551 TREE_OPERAND (expr, 0) = op0;
2552 if (op1)
2553 TREE_OPERAND (expr, 1) = op1;
2554
2555 /* Inside address, we might strip the top level component references,
2556 thus changing type of the expression. Handling of ADDR_EXPR
2557 will fix that. */
2558 expr = fold_convert (orig_type, expr);
2559
2560 return expr;
2561 }
2562
2563 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2564
2565 static tree
2566 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2567 {
2568 HOST_WIDE_INT off;
2569 tree core = strip_offset_1 (expr, false, false, &off);
2570 *offset = off;
2571 return core;
2572 }
2573
2574 /* Returns a variant of TYPE that can be used as a base for different uses.
2575 We return an unsigned type with the same precision, which avoids problems
2576 with overflows. */
2577
2578 static tree
2579 generic_type_for (tree type)
2580 {
2581 if (POINTER_TYPE_P (type))
2582 return unsigned_type_for (type);
2583
2584 if (TYPE_UNSIGNED (type))
2585 return type;
2586
2587 return unsigned_type_for (type);
2588 }
2589
2590 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2591 the bitmap into which we should store them. */
2592
2593 static struct ivopts_data *fd_ivopts_data;
2594 static tree
2595 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2596 {
2597 bitmap *depends_on = (bitmap *) data;
2598 struct version_info *info;
2599
2600 if (TREE_CODE (*expr_p) != SSA_NAME)
2601 return NULL_TREE;
2602 info = name_info (fd_ivopts_data, *expr_p);
2603
2604 if (!info->inv_id || info->has_nonlin_use)
2605 return NULL_TREE;
2606
2607 if (!*depends_on)
2608 *depends_on = BITMAP_ALLOC (NULL);
2609 bitmap_set_bit (*depends_on, info->inv_id);
2610
2611 return NULL_TREE;
2612 }
2613
2614 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2615 position to POS. If USE is not NULL, the candidate is set as related to
2616 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2617 replacement of the final value of the iv by a direct computation. */
2618
2619 static struct iv_cand *
2620 add_candidate_1 (struct ivopts_data *data,
2621 tree base, tree step, bool important, enum iv_position pos,
2622 struct iv_use *use, gimple incremented_at)
2623 {
2624 unsigned i;
2625 struct iv_cand *cand = NULL;
2626 tree type, orig_type;
2627
2628 /* For non-original variables, make sure their values are computed in a type
2629 that does not invoke undefined behavior on overflows (since in general,
2630 we cannot prove that these induction variables are non-wrapping). */
2631 if (pos != IP_ORIGINAL)
2632 {
2633 orig_type = TREE_TYPE (base);
2634 type = generic_type_for (orig_type);
2635 if (type != orig_type)
2636 {
2637 base = fold_convert (type, base);
2638 step = fold_convert (type, step);
2639 }
2640 }
2641
2642 for (i = 0; i < n_iv_cands (data); i++)
2643 {
2644 cand = iv_cand (data, i);
2645
2646 if (cand->pos != pos)
2647 continue;
2648
2649 if (cand->incremented_at != incremented_at
2650 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2651 && cand->ainc_use != use))
2652 continue;
2653
2654 if (!cand->iv)
2655 {
2656 if (!base && !step)
2657 break;
2658
2659 continue;
2660 }
2661
2662 if (!base && !step)
2663 continue;
2664
2665 if (operand_equal_p (base, cand->iv->base, 0)
2666 && operand_equal_p (step, cand->iv->step, 0)
2667 && (TYPE_PRECISION (TREE_TYPE (base))
2668 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2669 break;
2670 }
2671
2672 if (i == n_iv_cands (data))
2673 {
2674 cand = XCNEW (struct iv_cand);
2675 cand->id = i;
2676
2677 if (!base && !step)
2678 cand->iv = NULL;
2679 else
2680 cand->iv = alloc_iv (base, step);
2681
2682 cand->pos = pos;
2683 if (pos != IP_ORIGINAL && cand->iv)
2684 {
2685 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2686 cand->var_after = cand->var_before;
2687 }
2688 cand->important = important;
2689 cand->incremented_at = incremented_at;
2690 data->iv_candidates.safe_push (cand);
2691
2692 if (step
2693 && TREE_CODE (step) != INTEGER_CST)
2694 {
2695 fd_ivopts_data = data;
2696 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2697 }
2698
2699 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2700 cand->ainc_use = use;
2701 else
2702 cand->ainc_use = NULL;
2703
2704 if (dump_file && (dump_flags & TDF_DETAILS))
2705 dump_cand (dump_file, cand);
2706 }
2707
2708 if (important && !cand->important)
2709 {
2710 cand->important = true;
2711 if (dump_file && (dump_flags & TDF_DETAILS))
2712 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2713 }
2714
2715 if (use)
2716 {
2717 bitmap_set_bit (use->related_cands, i);
2718 if (dump_file && (dump_flags & TDF_DETAILS))
2719 fprintf (dump_file, "Candidate %d is related to use %d\n",
2720 cand->id, use->id);
2721 }
2722
2723 return cand;
2724 }
2725
2726 /* Returns true if incrementing the induction variable at the end of the LOOP
2727 is allowed.
2728
2729 The purpose is to avoid splitting the latch edge with a biv increment,
2730 thus creating a jump, possibly confusing other optimization passes and
2731 leaving less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2732 is not available (so we do not have a better alternative), or if the latch
2733 edge is already nonempty. */
2734
2735 static bool
2736 allow_ip_end_pos_p (struct loop *loop)
2737 {
2738 if (!ip_normal_pos (loop))
2739 return true;
2740
2741 if (!empty_block_p (ip_end_pos (loop)))
2742 return true;
2743
2744 return false;
2745 }
2746
2747 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2748 Important field is set to IMPORTANT. */
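/* For instance, for a 4-byte access *p with step 4 on a target with
post-increment addressing, a candidate incremented right after the use
(IP_AFTER_USE) is added so the access can become *p++; with step -4 and
pre-decrement addressing, a candidate suitable for *--p is added instead
(IP_BEFORE_USE, with the base adjusted by one step). */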
2749
2750 static void
2751 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2752 bool important, struct iv_use *use)
2753 {
2754 basic_block use_bb = gimple_bb (use->stmt);
2755 machine_mode mem_mode;
2756 unsigned HOST_WIDE_INT cstepi;
2757
2758 /* If we insert the increment in any position other than the standard
2759 ones, we must ensure that it is incremented once per iteration.
2760 It must not be in an inner nested loop, or one side of an if
2761 statement. */
2762 if (use_bb->loop_father != data->current_loop
2763 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2764 || stmt_could_throw_p (use->stmt)
2765 || !cst_and_fits_in_hwi (step))
2766 return;
2767
2768 cstepi = int_cst_value (step);
2769
2770 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2771 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2772 || USE_STORE_PRE_INCREMENT (mem_mode))
2773 && GET_MODE_SIZE (mem_mode) == cstepi)
2774 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2775 || USE_STORE_PRE_DECREMENT (mem_mode))
2776 && GET_MODE_SIZE (mem_mode) == -cstepi))
2777 {
2778 enum tree_code code = MINUS_EXPR;
2779 tree new_base;
2780 tree new_step = step;
2781
2782 if (POINTER_TYPE_P (TREE_TYPE (base)))
2783 {
2784 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2785 code = POINTER_PLUS_EXPR;
2786 }
2787 else
2788 new_step = fold_convert (TREE_TYPE (base), new_step);
2789 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2790 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2791 use->stmt);
2792 }
2793 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2794 || USE_STORE_POST_INCREMENT (mem_mode))
2795 && GET_MODE_SIZE (mem_mode) == cstepi)
2796 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2797 || USE_STORE_POST_DECREMENT (mem_mode))
2798 && GET_MODE_SIZE (mem_mode) == -cstepi))
2799 {
2800 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2801 use->stmt);
2802 }
2803 }
2804
2805 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2806 position to POS. If USE is not NULL, the candidate is set as related to
2807 it. The candidate computation is scheduled on all available positions. */
2808
2809 static void
2810 add_candidate (struct ivopts_data *data,
2811 tree base, tree step, bool important, struct iv_use *use)
2812 {
2813 gcc_assert (use == NULL || use->sub_id == 0);
2814
2815 if (ip_normal_pos (data->current_loop))
2816 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2817 if (ip_end_pos (data->current_loop)
2818 && allow_ip_end_pos_p (data->current_loop))
2819 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2820
2821 if (use != NULL && use->type == USE_ADDRESS)
2822 add_autoinc_candidates (data, base, step, important, use);
2823 }
2824
2825 /* Adds standard iv candidates. */
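/* That is, the generic counters {0, +, 1} in int, long and long long, as
one would get from "for (int i = 0; ...; i++)", so long as the wider
types still fit in a machine word. */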
2826
2827 static void
2828 add_standard_iv_candidates (struct ivopts_data *data)
2829 {
2830 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2831
2832 /* The same for the long type if it is still fast enough. */
2833 if (TYPE_PRECISION
2834 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2835 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2836 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2837 build_int_cst (long_integer_type_node, 1), true, NULL);
2838
2839 /* The same for the long long type if it is still fast enough. */
2840 if (TYPE_PRECISION
2841 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2842 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2843 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2844 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2845 }
2846
2847
2848 /* Adds candidates based on the old induction variable IV. */
2849
2850 static void
2851 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2852 {
2853 gimple phi;
2854 tree def;
2855 struct iv_cand *cand;
2856
2857 add_candidate (data, iv->base, iv->step, true, NULL);
2858
2859 /* The same, but with initial value zero. */
2860 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2861 add_candidate (data, size_int (0), iv->step, true, NULL);
2862 else
2863 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2864 iv->step, true, NULL);
2865
2866 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2867 if (gimple_code (phi) == GIMPLE_PHI)
2868 {
2869 /* Additionally record the possibility of leaving the original iv
2870 untouched. */
2871 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2872 /* Don't add the candidate if it comes from another PHI node, because
2873 then it is an affine iv appearing in the form of PEELED_CHREC. */
2874 phi = SSA_NAME_DEF_STMT (def);
2875 if (gimple_code (phi) != GIMPLE_PHI)
2876 {
2877 cand = add_candidate_1 (data,
2878 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2879 SSA_NAME_DEF_STMT (def));
2880 cand->var_before = iv->ssa_name;
2881 cand->var_after = def;
2882 }
2883 else
2884 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2885 }
2886 }
2887
2888 /* Adds candidates based on the old induction variables. */
2889
2890 static void
2891 add_old_ivs_candidates (struct ivopts_data *data)
2892 {
2893 unsigned i;
2894 struct iv *iv;
2895 bitmap_iterator bi;
2896
2897 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2898 {
2899 iv = ver_info (data, i)->iv;
2900 if (iv && iv->biv_p && !integer_zerop (iv->step))
2901 add_old_iv_candidates (data, iv);
2902 }
2903 }
2904
2905 /* Adds candidates based on the value of the induction variable IV and USE. */
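/* For a use based on the iv {&a[0] + 16, +, 4}, this adds candidates for
three values: the iv itself, the generic {0, +, 4} variant, and the
{&a[0], +, 4} variant with the constant offset 16 stripped. */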
2906
2907 static void
2908 add_iv_value_candidates (struct ivopts_data *data,
2909 struct iv *iv, struct iv_use *use)
2910 {
2911 unsigned HOST_WIDE_INT offset;
2912 tree base;
2913 tree basetype;
2914
2915 add_candidate (data, iv->base, iv->step, false, use);
2916
2917 /* The same, but with initial value zero. Make such a variable important,
2918 since it is generic enough that possibly many uses may be based
2919 on it. */
2920 basetype = TREE_TYPE (iv->base);
2921 if (POINTER_TYPE_P (basetype))
2922 basetype = sizetype;
2923 add_candidate (data, build_int_cst (basetype, 0),
2924 iv->step, true, use);
2925
2926 /* Third, try removing the constant offset. Make sure to even
2927 add a candidate for &a[0] vs. (T *)&a. */
2928 base = strip_offset (iv->base, &offset);
2929 if (offset
2930 || base != iv->base)
2931 add_candidate (data, base, iv->step, false, use);
2932 }
2933
2934 /* Adds candidates based on the uses. */
2935
2936 static void
2937 add_derived_ivs_candidates (struct ivopts_data *data)
2938 {
2939 unsigned i;
2940
2941 for (i = 0; i < n_iv_uses (data); i++)
2942 {
2943 struct iv_use *use = iv_use (data, i);
2944
2945 if (!use)
2946 continue;
2947
2948 switch (use->type)
2949 {
2950 case USE_NONLINEAR_EXPR:
2951 case USE_COMPARE:
2952 case USE_ADDRESS:
2953 /* Just add the ivs based on the value of the iv used here. */
2954 add_iv_value_candidates (data, use->iv, use);
2955 break;
2956
2957 default:
2958 gcc_unreachable ();
2959 }
2960 }
2961 }
2962
2963 /* Record important candidates and add them to related_cands bitmaps
2964 if needed. */
2965
2966 static void
2967 record_important_candidates (struct ivopts_data *data)
2968 {
2969 unsigned i;
2970 struct iv_use *use;
2971
2972 for (i = 0; i < n_iv_cands (data); i++)
2973 {
2974 struct iv_cand *cand = iv_cand (data, i);
2975
2976 if (cand->important)
2977 bitmap_set_bit (data->important_candidates, i);
2978 }
2979
2980 data->consider_all_candidates = (n_iv_cands (data)
2981 <= CONSIDER_ALL_CANDIDATES_BOUND);
2982
2983 if (data->consider_all_candidates)
2984 {
2985 /* We will not need "related_cands" bitmaps in this case,
2986 so release them to decrease peak memory consumption. */
2987 for (i = 0; i < n_iv_uses (data); i++)
2988 {
2989 use = iv_use (data, i);
2990 BITMAP_FREE (use->related_cands);
2991 }
2992 }
2993 else
2994 {
2995 /* Add important candidates to the related_cands bitmaps. */
2996 for (i = 0; i < n_iv_uses (data); i++)
2997 bitmap_ior_into (iv_use (data, i)->related_cands,
2998 data->important_candidates);
2999 }
3000 }
3001
3002 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
3003 If consider_all_candidates is true, we use a two-dimensional array; otherwise
3004 we allocate a small open-addressed map for every use. */
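/* E.g. a use related to five candidates gets a map of eight entries (the
next power of two), indexed by cand->id masked with 7; see
set_use_iv_cost for the open-addressing scheme built on top of this. */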
3005
3006 static void
3007 alloc_use_cost_map (struct ivopts_data *data)
3008 {
3009 unsigned i, size, s;
3010
3011 for (i = 0; i < n_iv_uses (data); i++)
3012 {
3013 struct iv_use *use = iv_use (data, i);
3014
3015 if (data->consider_all_candidates)
3016 size = n_iv_cands (data);
3017 else
3018 {
3019 s = bitmap_count_bits (use->related_cands);
3020
3021 /* Round up to the nearest power of two, so that taking the modulus is fast. */
3022 size = s ? (1 << ceil_log2 (s)) : 1;
3023 }
3024
3025 use->n_map_members = size;
3026 use->cost_map = XCNEWVEC (struct cost_pair, size);
3027 }
3028 }
3029
3030 /* Returns description of computation cost of expression whose runtime
3031 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3032
3033 static comp_cost
3034 new_cost (unsigned runtime, unsigned complexity)
3035 {
3036 comp_cost cost;
3037
3038 cost.cost = runtime;
3039 cost.complexity = complexity;
3040
3041 return cost;
3042 }
3043
3044 /* Returns true if COST is infinite. */
3045
3046 static bool
3047 infinite_cost_p (comp_cost cost)
3048 {
3049 return cost.cost == INFTY;
3050 }
3051
3052 /* Adds costs COST1 and COST2. */
3053
3054 static comp_cost
3055 add_costs (comp_cost cost1, comp_cost cost2)
3056 {
3057 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3058 return infinite_cost;
3059
3060 cost1.cost += cost2.cost;
3061 cost1.complexity += cost2.complexity;
3062
3063 return cost1;
3064 }
3065 /* Subtracts costs COST1 and COST2. */
3066
3067 static comp_cost
3068 sub_costs (comp_cost cost1, comp_cost cost2)
3069 {
3070 cost1.cost -= cost2.cost;
3071 cost1.complexity -= cost2.complexity;
3072
3073 return cost1;
3074 }
3075
3076 /* Returns a negative number if COST1 < COST2, a positive number if
3077 COST1 > COST2, and 0 if COST1 = COST2. */
3078
3079 static int
3080 compare_costs (comp_cost cost1, comp_cost cost2)
3081 {
3082 if (cost1.cost == cost2.cost)
3083 return cost1.complexity - cost2.complexity;
3084
3085 return cost1.cost - cost2.cost;
3086 }
3087
3088 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3089 on invariants DEPENDS_ON and that the value used in expressing it
3090 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3091
3092 static void
3093 set_use_iv_cost (struct ivopts_data *data,
3094 struct iv_use *use, struct iv_cand *cand,
3095 comp_cost cost, bitmap depends_on, tree value,
3096 enum tree_code comp, int inv_expr_id)
3097 {
3098 unsigned i, s;
3099
3100 if (infinite_cost_p (cost))
3101 {
3102 BITMAP_FREE (depends_on);
3103 return;
3104 }
3105
3106 if (data->consider_all_candidates)
3107 {
3108 use->cost_map[cand->id].cand = cand;
3109 use->cost_map[cand->id].cost = cost;
3110 use->cost_map[cand->id].depends_on = depends_on;
3111 use->cost_map[cand->id].value = value;
3112 use->cost_map[cand->id].comp = comp;
3113 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3114 return;
3115 }
3116
3117 /* n_map_members is a power of two, so this computes modulo. */
3118 s = cand->id & (use->n_map_members - 1);
3119 for (i = s; i < use->n_map_members; i++)
3120 if (!use->cost_map[i].cand)
3121 goto found;
3122 for (i = 0; i < s; i++)
3123 if (!use->cost_map[i].cand)
3124 goto found;
3125
3126 gcc_unreachable ();
3127
3128 found:
3129 use->cost_map[i].cand = cand;
3130 use->cost_map[i].cost = cost;
3131 use->cost_map[i].depends_on = depends_on;
3132 use->cost_map[i].value = value;
3133 use->cost_map[i].comp = comp;
3134 use->cost_map[i].inv_expr_id = inv_expr_id;
3135 }
3136
3137 /* Gets cost of (USE, CANDIDATE) pair. */
3138
3139 static struct cost_pair *
3140 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3141 struct iv_cand *cand)
3142 {
3143 unsigned i, s;
3144 struct cost_pair *ret;
3145
3146 if (!cand)
3147 return NULL;
3148
3149 if (data->consider_all_candidates)
3150 {
3151 ret = use->cost_map + cand->id;
3152 if (!ret->cand)
3153 return NULL;
3154
3155 return ret;
3156 }
3157
3158 /* n_map_members is a power of two, so this computes modulo. */
3159 s = cand->id & (use->n_map_members - 1);
3160 for (i = s; i < use->n_map_members; i++)
3161 if (use->cost_map[i].cand == cand)
3162 return use->cost_map + i;
3163 else if (use->cost_map[i].cand == NULL)
3164 return NULL;
3165 for (i = 0; i < s; i++)
3166 if (use->cost_map[i].cand == cand)
3167 return use->cost_map + i;
3168 else if (use->cost_map[i].cand == NULL)
3169 return NULL;
3170
3171 return NULL;
3172 }
3173
3174 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3175 static rtx
3176 produce_memory_decl_rtl (tree obj, int *regno)
3177 {
3178 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3179 machine_mode address_mode = targetm.addr_space.address_mode (as);
3180 rtx x;
3181
3182 gcc_assert (obj);
3183 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3184 {
3185 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3186 x = gen_rtx_SYMBOL_REF (address_mode, name);
3187 SET_SYMBOL_REF_DECL (x, obj);
3188 x = gen_rtx_MEM (DECL_MODE (obj), x);
3189 set_mem_addr_space (x, as);
3190 targetm.encode_section_info (obj, x, true);
3191 }
3192 else
3193 {
3194 x = gen_raw_REG (address_mode, (*regno)++);
3195 x = gen_rtx_MEM (DECL_MODE (obj), x);
3196 set_mem_addr_space (x, as);
3197 }
3198
3199 return x;
3200 }
3201
3202 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3203 walk_tree. DATA contains the actual fake register number. */
3204
3205 static tree
3206 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3207 {
3208 tree obj = NULL_TREE;
3209 rtx x = NULL_RTX;
3210 int *regno = (int *) data;
3211
3212 switch (TREE_CODE (*expr_p))
3213 {
3214 case ADDR_EXPR:
3215 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3216 handled_component_p (*expr_p);
3217 expr_p = &TREE_OPERAND (*expr_p, 0))
3218 continue;
3219 obj = *expr_p;
3220 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3221 x = produce_memory_decl_rtl (obj, regno);
3222 break;
3223
3224 case SSA_NAME:
3225 *ws = 0;
3226 obj = SSA_NAME_VAR (*expr_p);
3227 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3228 if (!obj)
3229 return NULL_TREE;
3230 if (!DECL_RTL_SET_P (obj))
3231 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3232 break;
3233
3234 case VAR_DECL:
3235 case PARM_DECL:
3236 case RESULT_DECL:
3237 *ws = 0;
3238 obj = *expr_p;
3239
3240 if (DECL_RTL_SET_P (obj))
3241 break;
3242
3243 if (DECL_MODE (obj) == BLKmode)
3244 x = produce_memory_decl_rtl (obj, regno);
3245 else
3246 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3247
3248 break;
3249
3250 default:
3251 break;
3252 }
3253
3254 if (x)
3255 {
3256 decl_rtl_to_reset.safe_push (obj);
3257 SET_DECL_RTL (obj, x);
3258 }
3259
3260 return NULL_TREE;
3261 }
3262
3263 /* Determines cost of the computation of EXPR. */
3264
3265 static unsigned
3266 computation_cost (tree expr, bool speed)
3267 {
3268 rtx_insn *seq;
3269 rtx rslt;
3270 tree type = TREE_TYPE (expr);
3271 unsigned cost;
3272 /* Avoid using hard regs in ways which may be unsupported. */
3273 int regno = LAST_VIRTUAL_REGISTER + 1;
3274 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3275 enum node_frequency real_frequency = node->frequency;
3276
3277 node->frequency = NODE_FREQUENCY_NORMAL;
3278 crtl->maybe_hot_insn_p = speed;
3279 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3280 start_sequence ();
3281 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3282 seq = get_insns ();
3283 end_sequence ();
3284 default_rtl_profile ();
3285 node->frequency = real_frequency;
3286
3287 cost = seq_cost (seq, speed);
3288 if (MEM_P (rslt))
3289 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3290 TYPE_ADDR_SPACE (type), speed);
3291 else if (!REG_P (rslt))
3292 cost += set_src_cost (rslt, speed);
3293
3294 return cost;
3295 }
3296
3297 /* Returns variable containing the value of candidate CAND at statement AT. */
3298
3299 static tree
3300 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3301 {
3302 if (stmt_after_increment (loop, cand, stmt))
3303 return cand->var_after;
3304 else
3305 return cand->var_before;
3306 }
3307
3308 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3309 same precision that is at least as wide as the precision of TYPE, stores
3310 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3311 type of A and B. */
3312
3313 static tree
3314 determine_common_wider_type (tree *a, tree *b)
3315 {
3316 tree wider_type = NULL;
3317 tree suba, subb;
3318 tree atype = TREE_TYPE (*a);
3319
3320 if (CONVERT_EXPR_P (*a))
3321 {
3322 suba = TREE_OPERAND (*a, 0);
3323 wider_type = TREE_TYPE (suba);
3324 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3325 return atype;
3326 }
3327 else
3328 return atype;
3329
3330 if (CONVERT_EXPR_P (*b))
3331 {
3332 subb = TREE_OPERAND (*b, 0);
3333 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3334 return atype;
3335 }
3336 else
3337 return atype;
3338
3339 *a = suba;
3340 *b = subb;
3341 return wider_type;
3342 }
3343
3344 /* Determines the expression by which USE is expressed from induction variable
3345 CAND at statement AT in LOOP. The expression is stored in a decomposed
3346 form into AFF. Returns false if USE cannot be expressed using CAND. */
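/* The underlying identity: if the use iv is {ubase, +, ustep}, the
candidate iv is {cbase, +, cstep} and ustep == rat * cstep, then at any
iteration

use value = ubase + rat * (var - cbase)
          = ubase - rat * cbase + rat * var,

which is exactly the decomposed form computed below. */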
3347
3348 static bool
3349 get_computation_aff (struct loop *loop,
3350 struct iv_use *use, struct iv_cand *cand, gimple at,
3351 struct aff_tree *aff)
3352 {
3353 tree ubase = use->iv->base;
3354 tree ustep = use->iv->step;
3355 tree cbase = cand->iv->base;
3356 tree cstep = cand->iv->step, cstep_common;
3357 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3358 tree common_type, var;
3359 tree uutype;
3360 aff_tree cbase_aff, var_aff;
3361 widest_int rat;
3362
3363 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3364 {
3365 /* We do not have enough precision to express the values of the use. */
3366 return false;
3367 }
3368
3369 var = var_at_stmt (loop, cand, at);
3370 uutype = unsigned_type_for (utype);
3371
3372 /* If the conversion is not noop, perform it. */
3373 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3374 {
3375 cstep = fold_convert (uutype, cstep);
3376 cbase = fold_convert (uutype, cbase);
3377 var = fold_convert (uutype, var);
3378 }
3379
3380 if (!constant_multiple_of (ustep, cstep, &rat))
3381 return false;
3382
3383 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3384 type, we achieve better folding by computing their difference in this
3385 wider type, and casting the result to UUTYPE. We do not need to worry about
3386 overflows, as all the arithmetic will in the end be performed in UUTYPE
3387 anyway. */
3388 common_type = determine_common_wider_type (&ubase, &cbase);
3389
3390 /* use = ubase - ratio * cbase + ratio * var. */
3391 tree_to_aff_combination (ubase, common_type, aff);
3392 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3393 tree_to_aff_combination (var, uutype, &var_aff);
3394
3395 /* We need to shift the value if we are after the increment. */
3396 if (stmt_after_increment (loop, cand, at))
3397 {
3398 aff_tree cstep_aff;
3399
3400 if (common_type != uutype)
3401 cstep_common = fold_convert (common_type, cstep);
3402 else
3403 cstep_common = cstep;
3404
3405 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3406 aff_combination_add (&cbase_aff, &cstep_aff);
3407 }
3408
3409 aff_combination_scale (&cbase_aff, -rat);
3410 aff_combination_add (aff, &cbase_aff);
3411 if (common_type != uutype)
3412 aff_combination_convert (aff, uutype);
3413
3414 aff_combination_scale (&var_aff, rat);
3415 aff_combination_add (aff, &var_aff);
3416
3417 return true;
3418 }
3419
3420 /* Return the type of USE. */
3421
3422 static tree
3423 get_use_type (struct iv_use *use)
3424 {
3425 tree base_type = TREE_TYPE (use->iv->base);
3426 tree type;
3427
3428 if (use->type == USE_ADDRESS)
3429 {
3430 /* The base_type may be a void pointer. Create a pointer type based on
3431 the mem_ref instead. */
3432 type = build_pointer_type (TREE_TYPE (*use->op_p));
3433 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3434 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3435 }
3436 else
3437 type = base_type;
3438
3439 return type;
3440 }
3441
3442 /* Determines the expression by which USE is expressed from induction variable
3443 CAND at statement AT in LOOP. The computation is unshared. */
3444
3445 static tree
3446 get_computation_at (struct loop *loop,
3447 struct iv_use *use, struct iv_cand *cand, gimple at)
3448 {
3449 aff_tree aff;
3450 tree type = get_use_type (use);
3451
3452 if (!get_computation_aff (loop, use, cand, at, &aff))
3453 return NULL_TREE;
3454 unshare_aff_combination (&aff);
3455 return fold_convert (type, aff_combination_to_tree (&aff));
3456 }
3457
3458 /* Determines the expression by which USE is expressed from induction variable
3459 CAND in LOOP. The computation is unshared. */
3460
3461 static tree
3462 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3463 {
3464 return get_computation_at (loop, use, cand, use->stmt);
3465 }
3466
3467 /* Adjust the cost COST for being in loop setup rather than loop body.
3468 If we're optimizing for space, the loop setup overhead is constant;
3469 if we're optimizing for speed, amortize it over the per-iteration cost. */
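/* E.g. a setup cost of 20 in a loop estimated to run 10 iterations
contributes only 2 to the per-iteration cost when optimizing for
speed. */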
3470 static unsigned
3471 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3472 {
3473 if (cost == INFTY)
3474 return cost;
3475 else if (optimize_loop_for_speed_p (data->current_loop))
3476 return cost / avg_loop_niter (data->current_loop);
3477 else
3478 return cost;
3479 }
3480
3481 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3482 validity for a memory reference accessing memory of mode MODE in
3483 address space AS. */
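/* On a typical target only a few small ratios are valid; x86 addressing,
for example, allows index scales of 1, 2, 4 and 8, so roughly only those
bits would be set in the cached bitmap for the common integer modes. */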
3484
3485
3486 bool
3487 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3488 addr_space_t as)
3489 {
3490 #define MAX_RATIO 128
3491 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3492 static vec<sbitmap> valid_mult_list;
3493 sbitmap valid_mult;
3494
3495 if (data_index >= valid_mult_list.length ())
3496 valid_mult_list.safe_grow_cleared (data_index + 1);
3497
3498 valid_mult = valid_mult_list[data_index];
3499 if (!valid_mult)
3500 {
3501 machine_mode address_mode = targetm.addr_space.address_mode (as);
3502 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3503 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3504 rtx addr, scaled;
3505 HOST_WIDE_INT i;
3506
3507 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3508 bitmap_clear (valid_mult);
3509 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3510 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3511 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3512 {
3513 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3514 if (memory_address_addr_space_p (mode, addr, as)
3515 || memory_address_addr_space_p (mode, scaled, as))
3516 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3517 }
3518
3519 if (dump_file && (dump_flags & TDF_DETAILS))
3520 {
3521 fprintf (dump_file, " allowed multipliers:");
3522 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3523 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3524 fprintf (dump_file, " %d", (int) i);
3525 fprintf (dump_file, "\n");
3526 fprintf (dump_file, "\n");
3527 }
3528
3529 valid_mult_list[data_index] = valid_mult;
3530 }
3531
3532 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3533 return false;
3534
3535 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3536 }
3537
3538 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3539 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3540 variable is omitted. Compute the cost for a memory reference that accesses
3541 a memory location of mode MEM_MODE in address space AS.
3542
3543 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3544 size of MEM_MODE / RATIO) is available. To make this determination, we
3545 look at the size of the increment to be made, which is given in CSTEP.
3546 CSTEP may be zero if the step is unknown.
3547 STMT_AFTER_INC is true iff the statement we're looking at is after the
3548 increment of the original biv.
3549
3550 TODO -- there must be some better way. This all is quite crude. */
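/* The sixteen cached cost entries correspond to the address shapes

[sym +] [var +] [cst +] [rat *] index

with each of the four components independently present or absent; e.g.
costs[1][0][1][0] holds the cost of a "sym + cst + index" address. */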
3551
3552 enum ainc_type
3553 {
3554 AINC_PRE_INC, /* Pre increment. */
3555 AINC_PRE_DEC, /* Pre decrement. */
3556 AINC_POST_INC, /* Post increment. */
3557 AINC_POST_DEC, /* Post decrement. */
3558 AINC_NONE /* Also the number of auto increment types. */
3559 };
3560
3561 typedef struct address_cost_data_s
3562 {
3563 HOST_WIDE_INT min_offset, max_offset;
3564 unsigned costs[2][2][2][2];
3565 unsigned ainc_costs[AINC_NONE];
3566 } *address_cost_data;
3567
3568
3569 static comp_cost
3570 get_address_cost (bool symbol_present, bool var_present,
3571 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3572 HOST_WIDE_INT cstep, machine_mode mem_mode,
3573 addr_space_t as, bool speed,
3574 bool stmt_after_inc, bool *may_autoinc)
3575 {
3576 machine_mode address_mode = targetm.addr_space.address_mode (as);
3577 static vec<address_cost_data> address_cost_data_list;
3578 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3579 address_cost_data data;
3580 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3581 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3582 unsigned cost, acost, complexity;
3583 enum ainc_type autoinc_type;
3584 bool offset_p, ratio_p, autoinc;
3585 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3586 unsigned HOST_WIDE_INT mask;
3587 unsigned bits;
3588
3589 if (data_index >= address_cost_data_list.length ())
3590 address_cost_data_list.safe_grow_cleared (data_index + 1);
3591
3592 data = address_cost_data_list[data_index];
3593 if (!data)
3594 {
3595 HOST_WIDE_INT i;
3596 HOST_WIDE_INT rat, off = 0;
3597 int old_cse_not_expected, width;
3598 unsigned sym_p, var_p, off_p, rat_p, add_c;
3599 rtx_insn *seq;
3600 rtx addr, base;
3601 rtx reg0, reg1;
3602
3603 data = (address_cost_data) xcalloc (1, sizeof (*data));
3604
3605 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3606
3607 width = GET_MODE_BITSIZE (address_mode) - 1;
3608 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3609 width = HOST_BITS_PER_WIDE_INT - 1;
3610 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3611
3612 for (i = width; i >= 0; i--)
3613 {
3614 off = -((unsigned HOST_WIDE_INT) 1 << i);
3615 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3616 if (memory_address_addr_space_p (mem_mode, addr, as))
3617 break;
3618 }
3619 data->min_offset = (i == -1? 0 : off);
3620
3621 for (i = width; i >= 0; i--)
3622 {
3623 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3624 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3625 if (memory_address_addr_space_p (mem_mode, addr, as))
3626 break;
3627 /* For some strict-alignment targets, the offset must be naturally
3628 aligned. Try an aligned offset if mem_mode is not QImode. */
3629 off = mem_mode != QImode
3630 ? ((unsigned HOST_WIDE_INT) 1 << i)
3631 - GET_MODE_SIZE (mem_mode)
3632 : 0;
3633 if (off > 0)
3634 {
3635 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3636 if (memory_address_addr_space_p (mem_mode, addr, as))
3637 break;
3638 }
3639 }
3640 if (i == -1)
3641 off = 0;
3642 data->max_offset = off;
3643
3644 if (dump_file && (dump_flags & TDF_DETAILS))
3645 {
3646 fprintf (dump_file, "get_address_cost:\n");
3647 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3648 GET_MODE_NAME (mem_mode),
3649 data->min_offset);
3650 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3651 GET_MODE_NAME (mem_mode),
3652 data->max_offset);
3653 }
3654
3655 rat = 1;
3656 for (i = 2; i <= MAX_RATIO; i++)
3657 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3658 {
3659 rat = i;
3660 break;
3661 }
3662
3663 /* Compute the cost of various addressing modes. */
3664 acost = 0;
3665 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3666 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3667
3668 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3669 || USE_STORE_PRE_DECREMENT (mem_mode))
3670 {
3671 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3672 has_predec[mem_mode]
3673 = memory_address_addr_space_p (mem_mode, addr, as);
3674
3675 if (has_predec[mem_mode])
3676 data->ainc_costs[AINC_PRE_DEC]
3677 = address_cost (addr, mem_mode, as, speed);
3678 }
3679 if (USE_LOAD_POST_DECREMENT (mem_mode)
3680 || USE_STORE_POST_DECREMENT (mem_mode))
3681 {
3682 addr = gen_rtx_POST_DEC (address_mode, reg0);
3683 has_postdec[mem_mode]
3684 = memory_address_addr_space_p (mem_mode, addr, as);
3685
3686 if (has_postdec[mem_mode])
3687 data->ainc_costs[AINC_POST_DEC]
3688 = address_cost (addr, mem_mode, as, speed);
3689 }
3690 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3691 || USE_STORE_PRE_INCREMENT (mem_mode))
3692 {
3693 addr = gen_rtx_PRE_INC (address_mode, reg0);
3694 has_preinc[mem_mode]
3695 = memory_address_addr_space_p (mem_mode, addr, as);
3696
3697 if (has_preinc[mem_mode])
3698 data->ainc_costs[AINC_PRE_INC]
3699 = address_cost (addr, mem_mode, as, speed);
3700 }
3701 if (USE_LOAD_POST_INCREMENT (mem_mode)
3702 || USE_STORE_POST_INCREMENT (mem_mode))
3703 {
3704 addr = gen_rtx_POST_INC (address_mode, reg0);
3705 has_postinc[mem_mode]
3706 = memory_address_addr_space_p (mem_mode, addr, as);
3707
3708 if (has_postinc[mem_mode])
3709 data->ainc_costs[AINC_POST_INC]
3710 = address_cost (addr, mem_mode, as, speed);
3711 }
3712 for (i = 0; i < 16; i++)
3713 {
3714 sym_p = i & 1;
3715 var_p = (i >> 1) & 1;
3716 off_p = (i >> 2) & 1;
3717 rat_p = (i >> 3) & 1;
3718
3719 addr = reg0;
3720 if (rat_p)
3721 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3722 gen_int_mode (rat, address_mode));
3723
3724 if (var_p)
3725 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3726
3727 if (sym_p)
3728 {
3729 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3730 /* ??? We can run into trouble with some backends by presenting
3731 it with symbols which haven't been properly passed through
3732 targetm.encode_section_info. By setting the local bit, we
3733 enhance the probability of things working. */
3734 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3735
3736 if (off_p)
3737 base = gen_rtx_fmt_e (CONST, address_mode,
3738 gen_rtx_fmt_ee
3739 (PLUS, address_mode, base,
3740 gen_int_mode (off, address_mode)));
3741 }
3742 else if (off_p)
3743 base = gen_int_mode (off, address_mode);
3744 else
3745 base = NULL_RTX;
3746
3747 if (base)
3748 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3749
3750 start_sequence ();
3751 /* To avoid splitting addressing modes, pretend that no cse will
3752 follow. */
3753 old_cse_not_expected = cse_not_expected;
3754 cse_not_expected = true;
3755 addr = memory_address_addr_space (mem_mode, addr, as);
3756 cse_not_expected = old_cse_not_expected;
3757 seq = get_insns ();
3758 end_sequence ();
3759
3760 acost = seq_cost (seq, speed);
3761 acost += address_cost (addr, mem_mode, as, speed);
3762
3763 if (!acost)
3764 acost = 1;
3765 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3766 }
3767
3768 /* On some targets, it is quite expensive to load a symbol into a register,
3769 which makes addresses that contain symbols look much more expensive.
3770 However, the symbol will have to be loaded in any case before the
3771 loop (and quite likely we have it in a register already), so it does not
3772 make much sense to penalize them too heavily. So make some final
3773 tweaks for the SYMBOL_PRESENT modes:
3774
3775 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3776 var is cheaper, use this mode with small penalty.
3777 If VAR_PRESENT is true, try whether the mode with
3778 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3779 if this is the case, use it. */
3780 add_c = add_cost (speed, address_mode);
3781 for (i = 0; i < 8; i++)
3782 {
3783 var_p = i & 1;
3784 off_p = (i >> 1) & 1;
3785 rat_p = (i >> 2) & 1;
3786
3787 acost = data->costs[0][1][off_p][rat_p] + 1;
3788 if (var_p)
3789 acost += add_c;
3790
3791 if (acost < data->costs[1][var_p][off_p][rat_p])
3792 data->costs[1][var_p][off_p][rat_p] = acost;
3793 }
3794
3795 if (dump_file && (dump_flags & TDF_DETAILS))
3796 {
3797 fprintf (dump_file, "Address costs:\n");
3798
3799 for (i = 0; i < 16; i++)
3800 {
3801 sym_p = i & 1;
3802 var_p = (i >> 1) & 1;
3803 off_p = (i >> 2) & 1;
3804 rat_p = (i >> 3) & 1;
3805
3806 fprintf (dump_file, " ");
3807 if (sym_p)
3808 fprintf (dump_file, "sym + ");
3809 if (var_p)
3810 fprintf (dump_file, "var + ");
3811 if (off_p)
3812 fprintf (dump_file, "cst + ");
3813 if (rat_p)
3814 fprintf (dump_file, "rat * ");
3815
3816 acost = data->costs[sym_p][var_p][off_p][rat_p];
3817 fprintf (dump_file, "index costs %d\n", acost);
3818 }
3819 if (has_predec[mem_mode] || has_postdec[mem_mode]
3820 || has_preinc[mem_mode] || has_postinc[mem_mode])
3821 fprintf (dump_file, " May include autoinc/dec\n");
3822 fprintf (dump_file, "\n");
3823 }
3824
3825 address_cost_data_list[data_index] = data;
3826 }
3827
3828 bits = GET_MODE_BITSIZE (address_mode);
3829 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3830 offset &= mask;
3831 if ((offset >> (bits - 1) & 1))
3832 offset |= ~mask;
3833 s_offset = offset;
3834
3835 autoinc = false;
3836 autoinc_type = AINC_NONE;
3837 msize = GET_MODE_SIZE (mem_mode);
3838 autoinc_offset = offset;
3839 if (stmt_after_inc)
3840 autoinc_offset += ratio * cstep;
3841 if (symbol_present || var_present || ratio != 1)
3842 autoinc = false;
3843 else
3844 {
3845 if (has_postinc[mem_mode] && autoinc_offset == 0
3846 && msize == cstep)
3847 autoinc_type = AINC_POST_INC;
3848 else if (has_postdec[mem_mode] && autoinc_offset == 0
3849 && msize == -cstep)
3850 autoinc_type = AINC_POST_DEC;
3851 else if (has_preinc[mem_mode] && autoinc_offset == msize
3852 && msize == cstep)
3853 autoinc_type = AINC_PRE_INC;
3854 else if (has_predec[mem_mode] && autoinc_offset == -msize
3855 && msize == -cstep)
3856 autoinc_type = AINC_PRE_DEC;
3857
3858 if (autoinc_type != AINC_NONE)
3859 autoinc = true;
3860 }
3861
3862 cost = 0;
3863 offset_p = (s_offset != 0
3864 && data->min_offset <= s_offset
3865 && s_offset <= data->max_offset);
3866 ratio_p = (ratio != 1
3867 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3868
3869 if (ratio != 1 && !ratio_p)
3870 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3871
3872 if (s_offset && !offset_p && !symbol_present)
3873 cost += add_cost (speed, address_mode);
3874
3875 if (may_autoinc)
3876 *may_autoinc = autoinc;
3877 if (autoinc)
3878 acost = data->ainc_costs[autoinc_type];
3879 else
3880 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3881 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3882 return new_cost (cost + acost, complexity);
3883 }
3884
3885 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3886 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3887 calculating the operands of EXPR. Returns true if successful, and returns
3888 the cost in COST. */
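/* E.g. for EXPR == a + b * 4, MULT is b * 4 and m == 2; the resulting
cost is the cheaper of a single shift-and-add instruction (where the
target has one) and an explicit shift followed by an add. */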
3889
3890 static bool
3891 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3892 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3893 {
3894 comp_cost res;
3895 tree op1 = TREE_OPERAND (expr, 1);
3896 tree cst = TREE_OPERAND (mult, 1);
3897 tree multop = TREE_OPERAND (mult, 0);
3898 int m = exact_log2 (int_cst_value (cst));
3899 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3900 int as_cost, sa_cost;
3901 bool mult_in_op1;
3902
3903 if (!(m >= 0 && m < maxm))
3904 return false;
3905
3906 mult_in_op1 = operand_equal_p (op1, mult, 0);
3907
3908 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3909
3910 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3911 use that in preference to a shift insn followed by an add insn. */
3912 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3913 ? shiftadd_cost (speed, mode, m)
3914 : (mult_in_op1
3915 ? shiftsub1_cost (speed, mode, m)
3916 : shiftsub0_cost (speed, mode, m)));
3917
3918 res = new_cost (MIN (as_cost, sa_cost), 0);
3919 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3920
3921 STRIP_NOPS (multop);
3922 if (!is_gimple_val (multop))
3923 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3924
3925 *cost = res;
3926 return true;
3927 }
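/* Example (editor's sketch): for EXPR = a + b * 4 we have MULT == b * 4,
   cst == 4 and thus m == exact_log2 (4) == 2.  The function compares

     as_cost = add_cost + shift_cost (m)   -- a shift insn plus an add insn
     sa_cost = shiftadd_cost (m)           -- e.g. ARM: add r0, r1, r2, lsl #2

   and charges MIN (as_cost, sa_cost) plus the cost of the non-multiplied
   operand, so targets with a cheap shift-and-add keep a + b * 4 cheap.  */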
3928
3929 /* Estimates cost of forcing expression EXPR into a variable. */
3930
3931 static comp_cost
3932 force_expr_to_var_cost (tree expr, bool speed)
3933 {
3934 static bool costs_initialized = false;
3935 static unsigned integer_cost [2];
3936 static unsigned symbol_cost [2];
3937 static unsigned address_cost [2];
3938 tree op0, op1;
3939 comp_cost cost0, cost1, cost;
3940 machine_mode mode;
3941
3942 if (!costs_initialized)
3943 {
3944 tree type = build_pointer_type (integer_type_node);
3945 tree var, addr;
3946 rtx x;
3947 int i;
3948
3949 var = create_tmp_var_raw (integer_type_node, "test_var");
3950 TREE_STATIC (var) = 1;
3951 x = produce_memory_decl_rtl (var, NULL);
3952 SET_DECL_RTL (var, x);
3953
3954 addr = build1 (ADDR_EXPR, type, var);
3955
3957 for (i = 0; i < 2; i++)
3958 {
3959 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3960 2000), i);
3961
3962 symbol_cost[i] = computation_cost (addr, i) + 1;
3963
3964 address_cost[i]
3965 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3966 if (dump_file && (dump_flags & TDF_DETAILS))
3967 {
3968 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3969 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3970 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3971 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3972 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3973 fprintf (dump_file, "\n");
3974 }
3975 }
3976
3977 costs_initialized = true;
3978 }
3979
3980 STRIP_NOPS (expr);
3981
3982 if (SSA_VAR_P (expr))
3983 return no_cost;
3984
3985 if (is_gimple_min_invariant (expr))
3986 {
3987 if (TREE_CODE (expr) == INTEGER_CST)
3988 return new_cost (integer_cost [speed], 0);
3989
3990 if (TREE_CODE (expr) == ADDR_EXPR)
3991 {
3992 tree obj = TREE_OPERAND (expr, 0);
3993
3994 if (TREE_CODE (obj) == VAR_DECL
3995 || TREE_CODE (obj) == PARM_DECL
3996 || TREE_CODE (obj) == RESULT_DECL)
3997 return new_cost (symbol_cost [speed], 0);
3998 }
3999
4000 return new_cost (address_cost [speed], 0);
4001 }
4002
4003 switch (TREE_CODE (expr))
4004 {
4005 case POINTER_PLUS_EXPR:
4006 case PLUS_EXPR:
4007 case MINUS_EXPR:
4008 case MULT_EXPR:
4009 op0 = TREE_OPERAND (expr, 0);
4010 op1 = TREE_OPERAND (expr, 1);
4011 STRIP_NOPS (op0);
4012 STRIP_NOPS (op1);
4013 break;
4014
4015 CASE_CONVERT:
4016 case NEGATE_EXPR:
4017 op0 = TREE_OPERAND (expr, 0);
4018 STRIP_NOPS (op0);
4019 op1 = NULL_TREE;
4020 break;
4021
4022 default:
4023 /* Just an arbitrary value, FIXME. */
4024 return new_cost (target_spill_cost[speed], 0);
4025 }
4026
4027 if (op0 == NULL_TREE
4028 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4029 cost0 = no_cost;
4030 else
4031 cost0 = force_expr_to_var_cost (op0, speed);
4032
4033 if (op1 == NULL_TREE
4034 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4035 cost1 = no_cost;
4036 else
4037 cost1 = force_expr_to_var_cost (op1, speed);
4038
4039 mode = TYPE_MODE (TREE_TYPE (expr));
4040 switch (TREE_CODE (expr))
4041 {
4042 case POINTER_PLUS_EXPR:
4043 case PLUS_EXPR:
4044 case MINUS_EXPR:
4045 case NEGATE_EXPR:
4046 cost = new_cost (add_cost (speed, mode), 0);
4047 if (TREE_CODE (expr) != NEGATE_EXPR)
4048 {
4049 tree mult = NULL_TREE;
4050 comp_cost sa_cost;
4051 if (TREE_CODE (op1) == MULT_EXPR)
4052 mult = op1;
4053 else if (TREE_CODE (op0) == MULT_EXPR)
4054 mult = op0;
4055
4056 if (mult != NULL_TREE
4057 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4058 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4059 speed, &sa_cost))
4060 return sa_cost;
4061 }
4062 break;
4063
4064 CASE_CONVERT:
4065 {
4066 tree inner_mode, outer_mode;
4067 outer_mode = TREE_TYPE (expr);
4068 inner_mode = TREE_TYPE (op0);
4069 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4070 TYPE_MODE (inner_mode), speed), 0);
4071 }
4072 break;
4073
4074 case MULT_EXPR:
4075 if (cst_and_fits_in_hwi (op0))
4076 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4077 mode, speed), 0);
4078 else if (cst_and_fits_in_hwi (op1))
4079 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4080 mode, speed), 0);
4081 else
4082 return new_cost (target_spill_cost [speed], 0);
4083 break;
4084
4085 default:
4086 gcc_unreachable ();
4087 }
4088
4089 cost = add_costs (cost, cost0);
4090 cost = add_costs (cost, cost1);
4091
4092 /* Bound the cost by target_spill_cost. The parts of complicated
4093 computations often are either loop invariant or at least can
4094 be shared between several iv uses, so letting this grow without
4095 limits would not give reasonable results. */
4096 if (cost.cost > (int) target_spill_cost [speed])
4097 cost.cost = target_spill_cost [speed];
4098
4099 return cost;
4100 }
4101
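/* Example (editor's note): for EXPR = a + b the cost is add_cost plus the
   costs of forcing any non-trivial operands; for EXPR = a * 4 it is
   mult_by_coeff_cost (4, mode, speed); and for EXPR = (a + 4) * b neither
   MULT_EXPR operand is a suitable constant, so the arbitrary
   target_spill_cost fallback is used.  In all cases the result is capped
   at target_spill_cost.  */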
4102 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4103 invariants the computation depends on. */
4104
4105 static comp_cost
4106 force_var_cost (struct ivopts_data *data,
4107 tree expr, bitmap *depends_on)
4108 {
4109 if (depends_on)
4110 {
4111 fd_ivopts_data = data;
4112 walk_tree (&expr, find_depends, depends_on, NULL);
4113 }
4114
4115 return force_expr_to_var_cost (expr, data->speed);
4116 }
4117
4118 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4119 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are
4120 set to false if the corresponding part is missing. DEPENDS_ON is a set of
4121 the invariants the computation depends on. */
4122
4123 static comp_cost
4124 split_address_cost (struct ivopts_data *data,
4125 tree addr, bool *symbol_present, bool *var_present,
4126 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4127 {
4128 tree core;
4129 HOST_WIDE_INT bitsize;
4130 HOST_WIDE_INT bitpos;
4131 tree toffset;
4132 machine_mode mode;
4133 int unsignedp, volatilep;
4134
4135 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4136 &unsignedp, &volatilep, false);
4137
4138 if (toffset != 0
4139 || bitpos % BITS_PER_UNIT != 0
4140 || TREE_CODE (core) != VAR_DECL)
4141 {
4142 *symbol_present = false;
4143 *var_present = true;
4144 fd_ivopts_data = data;
4145 walk_tree (&addr, find_depends, depends_on, NULL);
4146 return new_cost (target_spill_cost[data->speed], 0);
4147 }
4148
4149 *offset += bitpos / BITS_PER_UNIT;
4150 if (TREE_STATIC (core)
4151 || DECL_EXTERNAL (core))
4152 {
4153 *symbol_present = true;
4154 *var_present = false;
4155 return no_cost;
4156 }
4157
4158 *symbol_present = false;
4159 *var_present = true;
4160 return no_cost;
4161 }
4162
4163 /* Estimates cost of expressing difference of addresses E1 - E2 as
4164 var + symbol + offset. The value of the offset is added to OFFSET;
4165 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4166 part is missing. DEPENDS_ON is a set of the invariants the computation
4167 depends on. */
4168
4169 static comp_cost
4170 ptr_difference_cost (struct ivopts_data *data,
4171 tree e1, tree e2, bool *symbol_present, bool *var_present,
4172 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4173 {
4174 HOST_WIDE_INT diff = 0;
4175 aff_tree aff_e1, aff_e2;
4176 tree type;
4177
4178 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4179
4180 if (ptr_difference_const (e1, e2, &diff))
4181 {
4182 *offset += diff;
4183 *symbol_present = false;
4184 *var_present = false;
4185 return no_cost;
4186 }
4187
4188 if (integer_zerop (e2))
4189 return split_address_cost (data, TREE_OPERAND (e1, 0),
4190 symbol_present, var_present, offset, depends_on);
4191
4192 *symbol_present = false;
4193 *var_present = true;
4194
4195 type = signed_type_for (TREE_TYPE (e1));
4196 tree_to_aff_combination (e1, type, &aff_e1);
4197 tree_to_aff_combination (e2, type, &aff_e2);
4198 aff_combination_scale (&aff_e2, -1);
4199 aff_combination_add (&aff_e1, &aff_e2);
4200
4201 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4202 }
4203
4204 /* Estimates cost of expressing difference E1 - E2 as
4205 var + symbol + offset. The value of the offset is added to OFFSET;
4206 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4207 part is missing. DEPENDS_ON is a set of the invariants the computation
4208 depends on. */
4209
4210 static comp_cost
4211 difference_cost (struct ivopts_data *data,
4212 tree e1, tree e2, bool *symbol_present, bool *var_present,
4213 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4214 {
4215 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4216 unsigned HOST_WIDE_INT off1, off2;
4217 aff_tree aff_e1, aff_e2;
4218 tree type;
4219
4220 e1 = strip_offset (e1, &off1);
4221 e2 = strip_offset (e2, &off2);
4222 *offset += off1 - off2;
4223
4224 STRIP_NOPS (e1);
4225 STRIP_NOPS (e2);
4226
4227 if (TREE_CODE (e1) == ADDR_EXPR)
4228 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4229 offset, depends_on);
4230 *symbol_present = false;
4231
4232 if (operand_equal_p (e1, e2, 0))
4233 {
4234 *var_present = false;
4235 return no_cost;
4236 }
4237
4238 *var_present = true;
4239
4240 if (integer_zerop (e2))
4241 return force_var_cost (data, e1, depends_on);
4242
4243 if (integer_zerop (e1))
4244 {
4245 comp_cost cost = force_var_cost (data, e2, depends_on);
4246 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4247 return cost;
4248 }
4249
4250 type = signed_type_for (TREE_TYPE (e1));
4251 tree_to_aff_combination (e1, type, &aff_e1);
4252 tree_to_aff_combination (e2, type, &aff_e2);
4253 aff_combination_scale (&aff_e2, -1);
4254 aff_combination_add (&aff_e1, &aff_e2);
4255
4256 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4257 }
4258
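/* Example (editor's note): for E1 == &a[10] and E2 == &a[4] with 4-byte
   elements, strip_offset yields off1 == 40 and off2 == 16, *OFFSET grows
   by 24, and ptr_difference_cost then finds a constant difference of zero
   between the stripped bases, so the whole difference costs nothing.
   For E1 == p + n and E2 == p, the affine difference n must instead be
   forced into a register.  */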
4259 /* Returns true if AFF1 and AFF2 are identical. */
4260
4261 static bool
4262 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4263 {
4264 unsigned i;
4265
4266 if (aff1->n != aff2->n)
4267 return false;
4268
4269 for (i = 0; i < aff1->n; i++)
4270 {
4271 if (aff1->elts[i].coef != aff2->elts[i].coef)
4272 return false;
4273
4274 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4275 return false;
4276 }
4277 return true;
4278 }
4279
4280 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4281
4282 static int
4283 get_expr_id (struct ivopts_data *data, tree expr)
4284 {
4285 struct iv_inv_expr_ent ent;
4286 struct iv_inv_expr_ent **slot;
4287
4288 ent.expr = expr;
4289 ent.hash = iterative_hash_expr (expr, 0);
4290 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4291 if (*slot)
4292 return (*slot)->id;
4293
4294 *slot = XNEW (struct iv_inv_expr_ent);
4295 (*slot)->expr = expr;
4296 (*slot)->hash = ent.hash;
4297 (*slot)->id = data->inv_expr_id++;
4298 return (*slot)->id;
4299 }
4300
4301 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4302 requires a new compiler generated temporary. Returns -1 otherwise.
4303 ADDRESS_P is a flag indicating if the expression is for address
4304 computation. */
4305
4306 static int
4307 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4308 tree cbase, HOST_WIDE_INT ratio,
4309 bool address_p)
4310 {
4311 aff_tree ubase_aff, cbase_aff;
4312 tree expr, ub, cb;
4313
4314 STRIP_NOPS (ubase);
4315 STRIP_NOPS (cbase);
4316 ub = ubase;
4317 cb = cbase;
4318
4319 if ((TREE_CODE (ubase) == INTEGER_CST)
4320 && (TREE_CODE (cbase) == INTEGER_CST))
4321 return -1;
4322
4323 /* Strips the constant part. */
4324 if (TREE_CODE (ubase) == PLUS_EXPR
4325 || TREE_CODE (ubase) == MINUS_EXPR
4326 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4327 {
4328 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4329 ubase = TREE_OPERAND (ubase, 0);
4330 }
4331
4332 /* Strips the constant part. */
4333 if (TREE_CODE (cbase) == PLUS_EXPR
4334 || TREE_CODE (cbase) == MINUS_EXPR
4335 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4336 {
4337 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4338 cbase = TREE_OPERAND (cbase, 0);
4339 }
4340
4341 if (address_p)
4342 {
4343 if (((TREE_CODE (ubase) == SSA_NAME)
4344 || (TREE_CODE (ubase) == ADDR_EXPR
4345 && is_gimple_min_invariant (ubase)))
4346 && (TREE_CODE (cbase) == INTEGER_CST))
4347 return -1;
4348
4349 if (((TREE_CODE (cbase) == SSA_NAME)
4350 || (TREE_CODE (cbase) == ADDR_EXPR
4351 && is_gimple_min_invariant (cbase)))
4352 && (TREE_CODE (ubase) == INTEGER_CST))
4353 return -1;
4354 }
4355
4356 if (ratio == 1)
4357 {
4358 if (operand_equal_p (ubase, cbase, 0))
4359 return -1;
4360
4361 if (TREE_CODE (ubase) == ADDR_EXPR
4362 && TREE_CODE (cbase) == ADDR_EXPR)
4363 {
4364 tree usym, csym;
4365
4366 usym = TREE_OPERAND (ubase, 0);
4367 csym = TREE_OPERAND (cbase, 0);
4368 if (TREE_CODE (usym) == ARRAY_REF)
4369 {
4370 tree ind = TREE_OPERAND (usym, 1);
4371 if (TREE_CODE (ind) == INTEGER_CST
4372 && tree_fits_shwi_p (ind)
4373 && tree_to_shwi (ind) == 0)
4374 usym = TREE_OPERAND (usym, 0);
4375 }
4376 if (TREE_CODE (csym) == ARRAY_REF)
4377 {
4378 tree ind = TREE_OPERAND (csym, 1);
4379 if (TREE_CODE (ind) == INTEGER_CST
4380 && tree_fits_shwi_p (ind)
4381 && tree_to_shwi (ind) == 0)
4382 csym = TREE_OPERAND (csym, 0);
4383 }
4384 if (operand_equal_p (usym, csym, 0))
4385 return -1;
4386 }
4387 /* Now do a more complex comparison. */
4388 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4389 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4390 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4391 return -1;
4392 }
4393
4394 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4395 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4396
4397 aff_combination_scale (&cbase_aff, -1 * ratio);
4398 aff_combination_add (&ubase_aff, &cbase_aff);
4399 expr = aff_combination_to_tree (&ubase_aff);
4400 return get_expr_id (data, expr);
4401 }
4402
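/* Example (editor's note): for UBASE == &a[0] + i and CBASE == &a[0] + j
   with RATIO == 1, the bases differ by the loop-invariant i - j, so the
   function builds that affine difference and returns its inv_expr_id;
   the expression needs a compiler-generated temporary.  For
   UBASE == CBASE == &a[0] with RATIO == 1 it returns -1: no temporary
   is needed.  */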
4403
4404
4405 /* Determines the cost of the computation by which USE is expressed
4406 from induction variable CAND. If ADDRESS_P is true, we just need
4407 to create an address from it, otherwise we want to get it into a
4408 register. A set of invariants we depend on is stored in
4409 DEPENDS_ON. AT is the statement at which the value is computed.
4410 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4411 addressing is likely. */
4412
4413 static comp_cost
4414 get_computation_cost_at (struct ivopts_data *data,
4415 struct iv_use *use, struct iv_cand *cand,
4416 bool address_p, bitmap *depends_on, gimple at,
4417 bool *can_autoinc,
4418 int *inv_expr_id)
4419 {
4420 tree ubase = use->iv->base, ustep = use->iv->step;
4421 tree cbase, cstep;
4422 tree utype = TREE_TYPE (ubase), ctype;
4423 unsigned HOST_WIDE_INT cstepi, offset = 0;
4424 HOST_WIDE_INT ratio, aratio;
4425 bool var_present, symbol_present, stmt_is_after_inc;
4426 comp_cost cost;
4427 widest_int rat;
4428 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4429 machine_mode mem_mode = (address_p
4430 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4431 : VOIDmode);
4432
4433 *depends_on = NULL;
4434
4435 /* Only consider real candidates. */
4436 if (!cand->iv)
4437 return infinite_cost;
4438
4439 cbase = cand->iv->base;
4440 cstep = cand->iv->step;
4441 ctype = TREE_TYPE (cbase);
4442
4443 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4444 {
4445 /* We do not have enough precision to express the values of the use. */
4446 return infinite_cost;
4447 }
4448
4449 if (address_p
4450 || (use->iv->base_object
4451 && cand->iv->base_object
4452 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4453 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4454 {
4455 /* Do not try to express address of an object with computation based
4456 on address of a different object. This may cause problems in rtl
4457 level alias analysis (that does not expect this to be happening,
4458 as this is illegal in C), and would be unlikely to be useful
4459 anyway. */
4460 if (use->iv->base_object
4461 && cand->iv->base_object
4462 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4463 return infinite_cost;
4464 }
4465
4466 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4467 {
4468 /* TODO -- add direct handling of this case. */
4469 goto fallback;
4470 }
4471
4472 /* CSTEPI is removed from the offset in case the statement is after the
4473 increment. If the step is not constant, we use zero instead.
4474 This is a bit imprecise (there is the extra addition), but
4475 redundancy elimination is likely to transform the code so that
4476 it uses the value of the variable before the increment anyway,
4477 so this is not all that unrealistic. */
4478 if (cst_and_fits_in_hwi (cstep))
4479 cstepi = int_cst_value (cstep);
4480 else
4481 cstepi = 0;
4482
4483 if (!constant_multiple_of (ustep, cstep, &rat))
4484 return infinite_cost;
4485
4486 if (wi::fits_shwi_p (rat))
4487 ratio = rat.to_shwi ();
4488 else
4489 return infinite_cost;
4490
4491 STRIP_NOPS (cbase);
4492 ctype = TREE_TYPE (cbase);
4493
4494 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4495
4496 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4497 or ratio == 1, it is better to handle this like
4498
4499 ubase - ratio * cbase + ratio * var
4500
4501 (this also holds in the case ratio == -1, TODO). */
4502
4503 if (cst_and_fits_in_hwi (cbase))
4504 {
4505 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4506 cost = difference_cost (data,
4507 ubase, build_int_cst (utype, 0),
4508 &symbol_present, &var_present, &offset,
4509 depends_on);
4510 cost.cost /= avg_loop_niter (data->current_loop);
4511 }
4512 else if (ratio == 1)
4513 {
4514 tree real_cbase = cbase;
4515
4516 /* Check to see if any adjustment is needed. */
4517 if (cstepi == 0 && stmt_is_after_inc)
4518 {
4519 aff_tree real_cbase_aff;
4520 aff_tree cstep_aff;
4521
4522 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4523 &real_cbase_aff);
4524 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4525
4526 aff_combination_add (&real_cbase_aff, &cstep_aff);
4527 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4528 }
4529
4530 cost = difference_cost (data,
4531 ubase, real_cbase,
4532 &symbol_present, &var_present, &offset,
4533 depends_on);
4534 cost.cost /= avg_loop_niter (data->current_loop);
4535 }
4536 else if (address_p
4537 && !POINTER_TYPE_P (ctype)
4538 && multiplier_allowed_in_address_p
4539 (ratio, mem_mode,
4540 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4541 {
4542 cbase
4543 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4544 cost = difference_cost (data,
4545 ubase, cbase,
4546 &symbol_present, &var_present, &offset,
4547 depends_on);
4548 cost.cost /= avg_loop_niter (data->current_loop);
4549 }
4550 else
4551 {
4552 cost = force_var_cost (data, cbase, depends_on);
4553 cost = add_costs (cost,
4554 difference_cost (data,
4555 ubase, build_int_cst (utype, 0),
4556 &symbol_present, &var_present,
4557 &offset, depends_on));
4558 cost.cost /= avg_loop_niter (data->current_loop);
4559 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4560 }
4561
4562 /* The set of invariants depended on by the sub-use has already been
4563 computed for the first use in the group. */
4564 if (use->sub_id)
4565 {
4566 cost.cost = 0;
4567 if (depends_on && *depends_on)
4568 bitmap_clear (*depends_on);
4569 }
4570 else if (inv_expr_id)
4571 {
4572 *inv_expr_id =
4573 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4574 /* Clear depends on. */
4575 if (*inv_expr_id != -1 && depends_on && *depends_on)
4576 bitmap_clear (*depends_on);
4577 }
4578
4579 /* If we are after the increment, the value of the candidate is higher by
4580 one iteration. */
4581 if (stmt_is_after_inc)
4582 offset -= ratio * cstepi;
4583
4584 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4585 (symbol/var1/const parts may be omitted). If we are looking for an
4586 address, find the cost of addressing this. */
4587 if (address_p)
4588 return add_costs (cost,
4589 get_address_cost (symbol_present, var_present,
4590 offset, ratio, cstepi,
4591 mem_mode,
4592 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4593 speed, stmt_is_after_inc,
4594 can_autoinc));
4595
4596 /* Otherwise estimate the costs for computing the expression. */
4597 if (!symbol_present && !var_present && !offset)
4598 {
4599 if (ratio != 1)
4600 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4601 return cost;
4602 }
4603
4604 /* Symbol + offset should be compile-time computable so consider that they
4605 are added once to the variable, if present. */
4606 if (var_present && (symbol_present || offset))
4607 cost.cost += adjust_setup_cost (data,
4608 add_cost (speed, TYPE_MODE (ctype)));
4609
4610 /* Having offset does not affect runtime cost in case it is added to
4611 symbol, but it increases complexity. */
4612 if (offset)
4613 cost.complexity++;
4614
4615 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4616
4617 aratio = ratio > 0 ? ratio : -ratio;
4618 if (aratio != 1)
4619 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4620 return cost;
4621
4622 fallback:
4623 if (can_autoinc)
4624 *can_autoinc = false;
4625
4626 {
4627 /* Just get the expression, expand it and measure the cost. */
4628 tree comp = get_computation_at (data->current_loop, use, cand, at);
4629
4630 if (!comp)
4631 return infinite_cost;
4632
4633 if (address_p)
4634 comp = build_simple_mem_ref (comp);
4635
4636 return new_cost (computation_cost (comp, speed), 0);
4637 }
4638 }
4639
4640 /* Determines the cost of the computation by which USE is expressed
4641 from induction variable CAND. If ADDRESS_P is true, we just need
4642 to create an address from it, otherwise we want to get it into a
4643 register. A set of invariants we depend on is stored in
4644 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4645 autoinc addressing is likely. */
4646
4647 static comp_cost
4648 get_computation_cost (struct ivopts_data *data,
4649 struct iv_use *use, struct iv_cand *cand,
4650 bool address_p, bitmap *depends_on,
4651 bool *can_autoinc, int *inv_expr_id)
4652 {
4653 return get_computation_cost_at (data,
4654 use, cand, address_p, depends_on, use->stmt,
4655 can_autoinc, inv_expr_id);
4656 }
4657
4658 /* Determines cost of basing replacement of USE on CAND in a generic
4659 expression. */
4660
4661 static bool
4662 determine_use_iv_cost_generic (struct ivopts_data *data,
4663 struct iv_use *use, struct iv_cand *cand)
4664 {
4665 bitmap depends_on;
4666 comp_cost cost;
4667 int inv_expr_id = -1;
4668
4669 /* The simple case first -- if we need to express value of the preserved
4670 original biv, the cost is 0. This also prevents us from counting the
4671 cost of increment twice -- once at this use and once in the cost of
4672 the candidate. */
4673 if (cand->pos == IP_ORIGINAL
4674 && cand->incremented_at == use->stmt)
4675 {
4676 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4677 ERROR_MARK, -1);
4678 return true;
4679 }
4680
4681 cost = get_computation_cost (data, use, cand, false, &depends_on,
4682 NULL, &inv_expr_id);
4683
4684 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4685 inv_expr_id);
4686
4687 return !infinite_cost_p (cost);
4688 }
4689
4690 /* Determines cost of basing replacement of USE on CAND in an address. */
4691
4692 static bool
4693 determine_use_iv_cost_address (struct ivopts_data *data,
4694 struct iv_use *use, struct iv_cand *cand)
4695 {
4696 bitmap depends_on;
4697 bool can_autoinc;
4698 int inv_expr_id = -1;
4699 struct iv_use *sub_use;
4700 comp_cost sub_cost;
4701 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4702 &can_autoinc, &inv_expr_id);
4703
4704 if (cand->ainc_use == use)
4705 {
4706 if (can_autoinc)
4707 cost.cost -= cand->cost_step;
4708 /* If we generated the candidate solely for exploiting autoincrement
4709 opportunities, and it turns out it can't be used, set the cost to
4710 infinity to make sure we ignore it. */
4711 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4712 cost = infinite_cost;
4713 }
4714 for (sub_use = use->next;
4715 sub_use && !infinite_cost_p (cost);
4716 sub_use = sub_use->next)
4717 {
4718 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4719 &can_autoinc, &inv_expr_id);
4720 cost = add_costs (cost, sub_cost);
4721 }
4722
4723 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4724 inv_expr_id);
4725
4726 return !infinite_cost_p (cost);
4727 }
4728
4729 /* Computes value of candidate CAND at position AT in iteration NITER, and
4730 stores it to VAL. */
4731
4732 static void
4733 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4734 aff_tree *val)
4735 {
4736 aff_tree step, delta, nit;
4737 struct iv *iv = cand->iv;
4738 tree type = TREE_TYPE (iv->base);
4739 tree steptype = unsigned_type_for (type);
4743
4744 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4745 aff_combination_convert (&step, steptype);
4746 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4747 aff_combination_convert (&nit, steptype);
4748 aff_combination_mult (&nit, &step, &delta);
4749 if (stmt_after_increment (loop, cand, at))
4750 aff_combination_add (&delta, &step);
4751
4752 tree_to_aff_combination (iv->base, type, val);
4753 if (!POINTER_TYPE_P (type))
4754 aff_combination_convert (val, steptype);
4755 aff_combination_add (val, &delta);
4756 }
4757
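/* In affine terms the function computes (editor's note)

     VAL = base + NITER * step        if AT is before the increment,
     VAL = base + (NITER + 1) * step  if AT is after the increment,

   with the multiplication carried out in an unsigned type so that any
   wrap-around is well defined.  */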
4758 /* Returns period of induction variable iv. */
4759
4760 static tree
4761 iv_period (struct iv *iv)
4762 {
4763 tree step = iv->step, period, type;
4764 tree pow2div;
4765
4766 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4767
4768 type = unsigned_type_for (TREE_TYPE (step));
4769 /* The period of the iv is lcm (step, type_range) / step - 1,
4770 i.e., N * type_range / step - 1. Since the type range is a power
4771 of two, N == step >> num_of_ending_zeros_binary (step),
4772 so the final result is
4773
4774 (type_range >> num_of_ending_zeros_binary (step)) - 1.
4775
4776 */
4777 pow2div = num_ending_zeros (step);
4778
4779 period = build_low_bits_mask (type,
4780 (TYPE_PRECISION (type)
4781 - tree_to_uhwi (pow2div)));
4782
4783 return period;
4784 }
4785
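/* Worked example (editor's note): for an 8-bit unsigned iv with step 2,
   type_range == 256 and num_ending_zeros (2) == 1, so the period is
   (256 >> 1) - 1 == 127, a mask of the low 8 - 1 bits: the iv takes
   128 distinct values before its sequence repeats.  */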
4786 /* Returns the comparison operator used when eliminating the iv USE. */
4787
4788 static enum tree_code
4789 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4790 {
4791 struct loop *loop = data->current_loop;
4792 basic_block ex_bb;
4793 edge exit;
4794
4795 ex_bb = gimple_bb (use->stmt);
4796 exit = EDGE_SUCC (ex_bb, 0);
4797 if (flow_bb_inside_loop_p (loop, exit->dest))
4798 exit = EDGE_SUCC (ex_bb, 1);
4799
4800 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4801 }
4802
4803 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4804 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4805 calculation is performed in a non-wrapping type.
4806
4807 TODO: More generally, we could test for the situation that
4808 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4809 This would require knowing the sign of OFFSET. */
4810
4811 static bool
4812 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4813 {
4814 enum tree_code code;
4815 tree e1, e2;
4816 aff_tree aff_e1, aff_e2, aff_offset;
4817
4818 if (!nowrap_type_p (TREE_TYPE (base)))
4819 return false;
4820
4821 base = expand_simple_operations (base);
4822
4823 if (TREE_CODE (base) == SSA_NAME)
4824 {
4825 gimple stmt = SSA_NAME_DEF_STMT (base);
4826
4827 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4828 return false;
4829
4830 code = gimple_assign_rhs_code (stmt);
4831 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4832 return false;
4833
4834 e1 = gimple_assign_rhs1 (stmt);
4835 e2 = gimple_assign_rhs2 (stmt);
4836 }
4837 else
4838 {
4839 code = TREE_CODE (base);
4840 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4841 return false;
4842 e1 = TREE_OPERAND (base, 0);
4843 e2 = TREE_OPERAND (base, 1);
4844 }
4845
4846 /* Use affine expansion as deeper inspection to prove the equality. */
4847 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4848 &aff_e2, &data->name_expansion_cache);
4849 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4850 &aff_offset, &data->name_expansion_cache);
4851 aff_combination_scale (&aff_offset, -1);
4852 switch (code)
4853 {
4854 case PLUS_EXPR:
4855 aff_combination_add (&aff_e2, &aff_offset);
4856 if (aff_combination_zero_p (&aff_e2))
4857 return true;
4858
4859 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4860 &aff_e1, &data->name_expansion_cache);
4861 aff_combination_add (&aff_e1, &aff_offset);
4862 return aff_combination_zero_p (&aff_e1);
4863
4864 case POINTER_PLUS_EXPR:
4865 aff_combination_add (&aff_e2, &aff_offset);
4866 return aff_combination_zero_p (&aff_e2);
4867
4868 default:
4869 return false;
4870 }
4871 }
4872
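/* Example (editor's note): if BASE is the SSA name p_0 defined by
   p_0 = base + a (a POINTER_PLUS_EXPR) and OFFSET expands to the same
   affine combination as a, then aff_e2 - aff_offset is zero and
   BASE - OFFSET == base provably does not overflow.  This is exactly
   the situation set up by iv_elimination_compare_lt below.  */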
4873 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4874 comparison with CAND. NITER describes the number of iterations of
4875 the loops. If successful, the comparison in COMP_P is altered accordingly.
4876
4877 We aim to handle the following situation:
4878
4879 sometype *base, *p;
4880 int a, b, i;
4881
4882 i = a;
4883 p = p_0 = base + a;
4884
4885 do
4886 {
4887 bla (*p);
4888 p++;
4889 i++;
4890 }
4891 while (i < b);
4892
4893 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4894 We aim to optimize this to
4895
4896 p = p_0 = base + a;
4897 do
4898 {
4899 bla (*p);
4900 p++;
4901 }
4902 while (p < p_0 - a + b);
4903
4904 This preserves correctness, since the pointer arithmetic does not
4905 overflow. More precisely:
4906
4907 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4908 overflow in computing it or the values of p.
4909 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4910 overflow. To prove this, we use the fact that p_0 = base + a. */
4911
4912 static bool
4913 iv_elimination_compare_lt (struct ivopts_data *data,
4914 struct iv_cand *cand, enum tree_code *comp_p,
4915 struct tree_niter_desc *niter)
4916 {
4917 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4918 struct aff_tree nit, tmpa, tmpb;
4919 enum tree_code comp;
4920 HOST_WIDE_INT step;
4921
4922 /* We need to know that the candidate induction variable does not overflow.
4923 While more complex analysis may be used to prove this, for now just
4924 check that the variable appears in the original program and that it
4925 is computed in a type that guarantees no overflows. */
4926 cand_type = TREE_TYPE (cand->iv->base);
4927 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4928 return false;
4929
4930 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4931 the calculation of the BOUND could overflow, making the comparison
4932 invalid. */
4933 if (!data->loop_single_exit_p)
4934 return false;
4935
4936 /* We need to be able to decide whether candidate is increasing or decreasing
4937 in order to choose the right comparison operator. */
4938 if (!cst_and_fits_in_hwi (cand->iv->step))
4939 return false;
4940 step = int_cst_value (cand->iv->step);
4941
4942 /* Check that the number of iterations matches the expected pattern:
4943 a + 1 > b ? 0 : b - a - 1. */
4944 mbz = niter->may_be_zero;
4945 if (TREE_CODE (mbz) == GT_EXPR)
4946 {
4947 /* Handle a + 1 > b. */
4948 tree op0 = TREE_OPERAND (mbz, 0);
4949 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4950 {
4951 a = TREE_OPERAND (op0, 0);
4952 b = TREE_OPERAND (mbz, 1);
4953 }
4954 else
4955 return false;
4956 }
4957 else if (TREE_CODE (mbz) == LT_EXPR)
4958 {
4959 tree op1 = TREE_OPERAND (mbz, 1);
4960
4961 /* Handle b < a + 1. */
4962 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4963 {
4964 a = TREE_OPERAND (op1, 0);
4965 b = TREE_OPERAND (mbz, 0);
4966 }
4967 else
4968 return false;
4969 }
4970 else
4971 return false;
4972
4973 /* Expected number of iterations is B - A - 1. Check that it matches
4974 the actual number, i.e., that B - A - NITER = 1. */
4975 tree_to_aff_combination (niter->niter, nit_type, &nit);
4976 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4977 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4978 aff_combination_scale (&nit, -1);
4979 aff_combination_scale (&tmpa, -1);
4980 aff_combination_add (&tmpb, &tmpa);
4981 aff_combination_add (&tmpb, &nit);
4982 if (tmpb.n != 0 || tmpb.offset != 1)
4983 return false;
4984
4985 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4986 overflow. */
4987 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4988 cand->iv->step,
4989 fold_convert (TREE_TYPE (cand->iv->step), a));
4990 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4991 return false;
4992
4993 /* Determine the new comparison operator. */
4994 comp = step < 0 ? GT_EXPR : LT_EXPR;
4995 if (*comp_p == NE_EXPR)
4996 *comp_p = comp;
4997 else if (*comp_p == EQ_EXPR)
4998 *comp_p = invert_tree_comparison (comp, false);
4999 else
5000 gcc_unreachable ();
5001
5002 return true;
5003 }
5004
5005 /* Check whether it is possible to express the condition in USE by comparison
5006 of candidate CAND. If so, store the value compared with to BOUND, and the
5007 comparison operator to COMP. */
5008
5009 static bool
5010 may_eliminate_iv (struct ivopts_data *data,
5011 struct iv_use *use, struct iv_cand *cand, tree *bound,
5012 enum tree_code *comp)
5013 {
5014 basic_block ex_bb;
5015 edge exit;
5016 tree period;
5017 struct loop *loop = data->current_loop;
5018 aff_tree bnd;
5019 struct tree_niter_desc *desc = NULL;
5020
5021 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5022 return false;
5023
5024 /* For now this works only for exits that dominate the loop latch.
5025 TODO: extend to other conditions inside loop body. */
5026 ex_bb = gimple_bb (use->stmt);
5027 if (use->stmt != last_stmt (ex_bb)
5028 || gimple_code (use->stmt) != GIMPLE_COND
5029 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5030 return false;
5031
5032 exit = EDGE_SUCC (ex_bb, 0);
5033 if (flow_bb_inside_loop_p (loop, exit->dest))
5034 exit = EDGE_SUCC (ex_bb, 1);
5035 if (flow_bb_inside_loop_p (loop, exit->dest))
5036 return false;
5037
5038 desc = niter_for_exit (data, exit);
5039 if (!desc)
5040 return false;
5041
5042 /* Determine whether we can use the variable to test the exit condition.
5043 This is the case iff the period of the induction variable is greater
5044 than the number of iterations for which the exit condition is true. */
5045 period = iv_period (cand->iv);
5046
5047 /* If the number of iterations is constant, compare against it directly. */
5048 if (TREE_CODE (desc->niter) == INTEGER_CST)
5049 {
5050 /* See cand_value_at. */
5051 if (stmt_after_increment (loop, cand, use->stmt))
5052 {
5053 if (!tree_int_cst_lt (desc->niter, period))
5054 return false;
5055 }
5056 else
5057 {
5058 if (tree_int_cst_lt (period, desc->niter))
5059 return false;
5060 }
5061 }
5062
5063 /* If not, and if this is the only possible exit of the loop, see whether
5064 we can get a conservative estimate on the number of iterations of the
5065 entire loop and compare against that instead. */
5066 else
5067 {
5068 widest_int period_value, max_niter;
5069
5070 max_niter = desc->max;
5071 if (stmt_after_increment (loop, cand, use->stmt))
5072 max_niter += 1;
5073 period_value = wi::to_widest (period);
5074 if (wi::gtu_p (max_niter, period_value))
5075 {
5076 /* See if we can take advantage of inferred loop bound information. */
5077 if (data->loop_single_exit_p)
5078 {
5079 if (!max_loop_iterations (loop, &max_niter))
5080 return false;
5081 /* The loop bound is already adjusted by adding 1. */
5082 if (wi::gtu_p (max_niter, period_value))
5083 return false;
5084 }
5085 else
5086 return false;
5087 }
5088 }
5089
5090 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5091
5092 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5093 aff_combination_to_tree (&bnd));
5094 *comp = iv_elimination_compare (data, use);
5095
5096 /* It is unlikely that computing the number of iterations using division
5097 would be more profitable than keeping the original induction variable. */
5098 if (expression_expensive_p (*bound))
5099 return false;
5100
5101 /* Sometimes it is possible to handle the situation that the number of
5102 iterations may be zero unless additional assumptions hold, by using <
5103 instead of != in the exit condition.
5104
5105 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5106 base the exit condition on it. However, that is often too
5107 expensive. */
5108 if (!integer_zerop (desc->may_be_zero))
5109 return iv_elimination_compare_lt (data, cand, comp, desc);
5110
5111 return true;
5112 }
5113
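/* Example (editor's note): an 8-bit candidate with step 1 has period 255.
   For an exit tested before the increment with a constant niter of 100,
   period >= niter holds, so the exit test i != n can be rewritten as a
   comparison of the candidate against cand_value_at (..., niter): the
   candidate provably does not wrap around before reaching the bound.  */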
5114 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5115 be copied, if it is used in the loop body and DATA->body_includes_call. */
5116
5117 static int
5118 parm_decl_cost (struct ivopts_data *data, tree bound)
5119 {
5120 tree sbound = bound;
5121 STRIP_NOPS (sbound);
5122
5123 if (TREE_CODE (sbound) == SSA_NAME
5124 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5125 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5126 && data->body_includes_call)
5127 return COSTS_N_INSNS (1);
5128
5129 return 0;
5130 }
5131
5132 /* Determines cost of basing replacement of USE on CAND in a condition. */
5133
5134 static bool
5135 determine_use_iv_cost_condition (struct ivopts_data *data,
5136 struct iv_use *use, struct iv_cand *cand)
5137 {
5138 tree bound = NULL_TREE;
5139 struct iv *cmp_iv;
5140 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5141 comp_cost elim_cost, express_cost, cost, bound_cost;
5142 bool ok;
5143 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5144 tree *control_var, *bound_cst;
5145 enum tree_code comp = ERROR_MARK;
5146
5147 /* Only consider real candidates. */
5148 if (!cand->iv)
5149 {
5150 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5151 ERROR_MARK, -1);
5152 return false;
5153 }
5154
5155 /* Try iv elimination. */
5156 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5157 {
5158 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5159 if (elim_cost.cost == 0)
5160 elim_cost.cost = parm_decl_cost (data, bound);
5161 else if (TREE_CODE (bound) == INTEGER_CST)
5162 elim_cost.cost = 0;
5163 /* If we replace a loop condition 'i < n' with 'p < base + n',
5164 depends_on_elim will have 'base' and 'n' set, which implies
5165 that both 'base' and 'n' will be live during the loop. More likely,
5166 'base + n' will be loop invariant, resulting in only one live value
5167 during the loop. So in that case we clear depends_on_elim and set
5168 elim_inv_expr_id instead. */
5169 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5170 {
5171 elim_inv_expr_id = get_expr_id (data, bound);
5172 bitmap_clear (depends_on_elim);
5173 }
5174 /* The bound is a loop invariant, so it will be only computed
5175 once. */
5176 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5177 }
5178 else
5179 elim_cost = infinite_cost;
5180
5181 /* Try expressing the original giv. If it is compared with an invariant,
5182 note that we cannot get rid of it. */
5183 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5184 NULL, &cmp_iv);
5185 gcc_assert (ok);
5186
5187 /* When the condition is a comparison of the candidate IV against
5188 zero, prefer this IV.
5189
5190 TODO: The constant that we're subtracting from the cost should
5191 be target-dependent. This information should be added to the
5192 target costs for each backend. */
5193 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5194 && integer_zerop (*bound_cst)
5195 && (operand_equal_p (*control_var, cand->var_after, 0)
5196 || operand_equal_p (*control_var, cand->var_before, 0)))
5197 elim_cost.cost -= 1;
5198
5199 express_cost = get_computation_cost (data, use, cand, false,
5200 &depends_on_express, NULL,
5201 &express_inv_expr_id);
5202 fd_ivopts_data = data;
5203 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5204
5205 /* Count the cost of the original bound as well. */
5206 bound_cost = force_var_cost (data, *bound_cst, NULL);
5207 if (bound_cost.cost == 0)
5208 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5209 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5210 bound_cost.cost = 0;
5211 express_cost.cost += bound_cost.cost;
5212
5213 /* Choose the better approach, preferring the eliminated IV. */
5214 if (compare_costs (elim_cost, express_cost) <= 0)
5215 {
5216 cost = elim_cost;
5217 depends_on = depends_on_elim;
5218 depends_on_elim = NULL;
5219 inv_expr_id = elim_inv_expr_id;
5220 }
5221 else
5222 {
5223 cost = express_cost;
5224 depends_on = depends_on_express;
5225 depends_on_express = NULL;
5226 bound = NULL_TREE;
5227 comp = ERROR_MARK;
5228 inv_expr_id = express_inv_expr_id;
5229 }
5230
5231 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5232
5233 if (depends_on_elim)
5234 BITMAP_FREE (depends_on_elim);
5235 if (depends_on_express)
5236 BITMAP_FREE (depends_on_express);
5237
5238 return !infinite_cost_p (cost);
5239 }
5240
5241 /* Determines cost of basing replacement of USE on CAND. Returns false
5242 if USE cannot be based on CAND. */
5243
5244 static bool
5245 determine_use_iv_cost (struct ivopts_data *data,
5246 struct iv_use *use, struct iv_cand *cand)
5247 {
5248 switch (use->type)
5249 {
5250 case USE_NONLINEAR_EXPR:
5251 return determine_use_iv_cost_generic (data, use, cand);
5252
5253 case USE_ADDRESS:
5254 return determine_use_iv_cost_address (data, use, cand);
5255
5256 case USE_COMPARE:
5257 return determine_use_iv_cost_condition (data, use, cand);
5258
5259 default:
5260 gcc_unreachable ();
5261 }
5262 }
5263
5264 /* Return true if get_computation_cost indicates that autoincrement is
5265 a possibility for the pair of USE and CAND, false otherwise. */
5266
5267 static bool
5268 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5269 struct iv_cand *cand)
5270 {
5271 bitmap depends_on;
5272 bool can_autoinc;
5273 comp_cost cost;
5274
5275 if (use->type != USE_ADDRESS)
5276 return false;
5277
5278 cost = get_computation_cost (data, use, cand, true, &depends_on,
5279 &can_autoinc, NULL);
5280
5281 BITMAP_FREE (depends_on);
5282
5283 return !infinite_cost_p (cost) && can_autoinc;
5284 }
5285
5286 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5287 use that allows autoincrement, and set their AINC_USE if possible. */
5288
5289 static void
5290 set_autoinc_for_original_candidates (struct ivopts_data *data)
5291 {
5292 unsigned i, j;
5293
5294 for (i = 0; i < n_iv_cands (data); i++)
5295 {
5296 struct iv_cand *cand = iv_cand (data, i);
5297 struct iv_use *closest_before = NULL;
5298 struct iv_use *closest_after = NULL;
5299 if (cand->pos != IP_ORIGINAL)
5300 continue;
5301
5302 for (j = 0; j < n_iv_uses (data); j++)
5303 {
5304 struct iv_use *use = iv_use (data, j);
5305 unsigned uid = gimple_uid (use->stmt);
5306
5307 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5308 continue;
5309
5310 if (uid < gimple_uid (cand->incremented_at)
5311 && (closest_before == NULL
5312 || uid > gimple_uid (closest_before->stmt)))
5313 closest_before = use;
5314
5315 if (uid > gimple_uid (cand->incremented_at)
5316 && (closest_after == NULL
5317 || uid < gimple_uid (closest_after->stmt)))
5318 closest_after = use;
5319 }
5320
5321 if (closest_before != NULL
5322 && autoinc_possible_for_pair (data, closest_before, cand))
5323 cand->ainc_use = closest_before;
5324 else if (closest_after != NULL
5325 && autoinc_possible_for_pair (data, closest_after, cand))
5326 cand->ainc_use = closest_after;
5327 }
5328 }
5329
5330 /* Finds the candidates for the induction variables. */
5331
5332 static void
5333 find_iv_candidates (struct ivopts_data *data)
5334 {
5335 /* Add commonly used ivs. */
5336 add_standard_iv_candidates (data);
5337
5338 /* Add old induction variables. */
5339 add_old_ivs_candidates (data);
5340
5341 /* Add induction variables derived from uses. */
5342 add_derived_ivs_candidates (data);
5343
5344 set_autoinc_for_original_candidates (data);
5345
5346 /* Record the important candidates. */
5347 record_important_candidates (data);
5348 }
5349
5350 /* Determines costs of basing the use of the iv on an iv candidate. */
5351
5352 static void
5353 determine_use_iv_costs (struct ivopts_data *data)
5354 {
5355 unsigned i, j;
5356 struct iv_use *use;
5357 struct iv_cand *cand;
5358 bitmap to_clear = BITMAP_ALLOC (NULL);
5359
5360 alloc_use_cost_map (data);
5361
5362 for (i = 0; i < n_iv_uses (data); i++)
5363 {
5364 use = iv_use (data, i);
5365
5366 if (data->consider_all_candidates)
5367 {
5368 for (j = 0; j < n_iv_cands (data); j++)
5369 {
5370 cand = iv_cand (data, j);
5371 determine_use_iv_cost (data, use, cand);
5372 }
5373 }
5374 else
5375 {
5376 bitmap_iterator bi;
5377
5378 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5379 {
5380 cand = iv_cand (data, j);
5381 if (!determine_use_iv_cost (data, use, cand))
5382 bitmap_set_bit (to_clear, j);
5383 }
5384
5385 /* Remove the candidates for which the cost is infinite from
5386 the list of related candidates. */
5387 bitmap_and_compl_into (use->related_cands, to_clear);
5388 bitmap_clear (to_clear);
5389 }
5390 }
5391
5392 BITMAP_FREE (to_clear);
5393
5394 if (dump_file && (dump_flags & TDF_DETAILS))
5395 {
5396 fprintf (dump_file, "Use-candidate costs:\n");
5397
5398 for (i = 0; i < n_iv_uses (data); i++)
5399 {
5400 use = iv_use (data, i);
5401
5402 fprintf (dump_file, "Use %d:\n", i);
5403 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5404 for (j = 0; j < use->n_map_members; j++)
5405 {
5406 if (!use->cost_map[j].cand
5407 || infinite_cost_p (use->cost_map[j].cost))
5408 continue;
5409
5410 fprintf (dump_file, " %d\t%d\t%d\t",
5411 use->cost_map[j].cand->id,
5412 use->cost_map[j].cost.cost,
5413 use->cost_map[j].cost.complexity);
5414 if (use->cost_map[j].depends_on)
5415 bitmap_print (dump_file,
5416 use->cost_map[j].depends_on, "","");
5417 if (use->cost_map[j].inv_expr_id != -1)
5418 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5419 fprintf (dump_file, "\n");
5420 }
5421
5422 fprintf (dump_file, "\n");
5423 }
5424 fprintf (dump_file, "\n");
5425 }
5426 }
5427
5428 /* Determines cost of the candidate CAND. */
5429
5430 static void
5431 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5432 {
5433 comp_cost cost_base;
5434 unsigned cost, cost_step;
5435 tree base;
5436
5437 if (!cand->iv)
5438 {
5439 cand->cost = 0;
5440 return;
5441 }
5442
5443 /* There are two costs associated with the candidate -- its increment
5444 and its initialization. The second is almost negligible for any loop
5445 that rolls enough, so we take it just very little into account. */
5446
5447 base = cand->iv->base;
5448 cost_base = force_var_cost (data, base, NULL);
5449 /* It is exceptional for the iv register to happen to be initialized with
5450 the proper value at no cost. In general, there will at least be a regcopy
5451 or a const set. */
5452 if (cost_base.cost == 0)
5453 cost_base.cost = COSTS_N_INSNS (1);
5454 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5455
5456 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5457
5458 /* Prefer the original ivs unless we may gain something by replacing them.
5459 The reason is to make debugging simpler, so this is not relevant for
5460 artificial ivs created by other optimization passes. */
5461 if (cand->pos != IP_ORIGINAL
5462 || !SSA_NAME_VAR (cand->var_before)
5463 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5464 cost++;
5465
5466 /* Prefer not to insert statements into latch unless there are some
5467 already (so that we do not create unnecessary jumps). */
5468 if (cand->pos == IP_END
5469 && empty_block_p (ip_end_pos (data->current_loop)))
5470 cost++;
5471
5472 cand->cost = cost;
5473 cand->cost_step = cost_step;
5474 }
5475
5476 /* Determines costs of computation of the candidates. */
5477
5478 static void
5479 determine_iv_costs (struct ivopts_data *data)
5480 {
5481 unsigned i;
5482
5483 if (dump_file && (dump_flags & TDF_DETAILS))
5484 {
5485 fprintf (dump_file, "Candidate costs:\n");
5486 fprintf (dump_file, " cand\tcost\n");
5487 }
5488
5489 for (i = 0; i < n_iv_cands (data); i++)
5490 {
5491 struct iv_cand *cand = iv_cand (data, i);
5492
5493 determine_iv_cost (data, cand);
5494
5495 if (dump_file && (dump_flags & TDF_DETAILS))
5496 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5497 }
5498
5499 if (dump_file && (dump_flags & TDF_DETAILS))
5500 fprintf (dump_file, "\n");
5501 }
5502
5503 /* Calculates cost for having SIZE induction variables. */
5504
5505 static unsigned
5506 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5507 {
5508 /* We add size to the cost, so that we prefer eliminating ivs
5509 if possible. */
5510 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5511 data->body_includes_call);
5512 }
5513
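/* Editor's note, for illustration: estimate_reg_pressure_cost charges
   nothing while SIZE plus the registers already used fits comfortably
   within the available registers, so small sets cost roughly SIZE; as
   the total approaches or exceeds target_avail_regs, each extra iv is
   charged at register-copy or spill cost instead, which is why the
   search below prefers small candidate sets.  */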
5514 /* For each size of the induction variable set determine the penalty. */
5515
5516 static void
5517 determine_set_costs (struct ivopts_data *data)
5518 {
5519 unsigned j, n;
5520 gphi *phi;
5521 gphi_iterator psi;
5522 tree op;
5523 struct loop *loop = data->current_loop;
5524 bitmap_iterator bi;
5525
5526 if (dump_file && (dump_flags & TDF_DETAILS))
5527 {
5528 fprintf (dump_file, "Global costs:\n");
5529 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5530 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5531 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5532 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5533 }
5534
5535 n = 0;
5536 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5537 {
5538 phi = psi.phi ();
5539 op = PHI_RESULT (phi);
5540
5541 if (virtual_operand_p (op))
5542 continue;
5543
5544 if (get_iv (data, op))
5545 continue;
5546
5547 n++;
5548 }
5549
5550 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5551 {
5552 struct version_info *info = ver_info (data, j);
5553
5554 if (info->inv_id && info->has_nonlin_use)
5555 n++;
5556 }
5557
5558 data->regs_used = n;
5559 if (dump_file && (dump_flags & TDF_DETAILS))
5560 fprintf (dump_file, " regs_used %d\n", n);
5561
5562 if (dump_file && (dump_flags & TDF_DETAILS))
5563 {
5564 fprintf (dump_file, " cost for size:\n");
5565 fprintf (dump_file, " ivs\tcost\n");
5566 for (j = 0; j <= 2 * target_avail_regs; j++)
5567 fprintf (dump_file, " %d\t%d\n", j,
5568 ivopts_global_cost_for_size (data, j));
5569 fprintf (dump_file, "\n");
5570 }
5571 }
5572
5573 /* Returns true if A is a cheaper cost pair than B. */
5574
5575 static bool
5576 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5577 {
5578 int cmp;
5579
5580 if (!a)
5581 return false;
5582
5583 if (!b)
5584 return true;
5585
5586 cmp = compare_costs (a->cost, b->cost);
5587 if (cmp < 0)
5588 return true;
5589
5590 if (cmp > 0)
5591 return false;
5592
5593 /* In case the costs are the same, prefer the cheaper candidate. */
5594 if (a->cand->cost < b->cand->cost)
5595 return true;
5596
5597 return false;
5598 }
5599
5600
5601 /* Returns the candidate by which USE is expressed in IVS. */
5602
5603 static struct cost_pair *
5604 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5605 {
5606 return ivs->cand_for_use[use->id];
5607 }
5608
5609 /* Computes the cost field of IVS structure. */
5610
5611 static void
5612 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5613 {
5614 comp_cost cost = ivs->cand_use_cost;
5615
5616 cost.cost += ivs->cand_cost;
5617
5618 cost.cost += ivopts_global_cost_for_size (data,
5619 ivs->n_regs + ivs->num_used_inv_expr);
5620
5621 ivs->cost = cost;
5622 }
5623
5624 /* Removes the invariants in set INVS from set IVS. */
5625
5626 static void
5627 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5628 {
5629 bitmap_iterator bi;
5630 unsigned iid;
5631
5632 if (!invs)
5633 return;
5634
5635 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5636 {
5637 ivs->n_invariant_uses[iid]--;
5638 if (ivs->n_invariant_uses[iid] == 0)
5639 ivs->n_regs--;
5640 }
5641 }
5642
5643 /* Set USE not to be expressed by any candidate in IVS. */
5644
5645 static void
5646 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5647 struct iv_use *use)
5648 {
5649 unsigned uid = use->id, cid;
5650 struct cost_pair *cp;
5651
5652 cp = ivs->cand_for_use[uid];
5653 if (!cp)
5654 return;
5655 cid = cp->cand->id;
5656
5657 ivs->bad_uses++;
5658 ivs->cand_for_use[uid] = NULL;
5659 ivs->n_cand_uses[cid]--;
5660
5661 if (ivs->n_cand_uses[cid] == 0)
5662 {
5663 bitmap_clear_bit (ivs->cands, cid);
5664 /* Do not count the pseudocandidates. */
5665 if (cp->cand->iv)
5666 ivs->n_regs--;
5667 ivs->n_cands--;
5668 ivs->cand_cost -= cp->cand->cost;
5669
5670 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5671 }
5672
5673 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5674
5675 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5676
5677 if (cp->inv_expr_id != -1)
5678 {
5679 ivs->used_inv_expr[cp->inv_expr_id]--;
5680 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5681 ivs->num_used_inv_expr--;
5682 }
5683 iv_ca_recount_cost (data, ivs);
5684 }
5685
5686 /* Adds the invariants in set INVS to set IVS. */
5687
5688 static void
5689 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5690 {
5691 bitmap_iterator bi;
5692 unsigned iid;
5693
5694 if (!invs)
5695 return;
5696
5697 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5698 {
5699 ivs->n_invariant_uses[iid]++;
5700 if (ivs->n_invariant_uses[iid] == 1)
5701 ivs->n_regs++;
5702 }
5703 }
5704
5705 /* Set cost pair for USE in set IVS to CP. */
5706
5707 static void
5708 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5709 struct iv_use *use, struct cost_pair *cp)
5710 {
5711 unsigned uid = use->id, cid;
5712
5713 if (ivs->cand_for_use[uid] == cp)
5714 return;
5715
5716 if (ivs->cand_for_use[uid])
5717 iv_ca_set_no_cp (data, ivs, use);
5718
5719 if (cp)
5720 {
5721 cid = cp->cand->id;
5722
5723 ivs->bad_uses--;
5724 ivs->cand_for_use[uid] = cp;
5725 ivs->n_cand_uses[cid]++;
5726 if (ivs->n_cand_uses[cid] == 1)
5727 {
5728 bitmap_set_bit (ivs->cands, cid);
5729 /* Do not count the pseudocandidates. */
5730 if (cp->cand->iv)
5731 ivs->n_regs++;
5732 ivs->n_cands++;
5733 ivs->cand_cost += cp->cand->cost;
5734
5735 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5736 }
5737
5738 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5739 iv_ca_set_add_invariants (ivs, cp->depends_on);
5740
5741 if (cp->inv_expr_id != -1)
5742 {
5743 ivs->used_inv_expr[cp->inv_expr_id]++;
5744 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5745 ivs->num_used_inv_expr++;
5746 }
5747 iv_ca_recount_cost (data, ivs);
5748 }
5749 }
5750
5751 /* Extend set IVS by expressing USE by some of the candidates in it
5752 if possible. Consider all important candidates if candidates in
5753 set IVS don't give any result. */
5754
5755 static void
5756 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5757 struct iv_use *use)
5758 {
5759 struct cost_pair *best_cp = NULL, *cp;
5760 bitmap_iterator bi;
5761 unsigned i;
5762 struct iv_cand *cand;
5763
5764 gcc_assert (ivs->upto >= use->id);
5765 ivs->upto++;
5766 ivs->bad_uses++;
5767
5768 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5769 {
5770 cand = iv_cand (data, i);
5771 cp = get_use_iv_cost (data, use, cand);
5772 if (cheaper_cost_pair (cp, best_cp))
5773 best_cp = cp;
5774 }
5775
5776 if (best_cp == NULL)
5777 {
5778 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5779 {
5780 cand = iv_cand (data, i);
5781 cp = get_use_iv_cost (data, use, cand);
5782 if (cheaper_cost_pair (cp, best_cp))
5783 best_cp = cp;
5784 }
5785 }
5786
5787 iv_ca_set_cp (data, ivs, use, best_cp);
5788 }
5789
5790 /* Get cost for assignment IVS. */
5791
5792 static comp_cost
5793 iv_ca_cost (struct iv_ca *ivs)
5794 {
5795 /* This was a conditional expression but it triggered a bug in
5796 Sun C 5.5. */
5797 if (ivs->bad_uses)
5798 return infinite_cost;
5799 else
5800 return ivs->cost;
5801 }
5802
5803 /* Returns true if all dependences of CP are among invariants in IVS. */
5804
5805 static bool
5806 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5807 {
5808 unsigned i;
5809 bitmap_iterator bi;
5810
5811 if (!cp->depends_on)
5812 return true;
5813
5814 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5815 {
5816 if (ivs->n_invariant_uses[i] == 0)
5817 return false;
5818 }
5819
5820 return true;
5821 }
5822
5823 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
5824 chains it before NEXT_CHANGE. */
5825
5826 static struct iv_ca_delta *
5827 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5828 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5829 {
5830 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5831
5832 change->use = use;
5833 change->old_cp = old_cp;
5834 change->new_cp = new_cp;
5835 change->next_change = next_change;
5836
5837 return change;
5838 }
5839
5840 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5841 are rewritten. */
5842
5843 static struct iv_ca_delta *
5844 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5845 {
5846 struct iv_ca_delta *last;
5847
5848 if (!l2)
5849 return l1;
5850
5851 if (!l1)
5852 return l2;
5853
5854 for (last = l1; last->next_change; last = last->next_change)
5855 continue;
5856 last->next_change = l2;
5857
5858 return l1;
5859 }
5860
5861 /* Reverse the list of changes DELTA, forming its inverse. */
5862
5863 static struct iv_ca_delta *
5864 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5865 {
5866 struct iv_ca_delta *act, *next, *prev = NULL;
5867
5868 for (act = delta; act; act = next)
5869 {
5870 next = act->next_change;
5871 act->next_change = prev;
5872 prev = act;
5873
5874 std::swap (act->old_cp, act->new_cp);
5875 }
5876
5877 return prev;
5878 }
5879
5880 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5881 reverted instead. */
5882
5883 static void
5884 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5885 struct iv_ca_delta *delta, bool forward)
5886 {
5887 struct cost_pair *from, *to;
5888 struct iv_ca_delta *act;
5889
5890 if (!forward)
5891 delta = iv_ca_delta_reverse (delta);
5892
5893 for (act = delta; act; act = act->next_change)
5894 {
5895 from = act->old_cp;
5896 to = act->new_cp;
5897 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5898 iv_ca_set_cp (data, ivs, act->use, to);
5899 }
5900
5901 if (!forward)
5902 iv_ca_delta_reverse (delta);
5903 }
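
/* Illustration (a sketch of the calling pattern used throughout this file,
   not new functionality): deltas make it cheap to evaluate a change
   without committing to it.  The typical sequence is

     iv_ca_delta_commit (data, ivs, delta, true);   (apply tentatively)
     acost = iv_ca_cost (ivs);                      (measure the new cost)
     iv_ca_delta_commit (data, ivs, delta, false);  (roll back)

   The rollback works because iv_ca_delta_reverse both reverses the list
   and swaps each old_cp/new_cp pair, so replaying the reversed list undoes
   the changes in last-in-first-out order; the second reversal at the end
   of iv_ca_delta_commit restores DELTA itself.  */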
5904
5905 /* Returns true if CAND is used in IVS. */
5906
5907 static bool
5908 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5909 {
5910 return ivs->n_cand_uses[cand->id] > 0;
5911 }
5912
5913 /* Returns number of induction variable candidates in the set IVS. */
5914
5915 static unsigned
5916 iv_ca_n_cands (struct iv_ca *ivs)
5917 {
5918 return ivs->n_cands;
5919 }
5920
5921 /* Free the list of changes DELTA. */
5922
5923 static void
5924 iv_ca_delta_free (struct iv_ca_delta **delta)
5925 {
5926 struct iv_ca_delta *act, *next;
5927
5928 for (act = *delta; act; act = next)
5929 {
5930 next = act->next_change;
5931 free (act);
5932 }
5933
5934 *delta = NULL;
5935 }
5936
5937 /* Allocates a new iv candidate assignment. */
5938
5939 static struct iv_ca *
5940 iv_ca_new (struct ivopts_data *data)
5941 {
5942 struct iv_ca *nw = XNEW (struct iv_ca);
5943
5944 nw->upto = 0;
5945 nw->bad_uses = 0;
5946 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5947 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5948 nw->cands = BITMAP_ALLOC (NULL);
5949 nw->n_cands = 0;
5950 nw->n_regs = 0;
5951 nw->cand_use_cost = no_cost;
5952 nw->cand_cost = 0;
5953 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5954 nw->cost = no_cost;
5955 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5956 nw->num_used_inv_expr = 0;
5957
5958 return nw;
5959 }
5960
5961 /* Free memory occupied by the set IVS. */
5962
5963 static void
5964 iv_ca_free (struct iv_ca **ivs)
5965 {
5966 free ((*ivs)->cand_for_use);
5967 free ((*ivs)->n_cand_uses);
5968 BITMAP_FREE ((*ivs)->cands);
5969 free ((*ivs)->n_invariant_uses);
5970 free ((*ivs)->used_inv_expr);
5971 free (*ivs);
5972 *ivs = NULL;
5973 }
5974
5975 /* Dumps IVS to FILE. */
5976
5977 static void
5978 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5979 {
5980 const char *pref = " invariants ";
5981 unsigned i;
5982 comp_cost cost = iv_ca_cost (ivs);
5983
5984 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5985 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5986 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5987 bitmap_print (file, ivs->cands, " candidates: ","\n");
5988
5989 for (i = 0; i < ivs->upto; i++)
5990 {
5991 struct iv_use *use = iv_use (data, i);
5992 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5993 if (cp)
5994 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5995 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5996 else
5997 fprintf (file, " use:%d --> ??\n", use->id);
5998 }
5999
6000 for (i = 1; i <= data->max_inv_id; i++)
6001 if (ivs->n_invariant_uses[i])
6002 {
6003 fprintf (file, "%s%d", pref, i);
6004 pref = ", ";
6005 }
6006 fprintf (file, "\n\n");
6007 }
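
/* With -fdump-tree-ivopts-details, the output of iv_ca_dump looks roughly
   as follows (all numbers made up for illustration):

     cost: 18 (complexity 2)
     cand_cost: 8
     cand_use_cost: 10 (complexity 2)
     candidates: 1, 3
     use:0 --> iv_cand:1, cost=(4,1)
     use:1 --> iv_cand:3, cost=(6,1)
      invariants 2, 5
  */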
6008
6009 /* Try changing the candidate in IVS to CAND for each use. Return the cost
6010 of the new set, and store the differences in DELTA. The number of induction
6011 variables in the new set is stored in N_IVS. MIN_NCAND is a flag; when it is
6012 true, the function tries to find a solution with minimal iv candidates. */
6013
6014 static comp_cost
6015 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6016 struct iv_cand *cand, struct iv_ca_delta **delta,
6017 unsigned *n_ivs, bool min_ncand)
6018 {
6019 unsigned i;
6020 comp_cost cost;
6021 struct iv_use *use;
6022 struct cost_pair *old_cp, *new_cp;
6023
6024 *delta = NULL;
6025 for (i = 0; i < ivs->upto; i++)
6026 {
6027 use = iv_use (data, i);
6028 old_cp = iv_ca_cand_for_use (ivs, use);
6029
6030 if (old_cp
6031 && old_cp->cand == cand)
6032 continue;
6033
6034 new_cp = get_use_iv_cost (data, use, cand);
6035 if (!new_cp)
6036 continue;
6037
6038 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6039 continue;
6040
6041 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6042 continue;
6043
6044 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6045 }
6046
6047 iv_ca_delta_commit (data, ivs, *delta, true);
6048 cost = iv_ca_cost (ivs);
6049 if (n_ivs)
6050 *n_ivs = iv_ca_n_cands (ivs);
6051 iv_ca_delta_commit (data, ivs, *delta, false);
6052
6053 return cost;
6054 }
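
/* A sketch of how iv_ca_extend is typically driven (compare
   try_improve_iv_set below); the returned cost describes the tentatively
   extended set, while IVS itself is left unchanged until the caller
   decides the delta is profitable:

     acost = iv_ca_extend (data, ivs, cand, &delta, &n_ivs, false);
     if (delta && compare_costs (acost, iv_ca_cost (ivs)) < 0)
       iv_ca_delta_commit (data, ivs, delta, true);
     iv_ca_delta_free (&delta);
  */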
6055
6056 /* Try narrowing set IVS by removing CAND. Return the cost of
6057 the new set and store the differences in DELTA. START is
6058 the candidate with which we start narrowing. */
6059
6060 static comp_cost
6061 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6062 struct iv_cand *cand, struct iv_cand *start,
6063 struct iv_ca_delta **delta)
6064 {
6065 unsigned i, ci;
6066 struct iv_use *use;
6067 struct cost_pair *old_cp, *new_cp, *cp;
6068 bitmap_iterator bi;
6069 struct iv_cand *cnd;
6070 comp_cost cost, best_cost, acost;
6071
6072 *delta = NULL;
6073 for (i = 0; i < n_iv_uses (data); i++)
6074 {
6075 use = iv_use (data, i);
6076
6077 old_cp = iv_ca_cand_for_use (ivs, use);
6078 if (old_cp->cand != cand)
6079 continue;
6080
6081 best_cost = iv_ca_cost (ivs);
6082 /* Start narrowing with START. */
6083 new_cp = get_use_iv_cost (data, use, start);
6084
6085 if (data->consider_all_candidates)
6086 {
6087 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6088 {
6089 if (ci == cand->id || (start && ci == start->id))
6090 continue;
6091
6092 cnd = iv_cand (data, ci);
6093
6094 cp = get_use_iv_cost (data, use, cnd);
6095 if (!cp)
6096 continue;
6097
6098 iv_ca_set_cp (data, ivs, use, cp);
6099 acost = iv_ca_cost (ivs);
6100
6101 if (compare_costs (acost, best_cost) < 0)
6102 {
6103 best_cost = acost;
6104 new_cp = cp;
6105 }
6106 }
6107 }
6108 else
6109 {
6110 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6111 {
6112 if (ci == cand->id || (start && ci == start->id))
6113 continue;
6114
6115 cnd = iv_cand (data, ci);
6116
6117 cp = get_use_iv_cost (data, use, cnd);
6118 if (!cp)
6119 continue;
6120
6121 iv_ca_set_cp (data, ivs, use, cp);
6122 acost = iv_ca_cost (ivs);
6123
6124 if (compare_costs (acost, best_cost) < 0)
6125 {
6126 best_cost = acost;
6127 new_cp = cp;
6128 }
6129 }
6130 }
6131 /* Restore the old cp for this use. */
6132 iv_ca_set_cp (data, ivs, use, old_cp);
6133
6134 if (!new_cp)
6135 {
6136 iv_ca_delta_free (delta);
6137 return infinite_cost;
6138 }
6139
6140 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6141 }
6142
6143 iv_ca_delta_commit (data, ivs, *delta, true);
6144 cost = iv_ca_cost (ivs);
6145 iv_ca_delta_commit (data, ivs, *delta, false);
6146
6147 return cost;
6148 }
6149
6150 /* Try optimizing the set of candidates IVS by removing candidates other
6151 than EXCEPT_CAND from it. Return the cost of the new set, and store the
6152 differences in DELTA. */
6153
6154 static comp_cost
6155 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6156 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6157 {
6158 bitmap_iterator bi;
6159 struct iv_ca_delta *act_delta, *best_delta;
6160 unsigned i;
6161 comp_cost best_cost, acost;
6162 struct iv_cand *cand;
6163
6164 best_delta = NULL;
6165 best_cost = iv_ca_cost (ivs);
6166
6167 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6168 {
6169 cand = iv_cand (data, i);
6170
6171 if (cand == except_cand)
6172 continue;
6173
6174 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6175
6176 if (compare_costs (acost, best_cost) < 0)
6177 {
6178 best_cost = acost;
6179 iv_ca_delta_free (&best_delta);
6180 best_delta = act_delta;
6181 }
6182 else
6183 iv_ca_delta_free (&act_delta);
6184 }
6185
6186 if (!best_delta)
6187 {
6188 *delta = NULL;
6189 return best_cost;
6190 }
6191
6192 /* Recurse to possibly remove other unnecessary ivs. */
6193 iv_ca_delta_commit (data, ivs, best_delta, true);
6194 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6195 iv_ca_delta_commit (data, ivs, best_delta, false);
6196 *delta = iv_ca_delta_join (best_delta, *delta);
6197 return best_cost;
6198 }
6199
6200 /* Check if CAND_IDX names a candidate other than OLD_CAND that has a
6201 cheaper local cost for USE than BEST_CP. If so, return a pointer to
6202 its cost_pair; otherwise just return BEST_CP. */
6203
6204 static struct cost_pair*
6205 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6206 unsigned int cand_idx, struct iv_cand *old_cand,
6207 struct cost_pair *best_cp)
6208 {
6209 struct iv_cand *cand;
6210 struct cost_pair *cp;
6211
6212 gcc_assert (old_cand != NULL && best_cp != NULL);
6213 if (cand_idx == old_cand->id)
6214 return best_cp;
6215
6216 cand = iv_cand (data, cand_idx);
6217 cp = get_use_iv_cost (data, use, cand);
6218 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6219 return cp;
6220
6221 return best_cp;
6222 }
6223
6224 /* Try breaking the local optimal fixed-point for IVS by replacing candidates
6225 that are used by more than one iv use. For each such candidate, this
6226 function tries to represent the iv uses under that candidate using
6227 other candidates with lower local cost, then tries to prune the new set.
6228 If the new set has lower cost, it returns the new cost after recording
6229 the candidate replacement in the list DELTA. */
6230
6231 static comp_cost
6232 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6233 struct iv_ca_delta **delta)
6234 {
6235 bitmap_iterator bi, bj;
6236 unsigned int i, j, k;
6237 struct iv_use *use;
6238 struct iv_cand *cand;
6239 comp_cost orig_cost, acost;
6240 struct iv_ca_delta *act_delta, *tmp_delta;
6241 struct cost_pair *old_cp, *best_cp = NULL;
6242
6243 *delta = NULL;
6244 orig_cost = iv_ca_cost (ivs);
6245
6246 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6247 {
6248 if (ivs->n_cand_uses[i] == 1
6249 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6250 continue;
6251
6252 cand = iv_cand (data, i);
6253
6254 act_delta = NULL;
6255 /* Represent uses under current candidate using other ones with
6256 lower local cost. */
6257 for (j = 0; j < ivs->upto; j++)
6258 {
6259 use = iv_use (data, j);
6260 old_cp = iv_ca_cand_for_use (ivs, use);
6261
6262 if (old_cp->cand != cand)
6263 continue;
6264
6265 best_cp = old_cp;
6266 if (data->consider_all_candidates)
6267 for (k = 0; k < n_iv_cands (data); k++)
6268 best_cp = cheaper_cost_with_cand (data, use, k,
6269 old_cp->cand, best_cp);
6270 else
6271 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6272 best_cp = cheaper_cost_with_cand (data, use, k,
6273 old_cp->cand, best_cp);
6274
6275 if (best_cp == old_cp)
6276 continue;
6277
6278 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6279 }
6280 /* No need for further pruning. */
6281 if (!act_delta)
6282 continue;
6283
6284 /* Prune the new candidate set. */
6285 iv_ca_delta_commit (data, ivs, act_delta, true);
6286 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6287 iv_ca_delta_commit (data, ivs, act_delta, false);
6288 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6289
6290 if (compare_costs (acost, orig_cost) < 0)
6291 {
6292 *delta = act_delta;
6293 return acost;
6294 }
6295 else
6296 iv_ca_delta_free (&act_delta);
6297 }
6298
6299 return orig_cost;
6300 }
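
/* Worked example (hypothetical costs): suppose candidate C serves uses u1
   and u2, and no single-use move away from C pays off because C stays
   alive for the other use; then both iv_ca_extend and iv_ca_prune leave
   the set unchanged.  iv_ca_replace moves u1 and u2 to their individually
   cheapest alternatives in one step and only then prunes, so the cost of
   dropping C entirely gets evaluated -- exactly the local minimum that the
   greedy one-change-at-a-time search cannot escape on its own.  */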
6301
6302 /* Tries to extend the set IVS in the best possible way in order
6303 to express the USE. If ORIGINALP is true, prefer candidates from
6304 the original set of IVs, otherwise favor important candidates not
6305 based on any memory object. */
6306
6307 static bool
6308 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6309 struct iv_use *use, bool originalp)
6310 {
6311 comp_cost best_cost, act_cost;
6312 unsigned i;
6313 bitmap_iterator bi;
6314 struct iv_cand *cand;
6315 struct iv_ca_delta *best_delta = NULL, *act_delta;
6316 struct cost_pair *cp;
6317
6318 iv_ca_add_use (data, ivs, use);
6319 best_cost = iv_ca_cost (ivs);
6320 cp = iv_ca_cand_for_use (ivs, use);
6321 if (cp)
6322 {
6323 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6324 iv_ca_set_no_cp (data, ivs, use);
6325 }
6326
6327 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6328 first try important candidates not based on any memory object. Only if
6329 this fails, try the specific ones. Rationale -- in loops with many
6330 variables the best choice often is to use just one generic biv. If we
6331 added here many ivs specific to the uses, the optimization algorithm later
6332 would be likely to get stuck in a local minimum, thus causing us to create
6333 too many ivs. The approach from few ivs to more seems more likely to be
6334 successful -- starting from few ivs, replacing an expensive use by a
6335 specific iv should always be a win. */
6336 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6337 {
6338 cand = iv_cand (data, i);
6339
6340 if (originalp && cand->pos != IP_ORIGINAL)
6341 continue;
6342
6343 if (!originalp && cand->iv->base_object != NULL_TREE)
6344 continue;
6345
6346 if (iv_ca_cand_used_p (ivs, cand))
6347 continue;
6348
6349 cp = get_use_iv_cost (data, use, cand);
6350 if (!cp)
6351 continue;
6352
6353 iv_ca_set_cp (data, ivs, use, cp);
6354 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6355 true);
6356 iv_ca_set_no_cp (data, ivs, use);
6357 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6358
6359 if (compare_costs (act_cost, best_cost) < 0)
6360 {
6361 best_cost = act_cost;
6362
6363 iv_ca_delta_free (&best_delta);
6364 best_delta = act_delta;
6365 }
6366 else
6367 iv_ca_delta_free (&act_delta);
6368 }
6369
6370 if (infinite_cost_p (best_cost))
6371 {
6372 for (i = 0; i < use->n_map_members; i++)
6373 {
6374 cp = use->cost_map + i;
6375 cand = cp->cand;
6376 if (!cand)
6377 continue;
6378
6379 /* Already tried this. */
6380 if (cand->important)
6381 {
6382 if (originalp && cand->pos == IP_ORIGINAL)
6383 continue;
6384 if (!originalp && cand->iv->base_object == NULL_TREE)
6385 continue;
6386 }
6387
6388 if (iv_ca_cand_used_p (ivs, cand))
6389 continue;
6390
6391 act_delta = NULL;
6392 iv_ca_set_cp (data, ivs, use, cp);
6393 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6394 iv_ca_set_no_cp (data, ivs, use);
6395 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6396 cp, act_delta);
6397
6398 if (compare_costs (act_cost, best_cost) < 0)
6399 {
6400 best_cost = act_cost;
6401
6402 if (best_delta)
6403 iv_ca_delta_free (&best_delta);
6404 best_delta = act_delta;
6405 }
6406 else
6407 iv_ca_delta_free (&act_delta);
6408 }
6409 }
6410
6411 iv_ca_delta_commit (data, ivs, best_delta, true);
6412 iv_ca_delta_free (&best_delta);
6413
6414 return !infinite_cost_p (best_cost);
6415 }
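
/* Illustration of the rationale above (a hypothetical source loop):

     for (i = 0; i < n; i++)
       {
         a[i] = x;
         b[i] = y;
         c[i] = z;
       }

   Trying important candidates first seeds the solution with one generic
   biv shared by all three address uses.  Had we instead started with a
   specific pointer iv per use, the improvement step, which changes one
   candidate at a time, could easily remain stuck in that three-iv local
   minimum.  */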
6416
6417 /* Finds an initial assignment of candidates to uses. */
6418
6419 static struct iv_ca *
6420 get_initial_solution (struct ivopts_data *data, bool originalp)
6421 {
6422 struct iv_ca *ivs = iv_ca_new (data);
6423 unsigned i;
6424
6425 for (i = 0; i < n_iv_uses (data); i++)
6426 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6427 {
6428 iv_ca_free (&ivs);
6429 return NULL;
6430 }
6431
6432 return ivs;
6433 }
6434
6435 /* Tries to improve the set of induction variables IVS. TRY_REPLACE_P
6436 points to a bool variable; if it is true, this function also tries to
6437 break a local optimal fixed-point by replacing candidates in IVS. */
6438
6439 static bool
6440 try_improve_iv_set (struct ivopts_data *data,
6441 struct iv_ca *ivs, bool *try_replace_p)
6442 {
6443 unsigned i, n_ivs;
6444 comp_cost acost, best_cost = iv_ca_cost (ivs);
6445 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6446 struct iv_cand *cand;
6447
6448 /* Try extending the set of induction variables by one. */
6449 for (i = 0; i < n_iv_cands (data); i++)
6450 {
6451 cand = iv_cand (data, i);
6452
6453 if (iv_ca_cand_used_p (ivs, cand))
6454 continue;
6455
6456 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6457 if (!act_delta)
6458 continue;
6459
6460 /* If we successfully added the candidate and the set is small enough,
6461 try optimizing it by removing other candidates. */
6462 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6463 {
6464 iv_ca_delta_commit (data, ivs, act_delta, true);
6465 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6466 iv_ca_delta_commit (data, ivs, act_delta, false);
6467 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6468 }
6469
6470 if (compare_costs (acost, best_cost) < 0)
6471 {
6472 best_cost = acost;
6473 iv_ca_delta_free (&best_delta);
6474 best_delta = act_delta;
6475 }
6476 else
6477 iv_ca_delta_free (&act_delta);
6478 }
6479
6480 if (!best_delta)
6481 {
6482 /* Try removing the candidates from the set instead. */
6483 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6484
6485 if (!best_delta && *try_replace_p)
6486 {
6487 *try_replace_p = false;
6488 /* So far the candidate selecting algorithm tends to choose fewer IVs
6489 so that it can handle cases in which loops have many variables
6490 but the best choice is often to use only one general biv. One
6491 weakness is that it can't handle the opposite case, in which different
6492 candidates should be chosen with respect to different uses. To solve
6493 this problem, we replace candidates in the manner described in the
6494 comments of iv_ca_replace, thus giving the general algorithm a chance
6495 to break the local optimal fixed-point in these cases. */
6496 best_cost = iv_ca_replace (data, ivs, &best_delta);
6497 }
6498
6499 if (!best_delta)
6500 return false;
6501 }
6502
6503 iv_ca_delta_commit (data, ivs, best_delta, true);
6504 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6505 iv_ca_delta_free (&best_delta);
6506 return true;
6507 }
6508
6509 /* Attempts to find the optimal set of induction variables. We use a simple
6510 greedy heuristic -- we try to replace at most one candidate in the selected
6511 solution and remove the unused ivs while this improves the cost. */
6512
6513 static struct iv_ca *
6514 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6515 {
6516 struct iv_ca *set;
6517 bool try_replace_p = true;
6518
6519 /* Get the initial solution. */
6520 set = get_initial_solution (data, originalp);
6521 if (!set)
6522 {
6523 if (dump_file && (dump_flags & TDF_DETAILS))
6524 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6525 return NULL;
6526 }
6527
6528 if (dump_file && (dump_flags & TDF_DETAILS))
6529 {
6530 fprintf (dump_file, "Initial set of candidates:\n");
6531 iv_ca_dump (data, dump_file, set);
6532 }
6533
6534 while (try_improve_iv_set (data, set, &try_replace_p))
6535 {
6536 if (dump_file && (dump_flags & TDF_DETAILS))
6537 {
6538 fprintf (dump_file, "Improved to:\n");
6539 iv_ca_dump (data, dump_file, set);
6540 }
6541 }
6542
6543 return set;
6544 }
6545
6546 static struct iv_ca *
6547 find_optimal_iv_set (struct ivopts_data *data)
6548 {
6549 unsigned i;
6550 struct iv_ca *set, *origset;
6551 struct iv_use *use;
6552 comp_cost cost, origcost;
6553
6554 /* Determine the cost based on a strategy that starts with the original IVs,
6555 then try again using a strategy that prefers candidates not based
6556 on any memory object. */
6557 origset = find_optimal_iv_set_1 (data, true);
6558 set = find_optimal_iv_set_1 (data, false);
6559
6560 if (!origset && !set)
6561 return NULL;
6562
6563 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6564 cost = set ? iv_ca_cost (set) : infinite_cost;
6565
6566 if (dump_file && (dump_flags & TDF_DETAILS))
6567 {
6568 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6569 origcost.cost, origcost.complexity);
6570 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6571 cost.cost, cost.complexity);
6572 }
6573
6574 /* Choose the one with the best cost. */
6575 if (compare_costs (origcost, cost) <= 0)
6576 {
6577 if (set)
6578 iv_ca_free (&set);
6579 set = origset;
6580 }
6581 else if (origset)
6582 iv_ca_free (&origset);
6583
6584 for (i = 0; i < n_iv_uses (data); i++)
6585 {
6586 use = iv_use (data, i);
6587 use->selected = iv_ca_cand_for_use (set, use)->cand;
6588 }
6589
6590 return set;
6591 }
6592
6593 /* Creates a new induction variable corresponding to CAND. */
6594
6595 static void
6596 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6597 {
6598 gimple_stmt_iterator incr_pos;
6599 tree base;
6600 bool after = false;
6601
6602 if (!cand->iv)
6603 return;
6604
6605 switch (cand->pos)
6606 {
6607 case IP_NORMAL:
6608 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6609 break;
6610
6611 case IP_END:
6612 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6613 after = true;
6614 break;
6615
6616 case IP_AFTER_USE:
6617 after = true;
6618 /* fall through */
6619 case IP_BEFORE_USE:
6620 incr_pos = gsi_for_stmt (cand->incremented_at);
6621 break;
6622
6623 case IP_ORIGINAL:
6624 /* Mark that the iv is preserved. */
6625 name_info (data, cand->var_before)->preserve_biv = true;
6626 name_info (data, cand->var_after)->preserve_biv = true;
6627
6628 /* Rewrite the increment so that it uses var_before directly. */
6629 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6630 return;
6631 }
6632
6633 gimple_add_tmp_var (cand->var_before);
6634
6635 base = unshare_expr (cand->iv->base);
6636
6637 create_iv (base, unshare_expr (cand->iv->step),
6638 cand->var_before, data->current_loop,
6639 &incr_pos, after, &cand->var_before, &cand->var_after);
6640 }
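
/* For a candidate with base B and step S, create_iv (defined in
   tree-ssa-loop-manip.c) emits GIMPLE of roughly this shape, with the
   increment placed at INCR_POS (before or after it, depending on AFTER):

     # var_before = PHI <B (preheader), var_after (latch)>
     ...
     var_after = var_before + S;

   var_before and var_after are then the values of the iv before and after
   the increment within one iteration.  */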
6641
6642 /* Creates new induction variables described in SET. */
6643
6644 static void
6645 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6646 {
6647 unsigned i;
6648 struct iv_cand *cand;
6649 bitmap_iterator bi;
6650
6651 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6652 {
6653 cand = iv_cand (data, i);
6654 create_new_iv (data, cand);
6655 }
6656
6657 if (dump_file && (dump_flags & TDF_DETAILS))
6658 {
6659 fprintf (dump_file, "Selected IV set for loop %d",
6660 data->current_loop->num);
6661 if (data->loop_loc != UNKNOWN_LOCATION)
6662 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6663 LOCATION_LINE (data->loop_loc));
6664 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6665 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6666 {
6667 cand = iv_cand (data, i);
6668 dump_cand (dump_file, cand);
6669 }
6670 fprintf (dump_file, "\n");
6671 }
6672 }
6673
6674 /* Rewrites USE (definition of iv used in a nonlinear expression)
6675 using candidate CAND. */
6676
6677 static void
6678 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6679 struct iv_use *use, struct iv_cand *cand)
6680 {
6681 tree comp;
6682 tree op, tgt;
6683 gassign *ass;
6684 gimple_stmt_iterator bsi;
6685
6686 /* An important special case -- if we are asked to express value of
6687 the original iv by itself, just exit; there is no need to
6688 introduce a new computation (that might also need casting the
6689 variable to unsigned and back). */
6690 if (cand->pos == IP_ORIGINAL
6691 && cand->incremented_at == use->stmt)
6692 {
6693 enum tree_code stmt_code;
6694
6695 gcc_assert (is_gimple_assign (use->stmt));
6696 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6697
6698 /* Check whether we may leave the computation unchanged.
6699 This is the case only if it does not rely on other
6700 computations in the loop -- otherwise, the computation
6701 we rely upon may be removed in remove_unused_ivs,
6702 thus leading to ICE. */
6703 stmt_code = gimple_assign_rhs_code (use->stmt);
6704 if (stmt_code == PLUS_EXPR
6705 || stmt_code == MINUS_EXPR
6706 || stmt_code == POINTER_PLUS_EXPR)
6707 {
6708 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6709 op = gimple_assign_rhs2 (use->stmt);
6710 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6711 op = gimple_assign_rhs1 (use->stmt);
6712 else
6713 op = NULL_TREE;
6714 }
6715 else
6716 op = NULL_TREE;
6717
6718 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6719 return;
6720 }
6721
6722 comp = get_computation (data->current_loop, use, cand);
6723 gcc_assert (comp != NULL_TREE);
6724
6725 switch (gimple_code (use->stmt))
6726 {
6727 case GIMPLE_PHI:
6728 tgt = PHI_RESULT (use->stmt);
6729
6730 /* If we should keep the biv, do not replace it. */
6731 if (name_info (data, tgt)->preserve_biv)
6732 return;
6733
6734 bsi = gsi_after_labels (gimple_bb (use->stmt));
6735 break;
6736
6737 case GIMPLE_ASSIGN:
6738 tgt = gimple_assign_lhs (use->stmt);
6739 bsi = gsi_for_stmt (use->stmt);
6740 break;
6741
6742 default:
6743 gcc_unreachable ();
6744 }
6745
6746 if (!valid_gimple_rhs_p (comp)
6747 || (gimple_code (use->stmt) != GIMPLE_PHI
6748 /* We can't allow re-allocating the stmt as it might be pointed
6749 to still. */
6750 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6751 >= gimple_num_ops (gsi_stmt (bsi)))))
6752 {
6753 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6754 true, GSI_SAME_STMT);
6755 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6756 {
6757 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6758 /* As this isn't a plain copy we have to reset alignment
6759 information. */
6760 if (SSA_NAME_PTR_INFO (comp))
6761 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6762 }
6763 }
6764
6765 if (gimple_code (use->stmt) == GIMPLE_PHI)
6766 {
6767 ass = gimple_build_assign (tgt, comp);
6768 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6769
6770 bsi = gsi_for_stmt (use->stmt);
6771 remove_phi_node (&bsi, false);
6772 }
6773 else
6774 {
6775 gimple_assign_set_rhs_from_tree (&bsi, comp);
6776 use->stmt = gsi_stmt (bsi);
6777 }
6778 }
6779
6780 /* Performs a peephole optimization to reorder the iv update statement with
6781 a mem ref to enable instruction combining in later phases. The mem ref uses
6782 the iv value before the update, so the reordering transformation requires
6783 adjustment of the offset. CAND is the selected IV_CAND.
6784
6785 Example:
6786
6787 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6788 iv2 = iv1 + 1;
6789
6790 if (t < val) (1)
6791 goto L;
6792 goto Head;
6793
6794
6795 Directly propagating t over to (1) would introduce an overlapping live range
6796 and thus increase register pressure. This peephole transforms the code into:
6797
6798
6799 iv2 = iv1 + 1;
6800 t = MEM_REF (base, iv2, 8, 8);
6801 if (t < val)
6802 goto L;
6803 goto Head;
6804 */
6805
6806 static void
6807 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6808 {
6809 tree var_after;
6810 gimple iv_update, stmt;
6811 basic_block bb;
6812 gimple_stmt_iterator gsi, gsi_iv;
6813
6814 if (cand->pos != IP_NORMAL)
6815 return;
6816
6817 var_after = cand->var_after;
6818 iv_update = SSA_NAME_DEF_STMT (var_after);
6819
6820 bb = gimple_bb (iv_update);
6821 gsi = gsi_last_nondebug_bb (bb);
6822 stmt = gsi_stmt (gsi);
6823
6824 /* Only handle conditional statement for now. */
6825 if (gimple_code (stmt) != GIMPLE_COND)
6826 return;
6827
6828 gsi_prev_nondebug (&gsi);
6829 stmt = gsi_stmt (gsi);
6830 if (stmt != iv_update)
6831 return;
6832
6833 gsi_prev_nondebug (&gsi);
6834 if (gsi_end_p (gsi))
6835 return;
6836
6837 stmt = gsi_stmt (gsi);
6838 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6839 return;
6840
6841 if (stmt != use->stmt)
6842 return;
6843
6844 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6845 return;
6846
6847 if (dump_file && (dump_flags & TDF_DETAILS))
6848 {
6849 fprintf (dump_file, "Reordering \n");
6850 print_gimple_stmt (dump_file, iv_update, 0, 0);
6851 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6852 fprintf (dump_file, "\n");
6853 }
6854
6855 gsi = gsi_for_stmt (use->stmt);
6856 gsi_iv = gsi_for_stmt (iv_update);
6857 gsi_move_before (&gsi_iv, &gsi);
6858
6859 cand->pos = IP_BEFORE_USE;
6860 cand->incremented_at = use->stmt;
6861 }
6862
6863 /* Rewrites USE (address that is an iv) using candidate CAND. */
6864
6865 static void
6866 rewrite_use_address_1 (struct ivopts_data *data,
6867 struct iv_use *use, struct iv_cand *cand)
6868 {
6869 aff_tree aff;
6870 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6871 tree base_hint = NULL_TREE;
6872 tree ref, iv;
6873 bool ok;
6874
6875 adjust_iv_update_pos (cand, use);
6876 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6877 gcc_assert (ok);
6878 unshare_aff_combination (&aff);
6879
6880 /* To avoid undefined overflow problems, all IV candidates use unsigned
6881 integer types. The drawback is that this makes it impossible for
6882 create_mem_ref to distinguish an IV that is based on a memory object
6883 from one that represents simply an offset.
6884
6885 To work around this problem, we pass a hint to create_mem_ref that
6886 indicates which variable (if any) in aff is an IV based on a memory
6887 object. Note that we only consider the candidate. If this is not
6888 based on an object, the base of the reference is in some subexpression
6889 of the use -- but these will use pointer types, so they are recognized
6890 by the create_mem_ref heuristics anyway. */
6891 if (cand->iv->base_object)
6892 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6893
6894 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6895 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6896 reference_alias_ptr_type (*use->op_p),
6897 iv, base_hint, data->speed);
6898 copy_ref_info (ref, *use->op_p);
6899 *use->op_p = ref;
6900 }
6901
6902 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6903 first use of a group, rewrites sub uses in the group too. */
6904
6905 static void
6906 rewrite_use_address (struct ivopts_data *data,
6907 struct iv_use *use, struct iv_cand *cand)
6908 {
6909 struct iv_use *next;
6910
6911 gcc_assert (use->sub_id == 0);
6912 rewrite_use_address_1 (data, use, cand);
6913 update_stmt (use->stmt);
6914
6915 for (next = use->next; next != NULL; next = next->next)
6916 {
6917 rewrite_use_address_1 (data, next, cand);
6918 update_stmt (next->stmt);
6919 }
6920
6921 return;
6922 }
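
/* Illustration (hypothetical GIMPLE): given a use of *p_5 where
   p_5 = p_1 + ivtmp_3, rewriting against an address candidate folds the
   pointer arithmetic into a single combined reference such as

     MEM[base: p_1, index: ivtmp_3]

   (a TARGET_MEM_REF built by create_mem_ref), letting the backend select
   an addressing mode instead of materializing p_5 separately.  */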
6923
6924 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6925 candidate CAND. */
6926
6927 static void
6928 rewrite_use_compare (struct ivopts_data *data,
6929 struct iv_use *use, struct iv_cand *cand)
6930 {
6931 tree comp, *var_p, op, bound;
6932 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6933 enum tree_code compare;
6934 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6935 bool ok;
6936
6937 bound = cp->value;
6938 if (bound)
6939 {
6940 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6941 tree var_type = TREE_TYPE (var);
6942 gimple_seq stmts;
6943
6944 if (dump_file && (dump_flags & TDF_DETAILS))
6945 {
6946 fprintf (dump_file, "Replacing exit test: ");
6947 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6948 }
6949 compare = cp->comp;
6950 bound = unshare_expr (fold_convert (var_type, bound));
6951 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6952 if (stmts)
6953 gsi_insert_seq_on_edge_immediate (
6954 loop_preheader_edge (data->current_loop),
6955 stmts);
6956
6957 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6958 gimple_cond_set_lhs (cond_stmt, var);
6959 gimple_cond_set_code (cond_stmt, compare);
6960 gimple_cond_set_rhs (cond_stmt, op);
6961 return;
6962 }
6963
6964 /* The induction variable elimination failed; just express the original
6965 giv. */
6966 comp = get_computation (data->current_loop, use, cand);
6967 gcc_assert (comp != NULL_TREE);
6968
6969 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6970 gcc_assert (ok);
6971
6972 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6973 true, GSI_SAME_STMT);
6974 }
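
/* Illustration (hypothetical before/after): when elimination succeeds
   (BOUND is non-NULL), an exit test such as

     if (i_7 < n_3)

   is replaced by a comparison of the chosen candidate against the
   precomputed bound, e.g.

     if (ivtmp_9 != _42)

   where _42 computes BOUND and is inserted on the preheader edge by the
   code above.  */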
6975
6976 /* Rewrites USE using candidate CAND. */
6977
6978 static void
6979 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6980 {
6981 switch (use->type)
6982 {
6983 case USE_NONLINEAR_EXPR:
6984 rewrite_use_nonlinear_expr (data, use, cand);
6985 break;
6986
6987 case USE_ADDRESS:
6988 rewrite_use_address (data, use, cand);
6989 break;
6990
6991 case USE_COMPARE:
6992 rewrite_use_compare (data, use, cand);
6993 break;
6994
6995 default:
6996 gcc_unreachable ();
6997 }
6998
6999 update_stmt (use->stmt);
7000 }
7001
7002 /* Rewrite the uses using the selected induction variables. */
7003
7004 static void
7005 rewrite_uses (struct ivopts_data *data)
7006 {
7007 unsigned i;
7008 struct iv_cand *cand;
7009 struct iv_use *use;
7010
7011 for (i = 0; i < n_iv_uses (data); i++)
7012 {
7013 use = iv_use (data, i);
7014 cand = use->selected;
7015 gcc_assert (cand);
7016
7017 rewrite_use (data, use, cand);
7018 }
7019 }
7020
7021 /* Removes the ivs that are not used after rewriting. */
7022
7023 static void
7024 remove_unused_ivs (struct ivopts_data *data)
7025 {
7026 unsigned j;
7027 bitmap_iterator bi;
7028 bitmap toremove = BITMAP_ALLOC (NULL);
7029
7030 /* Figure out an order in which to release SSA DEFs so that we don't
7031 release something that we'd have to propagate into a debug stmt
7032 afterwards. */
7033 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7034 {
7035 struct version_info *info;
7036
7037 info = ver_info (data, j);
7038 if (info->iv
7039 && !integer_zerop (info->iv->step)
7040 && !info->inv_id
7041 && !info->iv->have_use_for
7042 && !info->preserve_biv)
7043 {
7044 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7045
7046 tree def = info->iv->ssa_name;
7047
7048 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7049 {
7050 imm_use_iterator imm_iter;
7051 use_operand_p use_p;
7052 gimple stmt;
7053 int count = 0;
7054
7055 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7056 {
7057 if (!gimple_debug_bind_p (stmt))
7058 continue;
7059
7060 /* We just want to determine whether to do nothing
7061 (count == 0), to substitute the computed
7062 expression into a single use of the SSA DEF by
7063 itself (count == 1), or to use a debug temp
7064 because the SSA DEF is used multiple times or as
7065 part of a larger expression (count > 1). */
7066 count++;
7067 if (gimple_debug_bind_get_value (stmt) != def)
7068 count++;
7069
7070 if (count > 1)
7071 BREAK_FROM_IMM_USE_STMT (imm_iter);
7072 }
7073
7074 if (!count)
7075 continue;
7076
7077 struct iv_use dummy_use;
7078 struct iv_cand *best_cand = NULL, *cand;
7079 unsigned i, best_pref = 0, cand_pref;
7080
7081 memset (&dummy_use, 0, sizeof (dummy_use));
7082 dummy_use.iv = info->iv;
7083 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7084 {
7085 cand = iv_use (data, i)->selected;
7086 if (cand == best_cand)
7087 continue;
7088 cand_pref = operand_equal_p (cand->iv->step,
7089 info->iv->step, 0)
7090 ? 4 : 0;
7091 cand_pref
7092 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7093 == TYPE_MODE (TREE_TYPE (info->iv->base))
7094 ? 2 : 0;
7095 cand_pref
7096 += TREE_CODE (cand->iv->base) == INTEGER_CST
7097 ? 1 : 0;
7098 if (best_cand == NULL || best_pref < cand_pref)
7099 {
7100 best_cand = cand;
7101 best_pref = cand_pref;
7102 }
7103 }
7104
7105 if (!best_cand)
7106 continue;
7107
7108 tree comp = get_computation_at (data->current_loop,
7109 &dummy_use, best_cand,
7110 SSA_NAME_DEF_STMT (def));
7111 if (!comp)
7112 continue;
7113
7114 if (count > 1)
7115 {
7116 tree vexpr = make_node (DEBUG_EXPR_DECL);
7117 DECL_ARTIFICIAL (vexpr) = 1;
7118 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7119 if (SSA_NAME_VAR (def))
7120 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7121 else
7122 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7123 gdebug *def_temp
7124 = gimple_build_debug_bind (vexpr, comp, NULL);
7125 gimple_stmt_iterator gsi;
7126
7127 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7128 gsi = gsi_after_labels (gimple_bb
7129 (SSA_NAME_DEF_STMT (def)));
7130 else
7131 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7132
7133 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7134 comp = vexpr;
7135 }
7136
7137 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7138 {
7139 if (!gimple_debug_bind_p (stmt))
7140 continue;
7141
7142 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7143 SET_USE (use_p, comp);
7144
7145 update_stmt (stmt);
7146 }
7147 }
7148 }
7149 }
7150
7151 release_defs_bitset (toremove);
7152
7153 BITMAP_FREE (toremove);
7154 }
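
/* Illustration (hypothetical debug statements): if a removed iv i_1 is
   referenced by several debug binds, or as part of a larger expression, a
   debug temporary is introduced so the replacement expression is emitted
   only once:

     # DEBUG D#1 => base_2 + ivtmp_9 * 4
     # DEBUG i => D#1

   With a single plain bind (count == 1), the computed expression is
   substituted into that bind directly.  */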
7155
7156 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7157 for hash_map::traverse. */
7158
7159 bool
7160 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7161 {
7162 free (value);
7163 return true;
7164 }
7165
7166 /* Frees data allocated by the optimization of a single loop. */
7167
7168 static void
7169 free_loop_data (struct ivopts_data *data)
7170 {
7171 unsigned i, j;
7172 bitmap_iterator bi;
7173 tree obj;
7174
7175 if (data->niters)
7176 {
7177 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7178 delete data->niters;
7179 data->niters = NULL;
7180 }
7181
7182 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7183 {
7184 struct version_info *info;
7185
7186 info = ver_info (data, i);
7187 free (info->iv);
7188 info->iv = NULL;
7189 info->has_nonlin_use = false;
7190 info->preserve_biv = false;
7191 info->inv_id = 0;
7192 }
7193 bitmap_clear (data->relevant);
7194 bitmap_clear (data->important_candidates);
7195
7196 for (i = 0; i < n_iv_uses (data); i++)
7197 {
7198 struct iv_use *use = iv_use (data, i);
7199 struct iv_use *pre = use, *sub = use->next;
7200
7201 while (sub)
7202 {
7203 gcc_assert (sub->related_cands == NULL);
7204 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7205
7206 free (sub->iv);
7207 pre = sub;
7208 sub = sub->next;
7209 free (pre);
7210 }
7211
7212 free (use->iv);
7213 BITMAP_FREE (use->related_cands);
7214 for (j = 0; j < use->n_map_members; j++)
7215 if (use->cost_map[j].depends_on)
7216 BITMAP_FREE (use->cost_map[j].depends_on);
7217 free (use->cost_map);
7218 free (use);
7219 }
7220 data->iv_uses.truncate (0);
7221
7222 for (i = 0; i < n_iv_cands (data); i++)
7223 {
7224 struct iv_cand *cand = iv_cand (data, i);
7225
7226 free (cand->iv);
7227 if (cand->depends_on)
7228 BITMAP_FREE (cand->depends_on);
7229 free (cand);
7230 }
7231 data->iv_candidates.truncate (0);
7232
7233 if (data->version_info_size < num_ssa_names)
7234 {
7235 data->version_info_size = 2 * num_ssa_names;
7236 free (data->version_info);
7237 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7238 }
7239
7240 data->max_inv_id = 0;
7241
7242 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7243 SET_DECL_RTL (obj, NULL_RTX);
7244
7245 decl_rtl_to_reset.truncate (0);
7246
7247 data->inv_expr_tab->empty ();
7248 data->inv_expr_id = 0;
7249 }
7250
7251 /* Finalizes the data structures used by the iv optimization pass. */
7252
7253
7254 static void
7255 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7256 {
7257 free_loop_data (data);
7258 free (data->version_info);
7259 BITMAP_FREE (data->relevant);
7260 BITMAP_FREE (data->important_candidates);
7261
7262 decl_rtl_to_reset.release ();
7263 data->iv_uses.release ();
7264 data->iv_candidates.release ();
7265 delete data->inv_expr_tab;
7266 data->inv_expr_tab = NULL;
7267 free_affine_expand_cache (&data->name_expansion_cache);
7268 }
7269
7270 /* Returns true if the loop body BODY includes any function calls. */
7271
7272 static bool
7273 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7274 {
7275 gimple_stmt_iterator gsi;
7276 unsigned i;
7277
7278 for (i = 0; i < num_nodes; i++)
7279 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7280 {
7281 gimple stmt = gsi_stmt (gsi);
7282 if (is_gimple_call (stmt)
7283 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7284 return true;
7285 }
7286 return false;
7287 }
7288
7289 /* Optimizes the LOOP. Returns true if anything changed. */
7290
7291 static bool
7292 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7293 {
7294 bool changed = false;
7295 struct iv_ca *iv_ca;
7296 edge exit = single_dom_exit (loop);
7297 basic_block *body;
7298
7299 gcc_assert (!data->niters);
7300 data->current_loop = loop;
7301 data->loop_loc = find_loop_location (loop);
7302 data->speed = optimize_loop_for_speed_p (loop);
7303
7304 if (dump_file && (dump_flags & TDF_DETAILS))
7305 {
7306 fprintf (dump_file, "Processing loop %d", loop->num);
7307 if (data->loop_loc != UNKNOWN_LOCATION)
7308 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7309 LOCATION_LINE (data->loop_loc));
7310 fprintf (dump_file, "\n");
7311
7312 if (exit)
7313 {
7314 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7315 exit->src->index, exit->dest->index);
7316 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7317 fprintf (dump_file, "\n");
7318 }
7319
7320 fprintf (dump_file, "\n");
7321 }
7322
7323 body = get_loop_body (loop);
7324 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7325 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7326 free (body);
7327
7328 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7329
7330 /* For each ssa name determines whether it behaves as an induction variable
7331 in some loop. */
7332 if (!find_induction_variables (data))
7333 goto finish;
7334
7335 /* Finds interesting uses (item 1). */
7336 find_interesting_uses (data);
7337 group_address_uses (data);
7338 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7339 goto finish;
7340
7341 /* Finds candidates for the induction variables (item 2). */
7342 find_iv_candidates (data);
7343
7344 /* Calculates the costs (item 3, part 1). */
7345 determine_iv_costs (data);
7346 determine_use_iv_costs (data);
7347 determine_set_costs (data);
7348
7349 /* Find the optimal set of induction variables (item 3, part 2). */
7350 iv_ca = find_optimal_iv_set (data);
7351 if (!iv_ca)
7352 goto finish;
7353 changed = true;
7354
7355 /* Create the new induction variables (item 4, part 1). */
7356 create_new_ivs (data, iv_ca);
7357 iv_ca_free (&iv_ca);
7358
7359 /* Rewrite the uses (item 4, part 2). */
7360 rewrite_uses (data);
7361
7362 /* Remove the ivs that are unused after rewriting. */
7363 remove_unused_ivs (data);
7364
7365 /* We have changed the structure of induction variables; it might happen
7366 that definitions in the scev database refer to some of them that were
7367 eliminated. */
7368 scev_reset ();
7369
7370 finish:
7371 free_loop_data (data);
7372
7373 return changed;
7374 }
7375
7376 /* Main entry point. Optimizes induction variables in loops. */
7377
7378 void
7379 tree_ssa_iv_optimize (void)
7380 {
7381 struct loop *loop;
7382 struct ivopts_data data;
7383
7384 tree_ssa_iv_optimize_init (&data);
7385
7386 /* Optimize the loops starting with the innermost ones. */
7387 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7388 {
7389 if (dump_file && (dump_flags & TDF_DETAILS))
7390 flow_loop_dump (loop, dump_file, NULL, 1);
7391
7392 tree_ssa_iv_optimize_loop (&data, loop);
7393 }
7394
7395 tree_ssa_iv_optimize_finalize (&data);
7396 }