gcc/tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does so in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r.t. a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variable's value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible; it might give better performance and enable us to decide
61 costs more precisely, but getting all the interactions right would be
62 complicated. */
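/* As a schematic illustration only (the actual result depends on the
   target's address and rtx costs), a loop such as

       for (i = 0; i < n; i++)
	 a[i] = 0;

   may end up using a single pointer induction variable in place of
   separate ivs for "i" and for the address "&a[i]", roughly

       for (p = &a[0]; p != &a[n]; p++)
	 *p = 0;

   with the exit test rewritten to compare the pointer against its final
   value.  */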
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "cfghooks.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "rtl.h"
72 #include "ssa.h"
73 #include "alias.h"
74 #include "fold-const.h"
75 #include "stor-layout.h"
76 #include "tm_p.h"
77 #include "gimple-pretty-print.h"
78 #include "internal-fn.h"
79 #include "tree-eh.h"
80 #include "gimplify.h"
81 #include "gimple-iterator.h"
82 #include "gimplify-me.h"
83 #include "cgraph.h"
84 #include "tree-cfg.h"
85 #include "tree-ssa-loop-ivopts.h"
86 #include "tree-ssa-loop-manip.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
89 #include "flags.h"
90 #include "insn-config.h"
91 #include "expmed.h"
92 #include "dojump.h"
93 #include "explow.h"
94 #include "calls.h"
95 #include "emit-rtl.h"
96 #include "varasm.h"
97 #include "stmt.h"
98 #include "expr.h"
99 #include "tree-dfa.h"
100 #include "tree-ssa.h"
101 #include "cfgloop.h"
102 #include "tree-pass.h"
103 #include "tree-chrec.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "langhooks.h"
107 #include "tree-affine.h"
108 #include "target.h"
109 #include "tree-inline.h"
110 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113 #include "tree-vectorizer.h"
114
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
118 #include "recog.h"
119
120 /* The infinite cost. */
121 #define INFTY 10000000
122
123 #define AVG_LOOP_NITER(LOOP) 5
124
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
127 exists. */
128
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop *loop)
131 {
132 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
133 if (niter == -1)
134 return AVG_LOOP_NITER (loop);
135
136 return niter;
137 }
138
139 /* Representation of the induction variable. */
140 struct iv
141 {
142 tree base; /* Initial value of the iv. */
143 tree base_object; /* A memory object to which the induction variable points. */
144 tree step; /* Step of the iv (constant only). */
145 tree ssa_name; /* The ssa name with the value. */
146 unsigned use_id; /* The id of the use recorded for this iv, if have_use_for is set. */
147 bool biv_p; /* Is it a biv? */
148 bool have_use_for; /* Do we already have a use for it? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 };
151
152 /* Per-ssa version information (induction variable descriptions, etc.). */
153 struct version_info
154 {
155 tree name; /* The ssa name. */
156 struct iv *iv; /* Induction variable description. */
157 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
158 an expression that is not an induction variable. */
159 bool preserve_biv; /* For the original biv, whether to preserve it. */
160 unsigned inv_id; /* Id of an invariant. */
161 };
162
163 /* Types of uses. */
164 enum use_type
165 {
166 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
167 USE_ADDRESS, /* Use in an address. */
168 USE_COMPARE /* Use is a compare. */
169 };
170
171 /* Cost of a computation. */
172 typedef struct
173 {
174 int cost; /* The runtime cost. */
175 unsigned complexity; /* The estimate of the complexity of the code for
176 the computation (in no concrete units --
177 complexity field should be larger for more
178 complex expressions and addressing modes). */
179 } comp_cost;
180
181 static const comp_cost no_cost = {0, 0};
182 static const comp_cost infinite_cost = {INFTY, INFTY};
183
184 /* The candidate - cost pair. */
185 struct cost_pair
186 {
187 struct iv_cand *cand; /* The candidate. */
188 comp_cost cost; /* The cost. */
189 bitmap depends_on; /* The list of invariants that have to be
190 preserved. */
191 tree value; /* For final value elimination, the expression for
192 the final value of the iv. For iv elimination,
193 the new bound to compare with. */
194 enum tree_code comp; /* For iv elimination, the comparison. */
195 int inv_expr_id; /* Loop invariant expression id. */
196 };
197
198 /* Use. */
199 struct iv_use
200 {
201 unsigned id; /* The id of the use. */
202 unsigned sub_id; /* The id of the sub use. */
203 enum use_type type; /* Type of the use. */
204 struct iv *iv; /* The induction variable it is based on. */
205 gimple stmt; /* Statement in which it occurs. */
206 tree *op_p; /* The place where it occurs. */
207 bitmap related_cands; /* The set of "related" iv candidates, plus the common
208 important ones. */
209
210 unsigned n_map_members; /* Number of candidates in the cost_map list. */
211 struct cost_pair *cost_map;
212 /* The costs w.r.t. the iv candidates. */
213
214 struct iv_cand *selected;
215 /* The selected candidate. */
216
217 struct iv_use *next; /* The next sub use. */
218 tree addr_base; /* Base address with const offset stripped. */
219 unsigned HOST_WIDE_INT addr_offset;
220 /* Const offset stripped from base address. */
221 };
222
223 /* The position where the iv is computed. */
224 enum iv_position
225 {
226 IP_NORMAL, /* At the end, just before the exit condition. */
227 IP_END, /* At the end of the latch block. */
228 IP_BEFORE_USE, /* Immediately before a specific use. */
229 IP_AFTER_USE, /* Immediately after a specific use. */
230 IP_ORIGINAL /* The original biv. */
231 };
232
233 /* The induction variable candidate. */
234 struct iv_cand
235 {
236 unsigned id; /* The number of the candidate. */
237 bool important; /* Whether this is an "important" candidate, i.e. such
238 that it should be considered by all uses. */
239 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
240 gimple incremented_at;/* For original biv, the statement where it is
241 incremented. */
242 tree var_before; /* The variable used for it before increment. */
243 tree var_after; /* The variable used for it after increment. */
244 struct iv *iv; /* The value of the candidate. NULL for
245 "pseudocandidate" used to indicate the possibility
246 to replace the final value of an iv by direct
247 computation of the value. */
248 unsigned cost; /* Cost of the candidate. */
249 unsigned cost_step; /* Cost of the candidate's increment operation. */
250 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
251 where it is incremented. */
252 bitmap depends_on; /* The list of invariants that are used in step of the
253 biv. */
254 };
255
256 /* Loop invariant expression hashtable entry. */
257 struct iv_inv_expr_ent
258 {
259 tree expr;
260 int id;
261 hashval_t hash;
262 };
263
264 /* The data used by the induction variable optimizations. */
265
266 typedef struct iv_use *iv_use_p;
267
268 typedef struct iv_cand *iv_cand_p;
269
270 /* Hashtable helpers. */
271
272 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
273 {
274 static inline hashval_t hash (const iv_inv_expr_ent *);
275 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
276 };
277
278 /* Hash function for loop invariant expressions. */
279
280 inline hashval_t
281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
282 {
283 return expr->hash;
284 }
285
286 /* Hash table equality function for expressions. */
287
288 inline bool
289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
290 const iv_inv_expr_ent *expr2)
291 {
292 return expr1->hash == expr2->hash
293 && operand_equal_p (expr1->expr, expr2->expr, 0);
294 }
295
296 struct ivopts_data
297 {
298 /* The currently optimized loop. */
299 struct loop *current_loop;
300 source_location loop_loc;
301
302 /* Numbers of iterations for all exits of the current loop. */
303 hash_map<edge, tree_niter_desc *> *niters;
304
305 /* Number of registers used in it. */
306 unsigned regs_used;
307
308 /* The size of version_info array allocated. */
309 unsigned version_info_size;
310
311 /* The array of information for the ssa names. */
312 struct version_info *version_info;
313
314 /* The hashtable of loop invariant expressions created
315 by ivopts. */
316 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
317
318 /* Loop invariant expression id. */
319 int inv_expr_id;
320
321 /* The bitmap of indices in version_info whose value was changed. */
322 bitmap relevant;
323
324 /* The uses of induction variables. */
325 vec<iv_use_p> iv_uses;
326
327 /* The candidates. */
328 vec<iv_cand_p> iv_candidates;
329
330 /* A bitmap of important candidates. */
331 bitmap important_candidates;
332
333 /* Cache used by tree_to_aff_combination_expand. */
334 hash_map<tree, name_expansion *> *name_expansion_cache;
335
336 /* The maximum invariant id. */
337 unsigned max_inv_id;
338
339 /* Obstack for iv structure. */
340 struct obstack iv_obstack;
341
342 /* Whether to consider just related and important candidates when replacing a
343 use. */
344 bool consider_all_candidates;
345
346 /* Are we optimizing for speed? */
347 bool speed;
348
349 /* Whether the loop body includes any function calls. */
350 bool body_includes_call;
351
352 /* Whether the loop body can only be exited via a single exit. */
353 bool loop_single_exit_p;
354 };
355
356 /* An assignment of iv candidates to uses. */
357
358 struct iv_ca
359 {
360 /* The number of uses covered by the assignment. */
361 unsigned upto;
362
363 /* Number of uses that cannot be expressed by the candidates in the set. */
364 unsigned bad_uses;
365
366 /* Candidate assigned to a use, together with the related costs. */
367 struct cost_pair **cand_for_use;
368
369 /* Number of times each candidate is used. */
370 unsigned *n_cand_uses;
371
372 /* The candidates used. */
373 bitmap cands;
374
375 /* The number of candidates in the set. */
376 unsigned n_cands;
377
378 /* Total number of registers needed. */
379 unsigned n_regs;
380
381 /* Total cost of expressing uses. */
382 comp_cost cand_use_cost;
383
384 /* Total cost of candidates. */
385 unsigned cand_cost;
386
387 /* Number of times each invariant is used. */
388 unsigned *n_invariant_uses;
389
390 /* The array holding the number of uses of each loop
391 invariant expression created by ivopts. */
392 unsigned *used_inv_expr;
393
394 /* The number of created loop invariants. */
395 unsigned num_used_inv_expr;
396
397 /* Total cost of the assignment. */
398 comp_cost cost;
399 };
400
401 /* Difference of two iv candidate assignments. */
402
403 struct iv_ca_delta
404 {
405 /* Changed use. */
406 struct iv_use *use;
407
408 /* An old assignment (for rollback purposes). */
409 struct cost_pair *old_cp;
410
411 /* A new assignment. */
412 struct cost_pair *new_cp;
413
414 /* Next change in the list. */
415 struct iv_ca_delta *next_change;
416 };
417
418 /* Bound on the number of candidates below which all candidates are considered. */
419
420 #define CONSIDER_ALL_CANDIDATES_BOUND \
421 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
422
423 /* If there are more iv occurrences, we just give up (it is quite unlikely that
424 optimizing such a loop would help, and it would take ages). */
425
426 #define MAX_CONSIDERED_USES \
427 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
428
429 /* If there are at most this number of ivs in the set, always try removing
430 unnecessary ivs from the set. */
431
432 #define ALWAYS_PRUNE_CAND_SET_BOUND \
433 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
434
435 /* The list of trees for which the decl_rtl field must be reset is stored
436 here. */
437
438 static vec<tree> decl_rtl_to_reset;
439
440 static comp_cost force_expr_to_var_cost (tree, bool);
441
442 /* Number of uses recorded in DATA. */
443
444 static inline unsigned
445 n_iv_uses (struct ivopts_data *data)
446 {
447 return data->iv_uses.length ();
448 }
449
450 /* Ith use recorded in DATA. */
451
452 static inline struct iv_use *
453 iv_use (struct ivopts_data *data, unsigned i)
454 {
455 return data->iv_uses[i];
456 }
457
458 /* Number of candidates recorded in DATA. */
459
460 static inline unsigned
461 n_iv_cands (struct ivopts_data *data)
462 {
463 return data->iv_candidates.length ();
464 }
465
466 /* Ith candidate recorded in DATA. */
467
468 static inline struct iv_cand *
469 iv_cand (struct ivopts_data *data, unsigned i)
470 {
471 return data->iv_candidates[i];
472 }
473
474 /* The single loop exit if it dominates the latch, NULL otherwise. */
475
476 edge
477 single_dom_exit (struct loop *loop)
478 {
479 edge exit = single_exit (loop);
480
481 if (!exit)
482 return NULL;
483
484 if (!just_once_each_iteration_p (loop, exit->src))
485 return NULL;
486
487 return exit;
488 }
489
490 /* Dumps information about the induction variable IV to FILE. */
491
492 void
493 dump_iv (FILE *file, struct iv *iv, bool dump_name)
494 {
495 if (iv->ssa_name && dump_name)
496 {
497 fprintf (file, "ssa name ");
498 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
499 fprintf (file, "\n");
500 }
501
502 fprintf (file, " type ");
503 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
504 fprintf (file, "\n");
505
506 if (iv->step)
507 {
508 fprintf (file, " base ");
509 print_generic_expr (file, iv->base, TDF_SLIM);
510 fprintf (file, "\n");
511
512 fprintf (file, " step ");
513 print_generic_expr (file, iv->step, TDF_SLIM);
514 fprintf (file, "\n");
515 }
516 else
517 {
518 fprintf (file, " invariant ");
519 print_generic_expr (file, iv->base, TDF_SLIM);
520 fprintf (file, "\n");
521 }
522
523 if (iv->base_object)
524 {
525 fprintf (file, " base object ");
526 print_generic_expr (file, iv->base_object, TDF_SLIM);
527 fprintf (file, "\n");
528 }
529
530 if (iv->biv_p)
531 fprintf (file, " is a biv\n");
532 }
533
534 /* Dumps information about the USE to FILE. */
535
536 void
537 dump_use (FILE *file, struct iv_use *use)
538 {
539 fprintf (file, "use %d", use->id);
540 if (use->sub_id)
541 fprintf (file, ".%d", use->sub_id);
542
543 fprintf (file, "\n");
544
545 switch (use->type)
546 {
547 case USE_NONLINEAR_EXPR:
548 fprintf (file, " generic\n");
549 break;
550
551 case USE_ADDRESS:
552 fprintf (file, " address\n");
553 break;
554
555 case USE_COMPARE:
556 fprintf (file, " compare\n");
557 break;
558
559 default:
560 gcc_unreachable ();
561 }
562
563 fprintf (file, " in statement ");
564 print_gimple_stmt (file, use->stmt, 0, 0);
565 fprintf (file, "\n");
566
567 fprintf (file, " at position ");
568 if (use->op_p)
569 print_generic_expr (file, *use->op_p, TDF_SLIM);
570 fprintf (file, "\n");
571
572 dump_iv (file, use->iv, false);
573
574 if (use->related_cands)
575 {
576 fprintf (file, " related candidates ");
577 dump_bitmap (file, use->related_cands);
578 }
579 }
580
581 /* Dumps information about the uses to FILE. */
582
583 void
584 dump_uses (FILE *file, struct ivopts_data *data)
585 {
586 unsigned i;
587 struct iv_use *use;
588
589 for (i = 0; i < n_iv_uses (data); i++)
590 {
591 use = iv_use (data, i);
592 do
593 {
594 dump_use (file, use);
595 use = use->next;
596 }
597 while (use);
598 fprintf (file, "\n");
599 }
600 }
601
602 /* Dumps information about induction variable candidate CAND to FILE. */
603
604 void
605 dump_cand (FILE *file, struct iv_cand *cand)
606 {
607 struct iv *iv = cand->iv;
608
609 fprintf (file, "candidate %d%s\n",
610 cand->id, cand->important ? " (important)" : "");
611
612 if (cand->depends_on)
613 {
614 fprintf (file, " depends on ");
615 dump_bitmap (file, cand->depends_on);
616 }
617
618 if (!iv)
619 {
620 fprintf (file, " final value replacement\n");
621 return;
622 }
623
624 if (cand->var_before)
625 {
626 fprintf (file, " var_before ");
627 print_generic_expr (file, cand->var_before, TDF_SLIM);
628 fprintf (file, "\n");
629 }
630 if (cand->var_after)
631 {
632 fprintf (file, " var_after ");
633 print_generic_expr (file, cand->var_after, TDF_SLIM);
634 fprintf (file, "\n");
635 }
636
637 switch (cand->pos)
638 {
639 case IP_NORMAL:
640 fprintf (file, " incremented before exit test\n");
641 break;
642
643 case IP_BEFORE_USE:
644 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
645 break;
646
647 case IP_AFTER_USE:
648 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
649 break;
650
651 case IP_END:
652 fprintf (file, " incremented at end\n");
653 break;
654
655 case IP_ORIGINAL:
656 fprintf (file, " original biv\n");
657 break;
658 }
659
660 dump_iv (file, iv, false);
661 }
662
663 /* Returns the info for ssa version VER. */
664
665 static inline struct version_info *
666 ver_info (struct ivopts_data *data, unsigned ver)
667 {
668 return data->version_info + ver;
669 }
670
671 /* Returns the info for ssa name NAME. */
672
673 static inline struct version_info *
674 name_info (struct ivopts_data *data, tree name)
675 {
676 return ver_info (data, SSA_NAME_VERSION (name));
677 }
678
679 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
680 emitted in LOOP. */
681
682 static bool
683 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
684 {
685 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
686
687 gcc_assert (bb);
688
689 if (sbb == loop->latch)
690 return true;
691
692 if (sbb != bb)
693 return false;
694
695 return stmt == last_stmt (bb);
696 }
697
698 /* Returns true if STMT is after the place where the original induction
699 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
700 if the positions are identical. */
701
702 static bool
703 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
704 {
705 basic_block cand_bb = gimple_bb (cand->incremented_at);
706 basic_block stmt_bb = gimple_bb (stmt);
707
708 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
709 return false;
710
711 if (stmt_bb != cand_bb)
712 return true;
713
714 if (true_if_equal
715 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
716 return true;
717 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
718 }
719
720 /* Returns true if STMT is after the place where the induction variable
721 CAND is incremented in LOOP. */
722
723 static bool
724 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
725 {
726 switch (cand->pos)
727 {
728 case IP_END:
729 return false;
730
731 case IP_NORMAL:
732 return stmt_after_ip_normal_pos (loop, stmt);
733
734 case IP_ORIGINAL:
735 case IP_AFTER_USE:
736 return stmt_after_inc_pos (cand, stmt, false);
737
738 case IP_BEFORE_USE:
739 return stmt_after_inc_pos (cand, stmt, true);
740
741 default:
742 gcc_unreachable ();
743 }
744 }
745
746 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
747
748 static bool
749 abnormal_ssa_name_p (tree exp)
750 {
751 if (!exp)
752 return false;
753
754 if (TREE_CODE (exp) != SSA_NAME)
755 return false;
756
757 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
758 }
759
760 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
761 abnormal phi node. Callback for for_each_index. */
762
763 static bool
764 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
765 void *data ATTRIBUTE_UNUSED)
766 {
767 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
768 {
769 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
770 return false;
771 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
772 return false;
773 }
774
775 return !abnormal_ssa_name_p (*index);
776 }
777
778 /* Returns true if EXPR contains a ssa name that occurs in an
779 abnormal phi node. */
780
781 bool
782 contains_abnormal_ssa_name_p (tree expr)
783 {
784 enum tree_code code;
785 enum tree_code_class codeclass;
786
787 if (!expr)
788 return false;
789
790 code = TREE_CODE (expr);
791 codeclass = TREE_CODE_CLASS (code);
792
793 if (code == SSA_NAME)
794 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
795
796 if (code == INTEGER_CST
797 || is_gimple_min_invariant (expr))
798 return false;
799
800 if (code == ADDR_EXPR)
801 return !for_each_index (&TREE_OPERAND (expr, 0),
802 idx_contains_abnormal_ssa_name_p,
803 NULL);
804
805 if (code == COND_EXPR)
806 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
807 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
808 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
809
810 switch (codeclass)
811 {
812 case tcc_binary:
813 case tcc_comparison:
814 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
815 return true;
816
817 /* Fallthru. */
818 case tcc_unary:
819 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
820 return true;
821
822 break;
823
824 default:
825 gcc_unreachable ();
826 }
827
828 return false;
829 }
830
831 /* Returns the structure describing the number of iterations determined from
832 EXIT of DATA->current_loop, or NULL if something goes wrong. */
833
834 static struct tree_niter_desc *
835 niter_for_exit (struct ivopts_data *data, edge exit)
836 {
837 struct tree_niter_desc *desc;
838 tree_niter_desc **slot;
839
840 if (!data->niters)
841 {
842 data->niters = new hash_map<edge, tree_niter_desc *>;
843 slot = NULL;
844 }
845 else
846 slot = data->niters->get (exit);
847
848 if (!slot)
849 {
850 /* Try to determine number of iterations. We cannot safely work with ssa
851 names that appear in phi nodes on abnormal edges, so that we do not
852 create overlapping life ranges for them (PR 27283). */
853 desc = XNEW (struct tree_niter_desc);
854 if (!number_of_iterations_exit (data->current_loop,
855 exit, desc, true)
856 || contains_abnormal_ssa_name_p (desc->niter))
857 {
858 XDELETE (desc);
859 desc = NULL;
860 }
861 data->niters->put (exit, desc);
862 }
863 else
864 desc = *slot;
865
866 return desc;
867 }
868
869 /* Returns the structure describing the number of iterations determined from
870 single dominating exit of DATA->current_loop, or NULL if something
871 goes wrong. */
872
873 static struct tree_niter_desc *
874 niter_for_single_dom_exit (struct ivopts_data *data)
875 {
876 edge exit = single_dom_exit (data->current_loop);
877
878 if (!exit)
879 return NULL;
880
881 return niter_for_exit (data, exit);
882 }
883
884 /* Initializes data structures used by the iv optimization pass, stored
885 in DATA. */
886
887 static void
888 tree_ssa_iv_optimize_init (struct ivopts_data *data)
889 {
890 data->version_info_size = 2 * num_ssa_names;
891 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
892 data->relevant = BITMAP_ALLOC (NULL);
893 data->important_candidates = BITMAP_ALLOC (NULL);
894 data->max_inv_id = 0;
895 data->niters = NULL;
896 data->iv_uses.create (20);
897 data->iv_candidates.create (20);
898 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
899 data->inv_expr_id = 0;
900 data->name_expansion_cache = NULL;
901 decl_rtl_to_reset.create (20);
902 gcc_obstack_init (&data->iv_obstack);
903 }
904
905 /* Returns a memory object to which EXPR points. In case we are able to
906 determine that it does not point to any such object, NULL is returned. */
907
908 static tree
909 determine_base_object (tree expr)
910 {
911 enum tree_code code = TREE_CODE (expr);
912 tree base, obj;
913
914 /* If this is a pointer casted to any type, we need to determine
915 the base object for the pointer; so handle conversions before
916 throwing away non-pointer expressions. */
917 if (CONVERT_EXPR_P (expr))
918 return determine_base_object (TREE_OPERAND (expr, 0));
919
920 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
921 return NULL_TREE;
922
923 switch (code)
924 {
925 case INTEGER_CST:
926 return NULL_TREE;
927
928 case ADDR_EXPR:
929 obj = TREE_OPERAND (expr, 0);
930 base = get_base_address (obj);
931
932 if (!base)
933 return expr;
934
935 if (TREE_CODE (base) == MEM_REF)
936 return determine_base_object (TREE_OPERAND (base, 0));
937
938 return fold_convert (ptr_type_node,
939 build_fold_addr_expr (base));
940
941 case POINTER_PLUS_EXPR:
942 return determine_base_object (TREE_OPERAND (expr, 0));
943
944 case PLUS_EXPR:
945 case MINUS_EXPR:
946 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
947 gcc_unreachable ();
948
949 default:
950 return fold_convert (ptr_type_node, expr);
951 }
952 }
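/* A sketch of how the recursion above plays out (illustrative only): for
   a base like "&a[0] + 4 * i_1" the POINTER_PLUS_EXPR and ADDR_EXPR cases
   strip down to the declaration "a" and "&a" is returned as the base
   object, while a base built from an integer constant cast to a pointer
   bottoms out in the INTEGER_CST case and yields NULL_TREE.  */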
953
954 /* Return true if an address expression with a non-DECL_P operand appears
955 in EXPR. */
956
957 static bool
958 contain_complex_addr_expr (tree expr)
959 {
960 bool res = false;
961
962 STRIP_NOPS (expr);
963 switch (TREE_CODE (expr))
964 {
965 case POINTER_PLUS_EXPR:
966 case PLUS_EXPR:
967 case MINUS_EXPR:
968 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
969 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
970 break;
971
972 case ADDR_EXPR:
973 return (!DECL_P (TREE_OPERAND (expr, 0)));
974
975 default:
976 return false;
977 }
978
979 return res;
980 }
981
982 /* Allocates an induction variable with the given initial value BASE and
983 step STEP on DATA's obstack. NO_OVERFLOW implies the iv doesn't overflow. */
984
985 static struct iv *
986 alloc_iv (struct ivopts_data *data, tree base, tree step,
987 bool no_overflow = false)
988 {
989 tree expr = base;
990 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
991 sizeof (struct iv));
992 gcc_assert (step != NULL_TREE);
993
994 /* Lower address expressions in BASE, except those with a DECL_P operand.
995 By doing this:
996 1) More accurate cost can be computed for address expressions;
997 2) Duplicate candidates won't be created for bases in different
998 forms, like &a[0] and &a. */
999 STRIP_NOPS (expr);
1000 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1001 || contain_complex_addr_expr (expr))
1002 {
1003 aff_tree comb;
1004 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1005 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1006 }
1007
1008 iv->base = base;
1009 iv->base_object = determine_base_object (base);
1010 iv->step = step;
1011 iv->biv_p = false;
1012 iv->have_use_for = false;
1013 iv->use_id = 0;
1014 iv->ssa_name = NULL_TREE;
1015 iv->no_overflow = no_overflow;
1016
1017 return iv;
1018 }
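/* Illustration of the lowering above (a sketch only): a base spelled
   "&a[0]" has a non-DECL_P operand, so it is pushed through
   tree_to_aff_combination and comes back as the equivalent "&a", and
   "&a[1]" likewise becomes "&a + 4" for 4-byte elements; a base that is
   already a plain "&a" is left alone.  This is how two spellings of the
   same address end up sharing one candidate instead of two.  */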
1019
1020 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1021 doesn't overflow. */
1022
1023 static void
1024 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1025 bool no_overflow)
1026 {
1027 struct version_info *info = name_info (data, iv);
1028
1029 gcc_assert (!info->iv);
1030
1031 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1032 info->iv = alloc_iv (data, base, step, no_overflow);
1033 info->iv->ssa_name = iv;
1034 }
1035
1036 /* Finds induction variable declaration for VAR. */
1037
1038 static struct iv *
1039 get_iv (struct ivopts_data *data, tree var)
1040 {
1041 basic_block bb;
1042 tree type = TREE_TYPE (var);
1043
1044 if (!POINTER_TYPE_P (type)
1045 && !INTEGRAL_TYPE_P (type))
1046 return NULL;
1047
1048 if (!name_info (data, var)->iv)
1049 {
1050 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1051
1052 if (!bb
1053 || !flow_bb_inside_loop_p (data->current_loop, bb))
1054 set_iv (data, var, var, build_int_cst (type, 0), true);
1055 }
1056
1057 return name_info (data, var)->iv;
1058 }
1059
1060 /* Return the first non-invariant ssa var found in EXPR. */
1061
1062 static tree
1063 extract_single_var_from_expr (tree expr)
1064 {
1065 int i, n;
1066 tree tmp;
1067 enum tree_code code;
1068
1069 if (!expr || is_gimple_min_invariant (expr))
1070 return NULL;
1071
1072 code = TREE_CODE (expr);
1073 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1074 {
1075 n = TREE_OPERAND_LENGTH (expr);
1076 for (i = 0; i < n; i++)
1077 {
1078 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1079
1080 if (tmp)
1081 return tmp;
1082 }
1083 }
1084 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1085 }
1086
1087 /* Finds basic ivs. */
1088
1089 static bool
1090 find_bivs (struct ivopts_data *data)
1091 {
1092 gphi *phi;
1093 affine_iv iv;
1094 tree step, type, base, stop;
1095 bool found = false;
1096 struct loop *loop = data->current_loop;
1097 gphi_iterator psi;
1098
1099 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1100 {
1101 phi = psi.phi ();
1102
1103 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1104 continue;
1105
1106 if (virtual_operand_p (PHI_RESULT (phi)))
1107 continue;
1108
1109 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1110 continue;
1111
1112 if (integer_zerop (iv.step))
1113 continue;
1114
1115 step = iv.step;
1116 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1117 /* Stop expanding the iv base at the first ssa var referred to by the iv
1118 step. Ideally we would stop at any such ssa var, but checking all of
1119 them is expensive and the situation is unusual, so we only handle the first one.
1120
1121 See PR64705 for the rationale. */
1122 stop = extract_single_var_from_expr (step);
1123 base = expand_simple_operations (base, stop);
1124 if (contains_abnormal_ssa_name_p (base)
1125 || contains_abnormal_ssa_name_p (step))
1126 continue;
1127
1128 type = TREE_TYPE (PHI_RESULT (phi));
1129 base = fold_convert (type, base);
1130 if (step)
1131 {
1132 if (POINTER_TYPE_P (type))
1133 step = convert_to_ptrofftype (step);
1134 else
1135 step = fold_convert (type, step);
1136 }
1137
1138 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1139 found = true;
1140 }
1141
1142 return found;
1143 }
1144
1145 /* Marks basic ivs. */
1146
1147 static void
1148 mark_bivs (struct ivopts_data *data)
1149 {
1150 gphi *phi;
1151 gimple def;
1152 tree var;
1153 struct iv *iv, *incr_iv;
1154 struct loop *loop = data->current_loop;
1155 basic_block incr_bb;
1156 gphi_iterator psi;
1157
1158 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1159 {
1160 phi = psi.phi ();
1161
1162 iv = get_iv (data, PHI_RESULT (phi));
1163 if (!iv)
1164 continue;
1165
1166 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1167 def = SSA_NAME_DEF_STMT (var);
1168 /* Don't mark an iv peeled from another one as a biv. */
1169 if (def
1170 && gimple_code (def) == GIMPLE_PHI
1171 && gimple_bb (def) == loop->header)
1172 continue;
1173
1174 incr_iv = get_iv (data, var);
1175 if (!incr_iv)
1176 continue;
1177
1178 /* If the increment is in the subloop, ignore it. */
1179 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1180 if (incr_bb->loop_father != data->current_loop
1181 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1182 continue;
1183
1184 iv->biv_p = true;
1185 incr_iv->biv_p = true;
1186 }
1187 }
1188
1189 /* Checks whether STMT defines a linear induction variable and stores its
1190 parameters to IV. */
1191
1192 static bool
1193 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1194 {
1195 tree lhs, stop;
1196 struct loop *loop = data->current_loop;
1197
1198 iv->base = NULL_TREE;
1199 iv->step = NULL_TREE;
1200
1201 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1202 return false;
1203
1204 lhs = gimple_assign_lhs (stmt);
1205 if (TREE_CODE (lhs) != SSA_NAME)
1206 return false;
1207
1208 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1209 return false;
1210
1211 /* Stop expanding the iv base at the first ssa var referred to by the iv
1212 step. Ideally we would stop at any such ssa var, but checking all of
1213 them is expensive and the situation is unusual, so we only handle the first one.
1214
1215 See PR64705 for the rationale. */
1216 stop = extract_single_var_from_expr (iv->step);
1217 iv->base = expand_simple_operations (iv->base, stop);
1218 if (contains_abnormal_ssa_name_p (iv->base)
1219 || contains_abnormal_ssa_name_p (iv->step))
1220 return false;
1221
1222 /* If STMT could throw, then do not consider STMT as defining a GIV.
1223 While this will suppress optimizations, we cannot safely delete this
1224 GIV and associated statements, even if it appears it is not used. */
1225 if (stmt_could_throw_p (stmt))
1226 return false;
1227
1228 return true;
1229 }
1230
1231 /* Finds general ivs in statement STMT. */
1232
1233 static void
1234 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1235 {
1236 affine_iv iv;
1237
1238 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1239 return;
1240
1241 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1242 }
1243
1244 /* Finds general ivs in basic block BB. */
1245
1246 static void
1247 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1248 {
1249 gimple_stmt_iterator bsi;
1250
1251 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1252 find_givs_in_stmt (data, gsi_stmt (bsi));
1253 }
1254
1255 /* Finds general ivs. */
1256
1257 static void
1258 find_givs (struct ivopts_data *data)
1259 {
1260 struct loop *loop = data->current_loop;
1261 basic_block *body = get_loop_body_in_dom_order (loop);
1262 unsigned i;
1263
1264 for (i = 0; i < loop->num_nodes; i++)
1265 find_givs_in_bb (data, body[i]);
1266 free (body);
1267 }
1268
1269 /* For each ssa name defined in LOOP determines whether it is an induction
1270 variable and if so, its initial value and step. */
1271
1272 static bool
1273 find_induction_variables (struct ivopts_data *data)
1274 {
1275 unsigned i;
1276 bitmap_iterator bi;
1277
1278 if (!find_bivs (data))
1279 return false;
1280
1281 find_givs (data);
1282 mark_bivs (data);
1283
1284 if (dump_file && (dump_flags & TDF_DETAILS))
1285 {
1286 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1287
1288 if (niter)
1289 {
1290 fprintf (dump_file, " number of iterations ");
1291 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1292 if (!integer_zerop (niter->may_be_zero))
1293 {
1294 fprintf (dump_file, "; zero if ");
1295 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1296 }
1297 fprintf (dump_file, "\n\n");
1298 }
1299
1300 fprintf (dump_file, "Induction variables:\n\n");
1301
1302 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1303 {
1304 if (ver_info (data, i)->iv)
1305 dump_iv (dump_file, ver_info (data, i)->iv, true);
1306 }
1307 }
1308
1309 return true;
1310 }
1311
1312 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1313 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1314 is the const offset stripped from IV base. For uses of other types,
1315 ADDR_BASE and ADDR_OFFSET are zero by default. */
1316
1317 static struct iv_use *
1318 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1319 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1320 unsigned HOST_WIDE_INT addr_offset = 0)
1321 {
1322 struct iv_use *use = XCNEW (struct iv_use);
1323
1324 use->id = n_iv_uses (data);
1325 use->sub_id = 0;
1326 use->type = use_type;
1327 use->iv = iv;
1328 use->stmt = stmt;
1329 use->op_p = use_p;
1330 use->related_cands = BITMAP_ALLOC (NULL);
1331 use->next = NULL;
1332 use->addr_base = addr_base;
1333 use->addr_offset = addr_offset;
1334
1335 data->iv_uses.safe_push (use);
1336
1337 return use;
1338 }
1339
1340 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1341 The sub use is recorded under the one whose use id is ID_GROUP. */
1342
1343 static struct iv_use *
1344 record_sub_use (struct ivopts_data *data, tree *use_p,
1345 struct iv *iv, gimple stmt, enum use_type use_type,
1346 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1347 unsigned int id_group)
1348 {
1349 struct iv_use *use = XCNEW (struct iv_use);
1350 struct iv_use *group = iv_use (data, id_group);
1351
1352 use->id = group->id;
1353 use->sub_id = 0;
1354 use->type = use_type;
1355 use->iv = iv;
1356 use->stmt = stmt;
1357 use->op_p = use_p;
1358 use->related_cands = NULL;
1359 use->addr_base = addr_base;
1360 use->addr_offset = addr_offset;
1361
1362 /* The sub use list is maintained in ascending order of offset. */
1363 if (addr_offset <= group->addr_offset)
1364 {
1365 use->related_cands = group->related_cands;
1366 group->related_cands = NULL;
1367 use->next = group;
1368 data->iv_uses[id_group] = use;
1369 }
1370 else
1371 {
1372 struct iv_use *pre;
1373 do
1374 {
1375 pre = group;
1376 group = group->next;
1377 }
1378 while (group && addr_offset > group->addr_offset);
1379 use->next = pre->next;
1380 pre->next = use;
1381 }
1382
1383 return use;
1384 }
1385
1386 /* Checks whether OP is a loop-level invariant and if so, records it.
1387 NONLINEAR_USE is true if the invariant is used in a way we do not
1388 handle specially. */
1389
1390 static void
1391 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1392 {
1393 basic_block bb;
1394 struct version_info *info;
1395
1396 if (TREE_CODE (op) != SSA_NAME
1397 || virtual_operand_p (op))
1398 return;
1399
1400 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1401 if (bb
1402 && flow_bb_inside_loop_p (data->current_loop, bb))
1403 return;
1404
1405 info = name_info (data, op);
1406 info->name = op;
1407 info->has_nonlin_use |= nonlinear_use;
1408 if (!info->inv_id)
1409 info->inv_id = ++data->max_inv_id;
1410 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1411 }
1412
1413 /* Checks whether the use OP is interesting and if so, records it. */
1414
1415 static struct iv_use *
1416 find_interesting_uses_op (struct ivopts_data *data, tree op)
1417 {
1418 struct iv *iv;
1419 gimple stmt;
1420 struct iv_use *use;
1421
1422 if (TREE_CODE (op) != SSA_NAME)
1423 return NULL;
1424
1425 iv = get_iv (data, op);
1426 if (!iv)
1427 return NULL;
1428
1429 if (iv->have_use_for)
1430 {
1431 use = iv_use (data, iv->use_id);
1432
1433 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1434 return use;
1435 }
1436
1437 if (integer_zerop (iv->step))
1438 {
1439 record_invariant (data, op, true);
1440 return NULL;
1441 }
1442 iv->have_use_for = true;
1443
1444 stmt = SSA_NAME_DEF_STMT (op);
1445 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1446 || is_gimple_assign (stmt));
1447
1448 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1449 iv->use_id = use->id;
1450
1451 return use;
1452 }
1453
1454 /* Given a condition in statement STMT, checks whether it is a compare
1455 of an induction variable and an invariant. If this is the case,
1456 CONTROL_VAR is set to location of the iv, BOUND to the location of
1457 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1458 induction variable descriptions, and true is returned. If this is not
1459 the case, CONTROL_VAR and BOUND are set to the arguments of the
1460 condition and false is returned. */
1461
1462 static bool
1463 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1464 tree **control_var, tree **bound,
1465 struct iv **iv_var, struct iv **iv_bound)
1466 {
1467 /* The objects returned when COND has constant operands. */
1468 static struct iv const_iv;
1469 static tree zero;
1470 tree *op0 = &zero, *op1 = &zero;
1471 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1472 bool ret = false;
1473
1474 if (gimple_code (stmt) == GIMPLE_COND)
1475 {
1476 gcond *cond_stmt = as_a <gcond *> (stmt);
1477 op0 = gimple_cond_lhs_ptr (cond_stmt);
1478 op1 = gimple_cond_rhs_ptr (cond_stmt);
1479 }
1480 else
1481 {
1482 op0 = gimple_assign_rhs1_ptr (stmt);
1483 op1 = gimple_assign_rhs2_ptr (stmt);
1484 }
1485
1486 zero = integer_zero_node;
1487 const_iv.step = integer_zero_node;
1488
1489 if (TREE_CODE (*op0) == SSA_NAME)
1490 iv0 = get_iv (data, *op0);
1491 if (TREE_CODE (*op1) == SSA_NAME)
1492 iv1 = get_iv (data, *op1);
1493
1494 /* Exactly one of the compared values must be an iv, and the other one must
1495 be an invariant. */
1496 if (!iv0 || !iv1)
1497 goto end;
1498
1499 if (integer_zerop (iv0->step))
1500 {
1501 /* Control variable may be on the other side. */
1502 std::swap (op0, op1);
1503 std::swap (iv0, iv1);
1504 }
1505 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1506
1507 end:
1508 if (control_var)
1509 *control_var = op0;
1510 if (iv_var)
1511 *iv_var = iv0;
1512 if (bound)
1513 *bound = op1;
1514 if (iv_bound)
1515 *iv_bound = iv1;
1516
1517 return ret;
1518 }
1519
1520 /* Checks whether the condition in STMT is interesting and if so,
1521 records it. */
1522
1523 static void
1524 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1525 {
1526 tree *var_p, *bound_p;
1527 struct iv *var_iv;
1528
1529 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1530 {
1531 find_interesting_uses_op (data, *var_p);
1532 find_interesting_uses_op (data, *bound_p);
1533 return;
1534 }
1535
1536 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1537 }
1538
1539 /* Returns the outermost loop in which EXPR is obviously invariant,
1540 relative to the loop LOOP, i.e. the loop outside of which all of EXPR's
1541 operands are defined. Returns NULL if EXPR is not
1542 even obviously invariant in LOOP. */
1543
1544 struct loop *
1545 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1546 {
1547 basic_block def_bb;
1548 unsigned i, len;
1549
1550 if (is_gimple_min_invariant (expr))
1551 return current_loops->tree_root;
1552
1553 if (TREE_CODE (expr) == SSA_NAME)
1554 {
1555 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1556 if (def_bb)
1557 {
1558 if (flow_bb_inside_loop_p (loop, def_bb))
1559 return NULL;
1560 return superloop_at_depth (loop,
1561 loop_depth (def_bb->loop_father) + 1);
1562 }
1563
1564 return current_loops->tree_root;
1565 }
1566
1567 if (!EXPR_P (expr))
1568 return NULL;
1569
1570 unsigned maxdepth = 0;
1571 len = TREE_OPERAND_LENGTH (expr);
1572 for (i = 0; i < len; i++)
1573 {
1574 struct loop *ivloop;
1575 if (!TREE_OPERAND (expr, i))
1576 continue;
1577
1578 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1579 if (!ivloop)
1580 return NULL;
1581 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1582 }
1583
1584 return superloop_at_depth (loop, maxdepth);
1585 }
1586
1587 /* Returns true if expression EXPR is obviously invariant in LOOP,
1588 i.e. if all its operands are defined outside of the LOOP. LOOP
1589 should not be the function body. */
1590
1591 bool
1592 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1593 {
1594 basic_block def_bb;
1595 unsigned i, len;
1596
1597 gcc_assert (loop_depth (loop) > 0);
1598
1599 if (is_gimple_min_invariant (expr))
1600 return true;
1601
1602 if (TREE_CODE (expr) == SSA_NAME)
1603 {
1604 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1605 if (def_bb
1606 && flow_bb_inside_loop_p (loop, def_bb))
1607 return false;
1608
1609 return true;
1610 }
1611
1612 if (!EXPR_P (expr))
1613 return false;
1614
1615 len = TREE_OPERAND_LENGTH (expr);
1616 for (i = 0; i < len; i++)
1617 if (TREE_OPERAND (expr, i)
1618 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1619 return false;
1620
1621 return true;
1622 }
1623
1624 /* Accumulates the steps of indices into DATA and replaces their values with the
1625 initial ones. Returns false when the value of the index cannot be determined.
1626 Callback for for_each_index. */
1627
1628 struct ifs_ivopts_data
1629 {
1630 struct ivopts_data *ivopts_data;
1631 gimple stmt;
1632 tree step;
1633 };
1634
1635 static bool
1636 idx_find_step (tree base, tree *idx, void *data)
1637 {
1638 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1639 struct iv *iv;
1640 bool use_overflow_semantics = false;
1641 tree step, iv_base, iv_step, lbound, off;
1642 struct loop *loop = dta->ivopts_data->current_loop;
1643
1644 /* If base is a component ref, require that the offset of the reference
1645 be invariant. */
1646 if (TREE_CODE (base) == COMPONENT_REF)
1647 {
1648 off = component_ref_field_offset (base);
1649 return expr_invariant_in_loop_p (loop, off);
1650 }
1651
1652 /* If base is an array, first check whether we will be able to move the
1653 reference out of the loop (in order to take its address in strength
1654 reduction). In order for this to work we need both lower bound
1655 and step to be loop invariants. */
1656 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1657 {
1658 /* Moreover, for a range, the size needs to be invariant as well. */
1659 if (TREE_CODE (base) == ARRAY_RANGE_REF
1660 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1661 return false;
1662
1663 step = array_ref_element_size (base);
1664 lbound = array_ref_low_bound (base);
1665
1666 if (!expr_invariant_in_loop_p (loop, step)
1667 || !expr_invariant_in_loop_p (loop, lbound))
1668 return false;
1669 }
1670
1671 if (TREE_CODE (*idx) != SSA_NAME)
1672 return true;
1673
1674 iv = get_iv (dta->ivopts_data, *idx);
1675 if (!iv)
1676 return false;
1677
1678 /* XXX For a base of *D42 with iv->base being &x[0], we end up producing
1679 *&x[0], which is not folded and does not trigger the
1680 ARRAY_REF path below. */
1681 *idx = iv->base;
1682
1683 if (integer_zerop (iv->step))
1684 return true;
1685
1686 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1687 {
1688 step = array_ref_element_size (base);
1689
1690 /* We only handle addresses whose step is an integer constant. */
1691 if (TREE_CODE (step) != INTEGER_CST)
1692 return false;
1693 }
1694 else
1695 /* The step for pointer arithmetic is already 1 byte. */
1696 step = size_one_node;
1697
1698 iv_base = iv->base;
1699 iv_step = iv->step;
1700 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1701 use_overflow_semantics = true;
1702
1703 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1704 sizetype, &iv_base, &iv_step, dta->stmt,
1705 use_overflow_semantics))
1706 {
1707 /* The index might wrap. */
1708 return false;
1709 }
1710
1711 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1712 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1713
1714 return true;
1715 }
1716
1717 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1718 object is passed to it in DATA. */
1719
1720 static bool
1721 idx_record_use (tree base, tree *idx,
1722 void *vdata)
1723 {
1724 struct ivopts_data *data = (struct ivopts_data *) vdata;
1725 find_interesting_uses_op (data, *idx);
1726 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1727 {
1728 find_interesting_uses_op (data, array_ref_element_size (base));
1729 find_interesting_uses_op (data, array_ref_low_bound (base));
1730 }
1731 return true;
1732 }
1733
1734 /* If we can prove that TOP = cst * BOT for some constant cst,
1735 store cst to MUL and return true. Otherwise return false.
1736 The returned value is always sign-extended, regardless of the
1737 signedness of TOP and BOT. */
1738
1739 static bool
1740 constant_multiple_of (tree top, tree bot, widest_int *mul)
1741 {
1742 tree mby;
1743 enum tree_code code;
1744 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1745 widest_int res, p0, p1;
1746
1747 STRIP_NOPS (top);
1748 STRIP_NOPS (bot);
1749
1750 if (operand_equal_p (top, bot, 0))
1751 {
1752 *mul = 1;
1753 return true;
1754 }
1755
1756 code = TREE_CODE (top);
1757 switch (code)
1758 {
1759 case MULT_EXPR:
1760 mby = TREE_OPERAND (top, 1);
1761 if (TREE_CODE (mby) != INTEGER_CST)
1762 return false;
1763
1764 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1765 return false;
1766
1767 *mul = wi::sext (res * wi::to_widest (mby), precision);
1768 return true;
1769
1770 case PLUS_EXPR:
1771 case MINUS_EXPR:
1772 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1773 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1774 return false;
1775
1776 if (code == MINUS_EXPR)
1777 p1 = -p1;
1778 *mul = wi::sext (p0 + p1, precision);
1779 return true;
1780
1781 case INTEGER_CST:
1782 if (TREE_CODE (bot) != INTEGER_CST)
1783 return false;
1784
1785 p0 = widest_int::from (top, SIGNED);
1786 p1 = widest_int::from (bot, SIGNED);
1787 if (p1 == 0)
1788 return false;
1789 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1790 return res == 0;
1791
1792 default:
1793 return false;
1794 }
1795 }
1796
1797 /* Return true if memory reference REF with step STEP may be unaligned. */
1798
1799 static bool
1800 may_be_unaligned_p (tree ref, tree step)
1801 {
1802 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1803 thus they are not misaligned. */
1804 if (TREE_CODE (ref) == TARGET_MEM_REF)
1805 return false;
1806
1807 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1808 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1809 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1810
1811 unsigned HOST_WIDE_INT bitpos;
1812 unsigned int ref_align;
1813 get_object_alignment_1 (ref, &ref_align, &bitpos);
1814 if (ref_align < align
1815 || (bitpos % align) != 0
1816 || (bitpos % BITS_PER_UNIT) != 0)
1817 return true;
1818
1819 unsigned int trailing_zeros = tree_ctz (step);
1820 if (trailing_zeros < HOST_BITS_PER_INT
1821 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1822 return true;
1823
1824 return false;
1825 }
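/* A sketch of the trailing-zeros check above: with a required alignment
   of 32 bits and STEP == 2, tree_ctz gives one trailing zero, so the
   stride only guarantees 2 * BITS_PER_UNIT == 16 bits and true is
   returned; with STEP == 4 the guarantee reaches 32 bits and the check
   passes.  */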
1826
1827 /* Return true if EXPR may be non-addressable. */
1828
1829 bool
1830 may_be_nonaddressable_p (tree expr)
1831 {
1832 switch (TREE_CODE (expr))
1833 {
1834 case TARGET_MEM_REF:
1835 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1836 target, thus they are always addressable. */
1837 return false;
1838
1839 case COMPONENT_REF:
1840 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1841 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1842
1843 case VIEW_CONVERT_EXPR:
1844 /* This kind of view-conversions may wrap non-addressable objects
1845 and make them look addressable. After some processing the
1846 non-addressability may be uncovered again, causing ADDR_EXPRs
1847 of inappropriate objects to be built. */
1848 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1849 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1850 return true;
1851
1852 /* ... fall through ... */
1853
1854 case ARRAY_REF:
1855 case ARRAY_RANGE_REF:
1856 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1857
1858 CASE_CONVERT:
1859 return true;
1860
1861 default:
1862 break;
1863 }
1864
1865 return false;
1866 }
1867
1868 static tree
1869 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1870
1871 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1872 If there is an existing use which has the same stripped iv base and step,
1873 this function records this one as a sub use to that; otherwise records
1874 it as a normal one. */
1875
1876 static struct iv_use *
1877 record_group_use (struct ivopts_data *data, tree *use_p,
1878 struct iv *iv, gimple stmt, enum use_type use_type)
1879 {
1880 unsigned int i;
1881 struct iv_use *use;
1882 tree addr_base;
1883 unsigned HOST_WIDE_INT addr_offset;
1884
1885 /* Only support sub uses for address type uses, that is, those with a
1886 base object. */
1887 if (!iv->base_object)
1888 return record_use (data, use_p, iv, stmt, use_type);
1889
1890 addr_base = strip_offset (iv->base, &addr_offset);
1891 for (i = 0; i < n_iv_uses (data); i++)
1892 {
1893 use = iv_use (data, i);
1894 if (use->type != USE_ADDRESS || !use->iv->base_object)
1895 continue;
1896
1897 /* Check if it has the same stripped base and step. */
1898 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1899 && operand_equal_p (iv->step, use->iv->step, 0)
1900 && operand_equal_p (addr_base, use->addr_base, 0))
1901 break;
1902 }
1903
1904 if (i == n_iv_uses (data))
1905 return record_use (data, use_p, iv, stmt,
1906 use_type, addr_base, addr_offset);
1907 else
1908 return record_sub_use (data, use_p, iv, stmt,
1909 use_type, addr_base, addr_offset, i);
1910 }
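/* For example (purely illustrative): address uses "a[i]", "a[i + 1]" and
   "a[i + 3]" in the same loop share the stripped base and step, so the
   latter two are recorded as sub uses of the first and kept on its list
   in ascending order of their constant offsets 0, 4 and 12 (assuming
   4-byte elements).  */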
1911
1912 /* Finds addresses in *OP_P inside STMT. */
1913
1914 static void
1915 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1916 {
1917 tree base = *op_p, step = size_zero_node;
1918 struct iv *civ;
1919 struct ifs_ivopts_data ifs_ivopts_data;
1920
1921 /* Do not play with volatile memory references. A bit too conservative,
1922 perhaps, but safe. */
1923 if (gimple_has_volatile_ops (stmt))
1924 goto fail;
1925
1926 /* Ignore bitfields for now. Not really something terribly complicated
1927 to handle. TODO. */
1928 if (TREE_CODE (base) == BIT_FIELD_REF)
1929 goto fail;
1930
1931 base = unshare_expr (base);
1932
1933 if (TREE_CODE (base) == TARGET_MEM_REF)
1934 {
1935 tree type = build_pointer_type (TREE_TYPE (base));
1936 tree astep;
1937
1938 if (TMR_BASE (base)
1939 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1940 {
1941 civ = get_iv (data, TMR_BASE (base));
1942 if (!civ)
1943 goto fail;
1944
1945 TMR_BASE (base) = civ->base;
1946 step = civ->step;
1947 }
1948 if (TMR_INDEX2 (base)
1949 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1950 {
1951 civ = get_iv (data, TMR_INDEX2 (base));
1952 if (!civ)
1953 goto fail;
1954
1955 TMR_INDEX2 (base) = civ->base;
1956 step = civ->step;
1957 }
1958 if (TMR_INDEX (base)
1959 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1960 {
1961 civ = get_iv (data, TMR_INDEX (base));
1962 if (!civ)
1963 goto fail;
1964
1965 TMR_INDEX (base) = civ->base;
1966 astep = civ->step;
1967
1968 if (astep)
1969 {
1970 if (TMR_STEP (base))
1971 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1972
1973 step = fold_build2 (PLUS_EXPR, type, step, astep);
1974 }
1975 }
1976
1977 if (integer_zerop (step))
1978 goto fail;
1979 base = tree_mem_ref_addr (type, base);
1980 }
1981 else
1982 {
1983 ifs_ivopts_data.ivopts_data = data;
1984 ifs_ivopts_data.stmt = stmt;
1985 ifs_ivopts_data.step = size_zero_node;
1986 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1987 || integer_zerop (ifs_ivopts_data.step))
1988 goto fail;
1989 step = ifs_ivopts_data.step;
1990
1991 /* Check that the base expression is addressable. This needs
1992 to be done after substituting bases of IVs into it. */
1993 if (may_be_nonaddressable_p (base))
1994 goto fail;
1995
1996 /* Moreover, on strict alignment platforms, check that it is
1997 sufficiently aligned. */
1998 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1999 goto fail;
2000
2001 base = build_fold_addr_expr (base);
2002
2003 /* Substituting bases of IVs into the base expression might
2004 have caused folding opportunities. */
2005 if (TREE_CODE (base) == ADDR_EXPR)
2006 {
2007 tree *ref = &TREE_OPERAND (base, 0);
2008 while (handled_component_p (*ref))
2009 ref = &TREE_OPERAND (*ref, 0);
2010 if (TREE_CODE (*ref) == MEM_REF)
2011 {
2012 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2013 TREE_OPERAND (*ref, 0),
2014 TREE_OPERAND (*ref, 1));
2015 if (tem)
2016 *ref = tem;
2017 }
2018 }
2019 }
2020
2021 civ = alloc_iv (data, base, step);
2022 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2023 return;
2024
2025 fail:
2026 for_each_index (op_p, idx_record_use, data);
2027 }
2028
2029 /* Finds and records invariants used in STMT. */
2030
2031 static void
2032 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2033 {
2034 ssa_op_iter iter;
2035 use_operand_p use_p;
2036 tree op;
2037
2038 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2039 {
2040 op = USE_FROM_PTR (use_p);
2041 record_invariant (data, op, false);
2042 }
2043 }
2044
2045 /* Finds interesting uses of induction variables in the statement STMT. */
2046
2047 static void
2048 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2049 {
2050 struct iv *iv;
2051 tree op, *lhs, *rhs;
2052 ssa_op_iter iter;
2053 use_operand_p use_p;
2054 enum tree_code code;
2055
2056 find_invariants_stmt (data, stmt);
2057
2058 if (gimple_code (stmt) == GIMPLE_COND)
2059 {
2060 find_interesting_uses_cond (data, stmt);
2061 return;
2062 }
2063
2064 if (is_gimple_assign (stmt))
2065 {
2066 lhs = gimple_assign_lhs_ptr (stmt);
2067 rhs = gimple_assign_rhs1_ptr (stmt);
2068
2069 if (TREE_CODE (*lhs) == SSA_NAME)
2070 {
2071 /* If the statement defines an induction variable, the uses are not
2072 interesting by themselves. */
2073
2074 iv = get_iv (data, *lhs);
2075
2076 if (iv && !integer_zerop (iv->step))
2077 return;
2078 }
2079
2080 code = gimple_assign_rhs_code (stmt);
2081 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2082 && (REFERENCE_CLASS_P (*rhs)
2083 || is_gimple_val (*rhs)))
2084 {
2085 if (REFERENCE_CLASS_P (*rhs))
2086 find_interesting_uses_address (data, stmt, rhs);
2087 else
2088 find_interesting_uses_op (data, *rhs);
2089
2090 if (REFERENCE_CLASS_P (*lhs))
2091 find_interesting_uses_address (data, stmt, lhs);
2092 return;
2093 }
2094 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2095 {
2096 find_interesting_uses_cond (data, stmt);
2097 return;
2098 }
2099
2100 /* TODO -- we should also handle address uses of type
2101
2102 memory = call (whatever);
2103
2104 and
2105
2106 call (memory). */
2107 }
2108
2109 if (gimple_code (stmt) == GIMPLE_PHI
2110 && gimple_bb (stmt) == data->current_loop->header)
2111 {
2112 iv = get_iv (data, PHI_RESULT (stmt));
2113
2114 if (iv && !integer_zerop (iv->step))
2115 return;
2116 }
2117
2118 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2119 {
2120 op = USE_FROM_PTR (use_p);
2121
2122 if (TREE_CODE (op) != SSA_NAME)
2123 continue;
2124
2125 iv = get_iv (data, op);
2126 if (!iv)
2127 continue;
2128
2129 find_interesting_uses_op (data, op);
2130 }
2131 }
2132
2133 /* Finds interesting uses of induction variables outside of loops
2134 on loop exit edge EXIT. */
2135
2136 static void
2137 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2138 {
2139 gphi *phi;
2140 gphi_iterator psi;
2141 tree def;
2142
2143 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2144 {
2145 phi = psi.phi ();
2146 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2147 if (!virtual_operand_p (def))
2148 find_interesting_uses_op (data, def);
2149 }
2150 }
2151
2152 /* Finds uses of the induction variables that are interesting. */
2153
2154 static void
2155 find_interesting_uses (struct ivopts_data *data)
2156 {
2157 basic_block bb;
2158 gimple_stmt_iterator bsi;
2159 basic_block *body = get_loop_body (data->current_loop);
2160 unsigned i;
2161 struct version_info *info;
2162 edge e;
2163
2164 if (dump_file && (dump_flags & TDF_DETAILS))
2165 fprintf (dump_file, "Uses:\n\n");
2166
2167 for (i = 0; i < data->current_loop->num_nodes; i++)
2168 {
2169 edge_iterator ei;
2170 bb = body[i];
2171
2172 FOR_EACH_EDGE (e, ei, bb->succs)
2173 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2174 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2175 find_interesting_uses_outside (data, e);
2176
2177 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2178 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2179 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2180 if (!is_gimple_debug (gsi_stmt (bsi)))
2181 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2182 }
2183
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 {
2186 bitmap_iterator bi;
2187
2188 fprintf (dump_file, "\n");
2189
2190 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2191 {
2192 info = ver_info (data, i);
2193 if (info->inv_id)
2194 {
2195 fprintf (dump_file, " ");
2196 print_generic_expr (dump_file, info->name, TDF_SLIM);
2197 fprintf (dump_file, " is invariant (%d)%s\n",
2198 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2199 }
2200 }
2201
2202 fprintf (dump_file, "\n");
2203 }
2204
2205 free (body);
2206 }
2207
2208 /* Compute maximum offset of [base + offset] addressing mode
2209 for memory reference represented by USE. */
2210
2211 static HOST_WIDE_INT
2212 compute_max_addr_offset (struct iv_use *use)
2213 {
2214 int width;
2215 rtx reg, addr;
2216 HOST_WIDE_INT i, off;
2217 unsigned list_index, num;
2218 addr_space_t as;
2219 machine_mode mem_mode, addr_mode;
2220 static vec<HOST_WIDE_INT> max_offset_list;
2221
2222 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2223 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2224
2225 num = max_offset_list.length ();
2226 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2227 if (list_index >= num)
2228 {
2229 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2230 for (; num < max_offset_list.length (); num++)
2231 max_offset_list[num] = -1;
2232 }
2233
2234 off = max_offset_list[list_index];
2235 if (off != -1)
2236 return off;
2237
2238 addr_mode = targetm.addr_space.address_mode (as);
2239 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2240 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2241
2242 width = GET_MODE_BITSIZE (addr_mode) - 1;
2243 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2244 width = HOST_BITS_PER_WIDE_INT - 1;
2245
2246 for (i = width; i > 0; i--)
2247 {
2248 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2249 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2250 if (memory_address_addr_space_p (mem_mode, addr, as))
2251 break;
2252
2253 /* For some strict-alignment targets, the offset must be naturally
2254 aligned. Try an aligned offset if mem_mode is not QImode. */
2255 off = ((unsigned HOST_WIDE_INT) 1 << i);
2256 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2257 {
2258 off -= GET_MODE_SIZE (mem_mode);
2259 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2260 if (memory_address_addr_space_p (mem_mode, addr, as))
2261 break;
2262 }
2263 }
2264 if (i == 0)
2265 off = 0;
2266
2267 max_offset_list[list_index] = off;
2268 return off;
2269 }
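
/* Illustrative note (hypothetical numbers, not authoritative): on a target
   whose [base + offset] addressing mode accepts, say, 12-bit signed offsets,
   the probing loop above would settle on off = (1 << 11) - 1 == 2047 for most
   mem_modes, and that value is then cached in max_offset_list. */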
2270
2271 /* Check if all small groups should be split. Return true if and
2272 only if:
2273
2274 1) At least one group contains two uses with different offsets.
2275 2) No group contains more than two uses with different offsets.
2276
2277 Return false otherwise. We want to split such groups because:
2278
2279 1) Small groups don't have much benefit and may interfere with
2280 general candidate selection.
2281 2) The problem size with only small groups is usually small and the
2282 general algorithm can handle it well.
2283
2284 TODO -- Above claim may not hold when auto increment is supported. */
2285
2286 static bool
2287 split_all_small_groups (struct ivopts_data *data)
2288 {
2289 bool split_p = false;
2290 unsigned int i, n, distinct;
2291 struct iv_use *pre, *use;
2292
2293 n = n_iv_uses (data);
2294 for (i = 0; i < n; i++)
2295 {
2296 use = iv_use (data, i);
2297 if (!use->next)
2298 continue;
2299
2300 distinct = 1;
2301 gcc_assert (use->type == USE_ADDRESS);
2302 for (pre = use, use = use->next; use; pre = use, use = use->next)
2303 {
2304 if (pre->addr_offset != use->addr_offset)
2305 distinct++;
2306
2307 if (distinct > 2)
2308 return false;
2309 }
2310 if (distinct == 2)
2311 split_p = true;
2312 }
2313
2314 return split_p;
2315 }
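
/* A small, hypothetical example of the rule above: a group containing
   accesses a[i] and a[i + 1] (two distinct offsets) makes this function
   return true, provided no other group mixes three or more distinct offsets;
   a group whose uses all share one offset does not count as "small" here. */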
2316
2317 /* For each group of address type uses, this function further groups
2318 these uses according to the maximum offset supported by the target's
2319 [base + offset] addressing mode. */
2320
2321 static void
2322 group_address_uses (struct ivopts_data *data)
2323 {
2324 HOST_WIDE_INT max_offset = -1;
2325 unsigned int i, n, sub_id;
2326 struct iv_use *pre, *use;
2327 unsigned HOST_WIDE_INT addr_offset_first;
2328
2329 /* Reset max offset to split all small groups. */
2330 if (split_all_small_groups (data))
2331 max_offset = 0;
2332
2333 n = n_iv_uses (data);
2334 for (i = 0; i < n; i++)
2335 {
2336 use = iv_use (data, i);
2337 if (!use->next)
2338 continue;
2339
2340 gcc_assert (use->type == USE_ADDRESS);
2341 if (max_offset != 0)
2342 max_offset = compute_max_addr_offset (use);
2343
2344 while (use)
2345 {
2346 sub_id = 0;
2347 addr_offset_first = use->addr_offset;
2348 /* Only uses whose offset relative to the first use fits in the
2349 offset part of the addressing mode can be grouped together. */
2350 for (pre = use, use = use->next;
2351 use && (use->addr_offset - addr_offset_first
2352 <= (unsigned HOST_WIDE_INT) max_offset);
2353 pre = use, use = use->next)
2354 {
2355 use->id = pre->id;
2356 use->sub_id = ++sub_id;
2357 }
2358
2359 /* Break the list and create new group. */
2360 if (use)
2361 {
2362 pre->next = NULL;
2363 use->id = n_iv_uses (data);
2364 use->related_cands = BITMAP_ALLOC (NULL);
2365 data->iv_uses.safe_push (use);
2366 }
2367 }
2368 }
2369
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 dump_uses (dump_file, data);
2372 }
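
/* Hypothetical illustration of the regrouping above: with byte-sized
   elements and a maximum [base + offset] displacement of 255, uses a[i],
   a[i + 4] and a[i + 1000] that started in one group end up as two groups:
   {a[i], a[i + 4]}, whose offsets relative to the first use fit, and
   {a[i + 1000]}, which is pushed as a new group with a fresh id. */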
2373
2374 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2375 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2376 we are at the top-level of the processed address. */
2377
2378 static tree
2379 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2380 HOST_WIDE_INT *offset)
2381 {
2382 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2383 enum tree_code code;
2384 tree type, orig_type = TREE_TYPE (expr);
2385 HOST_WIDE_INT off0, off1, st;
2386 tree orig_expr = expr;
2387
2388 STRIP_NOPS (expr);
2389
2390 type = TREE_TYPE (expr);
2391 code = TREE_CODE (expr);
2392 *offset = 0;
2393
2394 switch (code)
2395 {
2396 case INTEGER_CST:
2397 if (!cst_and_fits_in_hwi (expr)
2398 || integer_zerop (expr))
2399 return orig_expr;
2400
2401 *offset = int_cst_value (expr);
2402 return build_int_cst (orig_type, 0);
2403
2404 case POINTER_PLUS_EXPR:
2405 case PLUS_EXPR:
2406 case MINUS_EXPR:
2407 op0 = TREE_OPERAND (expr, 0);
2408 op1 = TREE_OPERAND (expr, 1);
2409
2410 op0 = strip_offset_1 (op0, false, false, &off0);
2411 op1 = strip_offset_1 (op1, false, false, &off1);
2412
2413 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2414 if (op0 == TREE_OPERAND (expr, 0)
2415 && op1 == TREE_OPERAND (expr, 1))
2416 return orig_expr;
2417
2418 if (integer_zerop (op1))
2419 expr = op0;
2420 else if (integer_zerop (op0))
2421 {
2422 if (code == MINUS_EXPR)
2423 expr = fold_build1 (NEGATE_EXPR, type, op1);
2424 else
2425 expr = op1;
2426 }
2427 else
2428 expr = fold_build2 (code, type, op0, op1);
2429
2430 return fold_convert (orig_type, expr);
2431
2432 case MULT_EXPR:
2433 op1 = TREE_OPERAND (expr, 1);
2434 if (!cst_and_fits_in_hwi (op1))
2435 return orig_expr;
2436
2437 op0 = TREE_OPERAND (expr, 0);
2438 op0 = strip_offset_1 (op0, false, false, &off0);
2439 if (op0 == TREE_OPERAND (expr, 0))
2440 return orig_expr;
2441
2442 *offset = off0 * int_cst_value (op1);
2443 if (integer_zerop (op0))
2444 expr = op0;
2445 else
2446 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2447
2448 return fold_convert (orig_type, expr);
2449
2450 case ARRAY_REF:
2451 case ARRAY_RANGE_REF:
2452 if (!inside_addr)
2453 return orig_expr;
2454
2455 step = array_ref_element_size (expr);
2456 if (!cst_and_fits_in_hwi (step))
2457 break;
2458
2459 st = int_cst_value (step);
2460 op1 = TREE_OPERAND (expr, 1);
2461 op1 = strip_offset_1 (op1, false, false, &off1);
2462 *offset = off1 * st;
2463
2464 if (top_compref
2465 && integer_zerop (op1))
2466 {
2467 /* Strip the component reference completely. */
2468 op0 = TREE_OPERAND (expr, 0);
2469 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2470 *offset += off0;
2471 return op0;
2472 }
2473 break;
2474
2475 case COMPONENT_REF:
2476 {
2477 tree field;
2478
2479 if (!inside_addr)
2480 return orig_expr;
2481
2482 tmp = component_ref_field_offset (expr);
2483 field = TREE_OPERAND (expr, 1);
2484 if (top_compref
2485 && cst_and_fits_in_hwi (tmp)
2486 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2487 {
2488 HOST_WIDE_INT boffset, abs_off;
2489
2490 /* Strip the component reference completely. */
2491 op0 = TREE_OPERAND (expr, 0);
2492 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2493 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2494 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2495 if (boffset < 0)
2496 abs_off = -abs_off;
2497
2498 *offset = off0 + int_cst_value (tmp) + abs_off;
2499 return op0;
2500 }
2501 }
2502 break;
2503
2504 case ADDR_EXPR:
2505 op0 = TREE_OPERAND (expr, 0);
2506 op0 = strip_offset_1 (op0, true, true, &off0);
2507 *offset += off0;
2508
2509 if (op0 == TREE_OPERAND (expr, 0))
2510 return orig_expr;
2511
2512 expr = build_fold_addr_expr (op0);
2513 return fold_convert (orig_type, expr);
2514
2515 case MEM_REF:
2516 /* ??? Offset operand? */
2517 inside_addr = false;
2518 break;
2519
2520 default:
2521 return orig_expr;
2522 }
2523
2524 /* Default handling of expressions for which we want to recurse into
2525 the first operand. */
2526 op0 = TREE_OPERAND (expr, 0);
2527 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2528 *offset += off0;
2529
2530 if (op0 == TREE_OPERAND (expr, 0)
2531 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2532 return orig_expr;
2533
2534 expr = copy_node (expr);
2535 TREE_OPERAND (expr, 0) = op0;
2536 if (op1)
2537 TREE_OPERAND (expr, 1) = op1;
2538
2539 /* Inside address, we might strip the top level component references,
2540 thus changing the type of the expression. Handling of ADDR_EXPR
2541 will fix that. */
2542 expr = fold_convert (orig_type, expr);
2543
2544 return expr;
2545 }
2546
2547 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2548
2549 static tree
2550 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2551 {
2552 HOST_WIDE_INT off;
2553 tree core = strip_offset_1 (expr, false, false, &off);
2554 *offset = off;
2555 return core;
2556 }
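
/* For example (purely illustrative): given the address &a[i + 3] of an
   array with 4-byte elements and a constant element size, strip_offset
   would return the stripped core &a[i] and set *OFFSET to 12, since the
   constant part of the index is folded into the offset. */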
2557
2558 /* Returns variant of TYPE that can be used as base for different uses.
2559 We return unsigned type with the same precision, which avoids problems
2560 with overflows. */
2561
2562 static tree
2563 generic_type_for (tree type)
2564 {
2565 if (POINTER_TYPE_P (type))
2566 return unsigned_type_for (type);
2567
2568 if (TYPE_UNSIGNED (type))
2569 return type;
2570
2571 return unsigned_type_for (type);
2572 }
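
/* For instance (a sketch of the intent, not an exhaustive rule): a candidate
   whose base has type 'int' gets 'unsigned int', and a pointer base gets an
   unsigned integer type of the same width, so that stepping the candidate
   never invokes signed-overflow undefined behavior. */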
2573
2574 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2575 the bitmap in which to store them. */
2576
2577 static struct ivopts_data *fd_ivopts_data;
2578 static tree
2579 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2580 {
2581 bitmap *depends_on = (bitmap *) data;
2582 struct version_info *info;
2583
2584 if (TREE_CODE (*expr_p) != SSA_NAME)
2585 return NULL_TREE;
2586 info = name_info (fd_ivopts_data, *expr_p);
2587
2588 if (!info->inv_id || info->has_nonlin_use)
2589 return NULL_TREE;
2590
2591 if (!*depends_on)
2592 *depends_on = BITMAP_ALLOC (NULL);
2593 bitmap_set_bit (*depends_on, info->inv_id);
2594
2595 return NULL_TREE;
2596 }
2597
2598 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2599 position to POS. If USE is not NULL, the candidate is set as related to
2600 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2601 replacement of the final value of the iv by a direct computation. */
2602
2603 static struct iv_cand *
2604 add_candidate_1 (struct ivopts_data *data,
2605 tree base, tree step, bool important, enum iv_position pos,
2606 struct iv_use *use, gimple incremented_at)
2607 {
2608 unsigned i;
2609 struct iv_cand *cand = NULL;
2610 tree type, orig_type;
2611
2612 /* For non-original variables, make sure their values are computed in a type
2613 that does not invoke undefined behavior on overflows (since in general,
2614 we cannot prove that these induction variables are non-wrapping). */
2615 if (pos != IP_ORIGINAL)
2616 {
2617 orig_type = TREE_TYPE (base);
2618 type = generic_type_for (orig_type);
2619 if (type != orig_type)
2620 {
2621 base = fold_convert (type, base);
2622 step = fold_convert (type, step);
2623 }
2624 }
2625
2626 for (i = 0; i < n_iv_cands (data); i++)
2627 {
2628 cand = iv_cand (data, i);
2629
2630 if (cand->pos != pos)
2631 continue;
2632
2633 if (cand->incremented_at != incremented_at
2634 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2635 && cand->ainc_use != use))
2636 continue;
2637
2638 if (!cand->iv)
2639 {
2640 if (!base && !step)
2641 break;
2642
2643 continue;
2644 }
2645
2646 if (!base && !step)
2647 continue;
2648
2649 if (operand_equal_p (base, cand->iv->base, 0)
2650 && operand_equal_p (step, cand->iv->step, 0)
2651 && (TYPE_PRECISION (TREE_TYPE (base))
2652 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2653 break;
2654 }
2655
2656 if (i == n_iv_cands (data))
2657 {
2658 cand = XCNEW (struct iv_cand);
2659 cand->id = i;
2660
2661 if (!base && !step)
2662 cand->iv = NULL;
2663 else
2664 cand->iv = alloc_iv (data, base, step);
2665
2666 cand->pos = pos;
2667 if (pos != IP_ORIGINAL && cand->iv)
2668 {
2669 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2670 cand->var_after = cand->var_before;
2671 }
2672 cand->important = important;
2673 cand->incremented_at = incremented_at;
2674 data->iv_candidates.safe_push (cand);
2675
2676 if (step
2677 && TREE_CODE (step) != INTEGER_CST)
2678 {
2679 fd_ivopts_data = data;
2680 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2681 }
2682
2683 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2684 cand->ainc_use = use;
2685 else
2686 cand->ainc_use = NULL;
2687
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2689 dump_cand (dump_file, cand);
2690 }
2691
2692 if (important && !cand->important)
2693 {
2694 cand->important = true;
2695 if (dump_file && (dump_flags & TDF_DETAILS))
2696 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2697 }
2698
2699 if (use)
2700 {
2701 bitmap_set_bit (use->related_cands, i);
2702 if (dump_file && (dump_flags & TDF_DETAILS))
2703 fprintf (dump_file, "Candidate %d is related to use %d\n",
2704 cand->id, use->id);
2705 }
2706
2707 return cand;
2708 }
2709
2710 /* Returns true if incrementing the induction variable at the end of the LOOP
2711 is allowed.
2712
2713 The purpose is to avoid splitting latch edge with a biv increment, thus
2714 creating a jump, possibly confusing other optimization passes and leaving
2715 less freedom to the scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2716 is not available (so we do not have a better alternative), or if the latch
2717 edge is already nonempty. */
2718
2719 static bool
2720 allow_ip_end_pos_p (struct loop *loop)
2721 {
2722 if (!ip_normal_pos (loop))
2723 return true;
2724
2725 if (!empty_block_p (ip_end_pos (loop)))
2726 return true;
2727
2728 return false;
2729 }
2730
2731 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2732 Important field is set to IMPORTANT. */
2733
2734 static void
2735 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2736 bool important, struct iv_use *use)
2737 {
2738 basic_block use_bb = gimple_bb (use->stmt);
2739 machine_mode mem_mode;
2740 unsigned HOST_WIDE_INT cstepi;
2741
2742 /* If we insert the increment in any position other than the standard
2743 ones, we must ensure that it is incremented once per iteration.
2744 It must not be in an inner nested loop, or one side of an if
2745 statement. */
2746 if (use_bb->loop_father != data->current_loop
2747 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2748 || stmt_could_throw_p (use->stmt)
2749 || !cst_and_fits_in_hwi (step))
2750 return;
2751
2752 cstepi = int_cst_value (step);
2753
2754 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2755 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2756 || USE_STORE_PRE_INCREMENT (mem_mode))
2757 && GET_MODE_SIZE (mem_mode) == cstepi)
2758 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2759 || USE_STORE_PRE_DECREMENT (mem_mode))
2760 && GET_MODE_SIZE (mem_mode) == -cstepi))
2761 {
2762 enum tree_code code = MINUS_EXPR;
2763 tree new_base;
2764 tree new_step = step;
2765
2766 if (POINTER_TYPE_P (TREE_TYPE (base)))
2767 {
2768 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2769 code = POINTER_PLUS_EXPR;
2770 }
2771 else
2772 new_step = fold_convert (TREE_TYPE (base), new_step);
2773 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2774 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2775 use->stmt);
2776 }
2777 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2778 || USE_STORE_POST_INCREMENT (mem_mode))
2779 && GET_MODE_SIZE (mem_mode) == cstepi)
2780 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2781 || USE_STORE_POST_DECREMENT (mem_mode))
2782 && GET_MODE_SIZE (mem_mode) == -cstepi))
2783 {
2784 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2785 use->stmt);
2786 }
2787 }
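
/* A hypothetical example of the above: for a 4-byte access *p inside the
   loop where p steps by 4, a target with post-increment addressing gets an
   IP_AFTER_USE candidate with base BASE, while pre-increment targets get an
   IP_BEFORE_USE candidate whose base is BASE - STEP, so the increment can
   later be folded into the memory access itself. */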
2788
2789 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2790 position to POS. If USE is not NULL, the candidate is set as related to
2791 it. The candidate computation is scheduled before exit condition and at
2792 the end of loop. */
2793
2794 static void
2795 add_candidate (struct ivopts_data *data,
2796 tree base, tree step, bool important, struct iv_use *use)
2797 {
2798 gcc_assert (use == NULL || use->sub_id == 0);
2799
2800 if (ip_normal_pos (data->current_loop))
2801 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2802 if (ip_end_pos (data->current_loop)
2803 && allow_ip_end_pos_p (data->current_loop))
2804 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2805 }
2806
2807 /* Adds standard iv candidates. */
2808
2809 static void
2810 add_standard_iv_candidates (struct ivopts_data *data)
2811 {
2812 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2813
2814 /* The same for the 'long' type, if it is wider than 'int' and still fast enough. */
2815 if (TYPE_PRECISION
2816 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2817 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2818 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2819 build_int_cst (long_integer_type_node, 1), true, NULL);
2820
2821 /* The same for the 'long long' type, if it is wider than 'long' and still fast enough. */
2822 if (TYPE_PRECISION
2823 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2824 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2825 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2826 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2827 }
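
/* A concrete illustration (assuming a typical LP64 target): this adds the
   counter candidates {0, +, 1} in 'int' and in 'long' (64 bits, still no
   wider than BITS_PER_WORD), but nothing for 'long long', whose precision
   equals that of 'long' and therefore fails the first test above. */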
2828
2829
2830 /* Adds candidates based on the old induction variable IV. */
2831
2832 static void
2833 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2834 {
2835 gimple phi;
2836 tree def;
2837 struct iv_cand *cand;
2838
2839 add_candidate (data, iv->base, iv->step, true, NULL);
2840
2841 /* The same, but with initial value zero. */
2842 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2843 add_candidate (data, size_int (0), iv->step, true, NULL);
2844 else
2845 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2846 iv->step, true, NULL);
2847
2848 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2849 if (gimple_code (phi) == GIMPLE_PHI)
2850 {
2851 /* Additionally record the possibility of leaving the original iv
2852 untouched. */
2853 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2854 /* Don't add candidate if it's from another PHI node because
2855 it's an affine iv appearing in the form of PEELED_CHREC. */
2856 phi = SSA_NAME_DEF_STMT (def);
2857 if (gimple_code (phi) != GIMPLE_PHI)
2858 {
2859 cand = add_candidate_1 (data,
2860 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2861 SSA_NAME_DEF_STMT (def));
2862 cand->var_before = iv->ssa_name;
2863 cand->var_after = def;
2864 }
2865 else
2866 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2867 }
2868 }
2869
2870 /* Adds candidates based on the old induction variables. */
2871
2872 static void
2873 add_iv_candidate_for_bivs (struct ivopts_data *data)
2874 {
2875 unsigned i;
2876 struct iv *iv;
2877 bitmap_iterator bi;
2878
2879 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2880 {
2881 iv = ver_info (data, i)->iv;
2882 if (iv && iv->biv_p && !integer_zerop (iv->step))
2883 add_iv_candidate_for_biv (data, iv);
2884 }
2885 }
2886
2887 /* Adds candidates based on the value of USE's iv. */
2888
2889 static void
2890 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
2891 {
2892 unsigned HOST_WIDE_INT offset;
2893 tree base;
2894 tree basetype;
2895 struct iv *iv = use->iv;
2896
2897 add_candidate (data, iv->base, iv->step, false, use);
2898
2899 /* The same, but with initial value zero. Make such a variable important,
2900 since it is generic enough that many uses may be based
2901 on it. */
2902 basetype = TREE_TYPE (iv->base);
2903 if (POINTER_TYPE_P (basetype))
2904 basetype = sizetype;
2905 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
2906
2907 /* Third, try removing the constant offset. Make sure to even
2908 add a candidate for &a[0] vs. (T *)&a. */
2909 base = strip_offset (iv->base, &offset);
2910 if (offset || base != iv->base)
2911 add_candidate (data, base, iv->step, false, use);
2912
2913 /* Finally, add auto-increment candidates. Make such variables
2914 important since other iv uses with the same base object may be based
2915 on them. */
2916 if (use != NULL && use->type == USE_ADDRESS)
2917 add_autoinc_candidates (data, iv->base, iv->step, true, use);
2918 }
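
/* To illustrate (hypothetically): for an address use based on the iv
   {&a[0] + 16, +, 4}, the calls above add the iv itself, the same iv with a
   zero base, the offset-stripped variant {&a[0], +, 4}, and, for address
   uses on targets with auto-increment addressing, the corresponding
   IP_BEFORE_USE / IP_AFTER_USE candidates. */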
2919
2920 /* Adds candidates based on the uses. */
2921
2922 static void
2923 add_iv_candidate_for_uses (struct ivopts_data *data)
2924 {
2925 unsigned i;
2926
2927 for (i = 0; i < n_iv_uses (data); i++)
2928 {
2929 struct iv_use *use = iv_use (data, i);
2930
2931 if (!use)
2932 continue;
2933
2934 switch (use->type)
2935 {
2936 case USE_NONLINEAR_EXPR:
2937 case USE_COMPARE:
2938 case USE_ADDRESS:
2939 /* Just add the ivs based on the value of the iv used here. */
2940 add_iv_candidate_for_use (data, use);
2941 break;
2942
2943 default:
2944 gcc_unreachable ();
2945 }
2946 }
2947 }
2948
2949 /* Record important candidates and add them to related_cands bitmaps
2950 if needed. */
2951
2952 static void
2953 record_important_candidates (struct ivopts_data *data)
2954 {
2955 unsigned i;
2956 struct iv_use *use;
2957
2958 for (i = 0; i < n_iv_cands (data); i++)
2959 {
2960 struct iv_cand *cand = iv_cand (data, i);
2961
2962 if (cand->important)
2963 bitmap_set_bit (data->important_candidates, i);
2964 }
2965
2966 data->consider_all_candidates = (n_iv_cands (data)
2967 <= CONSIDER_ALL_CANDIDATES_BOUND);
2968
2969 if (data->consider_all_candidates)
2970 {
2971 /* We will not need "related_cands" bitmaps in this case,
2972 so release them to decrease peak memory consumption. */
2973 for (i = 0; i < n_iv_uses (data); i++)
2974 {
2975 use = iv_use (data, i);
2976 BITMAP_FREE (use->related_cands);
2977 }
2978 }
2979 else
2980 {
2981 /* Add important candidates to the related_cands bitmaps. */
2982 for (i = 0; i < n_iv_uses (data); i++)
2983 bitmap_ior_into (iv_use (data, i)->related_cands,
2984 data->important_candidates);
2985 }
2986 }
2987
2988 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2989 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2990 we allocate a small open-addressed hash table for every use. */
2991
2992 static void
2993 alloc_use_cost_map (struct ivopts_data *data)
2994 {
2995 unsigned i, size, s;
2996
2997 for (i = 0; i < n_iv_uses (data); i++)
2998 {
2999 struct iv_use *use = iv_use (data, i);
3000
3001 if (data->consider_all_candidates)
3002 size = n_iv_cands (data);
3003 else
3004 {
3005 s = bitmap_count_bits (use->related_cands);
3006
3007 /* Round up to a power of two, so that computing the modulus is fast. */
3008 size = s ? (1 << ceil_log2 (s)) : 1;
3009 }
3010
3011 use->n_map_members = size;
3012 use->cost_map = XCNEWVEC (struct cost_pair, size);
3013 }
3014 }
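
/* Sizing example (illustrative): a use related to 5 candidates gets a cost
   map of size 1 << ceil_log2 (5) == 8, so the "cand->id & (size - 1)" probe
   in set_use_iv_cost / get_use_iv_cost is a cheap mask instead of a real
   modulo operation. */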
3015
3016 /* Returns description of computation cost of expression whose runtime
3017 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3018
3019 static comp_cost
3020 new_cost (unsigned runtime, unsigned complexity)
3021 {
3022 comp_cost cost;
3023
3024 cost.cost = runtime;
3025 cost.complexity = complexity;
3026
3027 return cost;
3028 }
3029
3030 /* Returns true if COST is infinite. */
3031
3032 static bool
3033 infinite_cost_p (comp_cost cost)
3034 {
3035 return cost.cost == INFTY;
3036 }
3037
3038 /* Adds costs COST1 and COST2. */
3039
3040 static comp_cost
3041 add_costs (comp_cost cost1, comp_cost cost2)
3042 {
3043 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3044 return infinite_cost;
3045
3046 cost1.cost += cost2.cost;
3047 cost1.complexity += cost2.complexity;
3048
3049 return cost1;
3050 }
3051 /* Subtracts cost COST2 from COST1. */
3052
3053 static comp_cost
3054 sub_costs (comp_cost cost1, comp_cost cost2)
3055 {
3056 cost1.cost -= cost2.cost;
3057 cost1.complexity -= cost2.complexity;
3058
3059 return cost1;
3060 }
3061
3062 /* Returns a negative number if COST1 < COST2, a positive number if
3063 COST1 > COST2, and 0 if COST1 = COST2. */
3064
3065 static int
3066 compare_costs (comp_cost cost1, comp_cost cost2)
3067 {
3068 if (cost1.cost == cost2.cost)
3069 return cost1.complexity - cost2.complexity;
3070
3071 return cost1.cost - cost2.cost;
3072 }
3073
3074 /* Sets cost of (USE, CANDIDATE) pair to COST and records that it depends
3075 on invariants DEPENDS_ON and that the value used in expressing it
3076 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3077
3078 static void
3079 set_use_iv_cost (struct ivopts_data *data,
3080 struct iv_use *use, struct iv_cand *cand,
3081 comp_cost cost, bitmap depends_on, tree value,
3082 enum tree_code comp, int inv_expr_id)
3083 {
3084 unsigned i, s;
3085
3086 if (infinite_cost_p (cost))
3087 {
3088 BITMAP_FREE (depends_on);
3089 return;
3090 }
3091
3092 if (data->consider_all_candidates)
3093 {
3094 use->cost_map[cand->id].cand = cand;
3095 use->cost_map[cand->id].cost = cost;
3096 use->cost_map[cand->id].depends_on = depends_on;
3097 use->cost_map[cand->id].value = value;
3098 use->cost_map[cand->id].comp = comp;
3099 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3100 return;
3101 }
3102
3103 /* n_map_members is a power of two, so this computes modulo. */
3104 s = cand->id & (use->n_map_members - 1);
3105 for (i = s; i < use->n_map_members; i++)
3106 if (!use->cost_map[i].cand)
3107 goto found;
3108 for (i = 0; i < s; i++)
3109 if (!use->cost_map[i].cand)
3110 goto found;
3111
3112 gcc_unreachable ();
3113
3114 found:
3115 use->cost_map[i].cand = cand;
3116 use->cost_map[i].cost = cost;
3117 use->cost_map[i].depends_on = depends_on;
3118 use->cost_map[i].value = value;
3119 use->cost_map[i].comp = comp;
3120 use->cost_map[i].inv_expr_id = inv_expr_id;
3121 }
3122
3123 /* Gets cost of (USE, CANDIDATE) pair. */
3124
3125 static struct cost_pair *
3126 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3127 struct iv_cand *cand)
3128 {
3129 unsigned i, s;
3130 struct cost_pair *ret;
3131
3132 if (!cand)
3133 return NULL;
3134
3135 if (data->consider_all_candidates)
3136 {
3137 ret = use->cost_map + cand->id;
3138 if (!ret->cand)
3139 return NULL;
3140
3141 return ret;
3142 }
3143
3144 /* n_map_members is a power of two, so this computes modulo. */
3145 s = cand->id & (use->n_map_members - 1);
3146 for (i = s; i < use->n_map_members; i++)
3147 if (use->cost_map[i].cand == cand)
3148 return use->cost_map + i;
3149 else if (use->cost_map[i].cand == NULL)
3150 return NULL;
3151 for (i = 0; i < s; i++)
3152 if (use->cost_map[i].cand == cand)
3153 return use->cost_map + i;
3154 else if (use->cost_map[i].cand == NULL)
3155 return NULL;
3156
3157 return NULL;
3158 }
3159
3160 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3161 static rtx
3162 produce_memory_decl_rtl (tree obj, int *regno)
3163 {
3164 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3165 machine_mode address_mode = targetm.addr_space.address_mode (as);
3166 rtx x;
3167
3168 gcc_assert (obj);
3169 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3170 {
3171 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3172 x = gen_rtx_SYMBOL_REF (address_mode, name);
3173 SET_SYMBOL_REF_DECL (x, obj);
3174 x = gen_rtx_MEM (DECL_MODE (obj), x);
3175 set_mem_addr_space (x, as);
3176 targetm.encode_section_info (obj, x, true);
3177 }
3178 else
3179 {
3180 x = gen_raw_REG (address_mode, (*regno)++);
3181 x = gen_rtx_MEM (DECL_MODE (obj), x);
3182 set_mem_addr_space (x, as);
3183 }
3184
3185 return x;
3186 }
3187
3188 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
3189 walk_tree. DATA contains the actual fake register number. */
3190
3191 static tree
3192 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3193 {
3194 tree obj = NULL_TREE;
3195 rtx x = NULL_RTX;
3196 int *regno = (int *) data;
3197
3198 switch (TREE_CODE (*expr_p))
3199 {
3200 case ADDR_EXPR:
3201 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3202 handled_component_p (*expr_p);
3203 expr_p = &TREE_OPERAND (*expr_p, 0))
3204 continue;
3205 obj = *expr_p;
3206 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3207 x = produce_memory_decl_rtl (obj, regno);
3208 break;
3209
3210 case SSA_NAME:
3211 *ws = 0;
3212 obj = SSA_NAME_VAR (*expr_p);
3213 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3214 if (!obj)
3215 return NULL_TREE;
3216 if (!DECL_RTL_SET_P (obj))
3217 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3218 break;
3219
3220 case VAR_DECL:
3221 case PARM_DECL:
3222 case RESULT_DECL:
3223 *ws = 0;
3224 obj = *expr_p;
3225
3226 if (DECL_RTL_SET_P (obj))
3227 break;
3228
3229 if (DECL_MODE (obj) == BLKmode)
3230 x = produce_memory_decl_rtl (obj, regno);
3231 else
3232 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3233
3234 break;
3235
3236 default:
3237 break;
3238 }
3239
3240 if (x)
3241 {
3242 decl_rtl_to_reset.safe_push (obj);
3243 SET_DECL_RTL (obj, x);
3244 }
3245
3246 return NULL_TREE;
3247 }
3248
3249 /* Determines cost of the computation of EXPR. */
3250
3251 static unsigned
3252 computation_cost (tree expr, bool speed)
3253 {
3254 rtx_insn *seq;
3255 rtx rslt;
3256 tree type = TREE_TYPE (expr);
3257 unsigned cost;
3258 /* Avoid using hard regs in ways which may be unsupported. */
3259 int regno = LAST_VIRTUAL_REGISTER + 1;
3260 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3261 enum node_frequency real_frequency = node->frequency;
3262
3263 node->frequency = NODE_FREQUENCY_NORMAL;
3264 crtl->maybe_hot_insn_p = speed;
3265 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3266 start_sequence ();
3267 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3268 seq = get_insns ();
3269 end_sequence ();
3270 default_rtl_profile ();
3271 node->frequency = real_frequency;
3272
3273 cost = seq_cost (seq, speed);
3274 if (MEM_P (rslt))
3275 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3276 TYPE_ADDR_SPACE (type), speed);
3277 else if (!REG_P (rslt))
3278 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3279
3280 return cost;
3281 }
3282
3283 /* Returns variable containing the value of candidate CAND at statement AT. */
3284
3285 static tree
3286 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3287 {
3288 if (stmt_after_increment (loop, cand, stmt))
3289 return cand->var_after;
3290 else
3291 return cand->var_before;
3292 }
3293
3294 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3295 same precision that is at least as wide as the precision of TYPE, stores
3296 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3297 type of A and B. */
3298
3299 static tree
3300 determine_common_wider_type (tree *a, tree *b)
3301 {
3302 tree wider_type = NULL;
3303 tree suba, subb;
3304 tree atype = TREE_TYPE (*a);
3305
3306 if (CONVERT_EXPR_P (*a))
3307 {
3308 suba = TREE_OPERAND (*a, 0);
3309 wider_type = TREE_TYPE (suba);
3310 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3311 return atype;
3312 }
3313 else
3314 return atype;
3315
3316 if (CONVERT_EXPR_P (*b))
3317 {
3318 subb = TREE_OPERAND (*b, 0);
3319 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3320 return atype;
3321 }
3322 else
3323 return atype;
3324
3325 *a = suba;
3326 *b = subb;
3327 return wider_type;
3328 }
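
/* Example of the intent (hypothetical types): if *A is (int) x and *B is
   (int) y where x and y are both 'long', the function rewrites *A to x and
   *B to y and returns 'long', so the caller can fold their difference in
   the wider type before converting back. */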
3329
3330 /* Determines the expression by which USE is expressed from induction variable
3331 CAND at statement AT in LOOP. The expression is stored in a decomposed
3332 form into AFF. Returns false if USE cannot be expressed using CAND. */
3333
3334 static bool
3335 get_computation_aff (struct loop *loop,
3336 struct iv_use *use, struct iv_cand *cand, gimple at,
3337 struct aff_tree *aff)
3338 {
3339 tree ubase = use->iv->base;
3340 tree ustep = use->iv->step;
3341 tree cbase = cand->iv->base;
3342 tree cstep = cand->iv->step, cstep_common;
3343 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3344 tree common_type, var;
3345 tree uutype;
3346 aff_tree cbase_aff, var_aff;
3347 widest_int rat;
3348
3349 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3350 {
3351 /* We do not have a precision to express the values of use. */
3352 return false;
3353 }
3354
3355 var = var_at_stmt (loop, cand, at);
3356 uutype = unsigned_type_for (utype);
3357
3358 /* If the conversion is not noop, perform it. */
3359 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3360 {
3361 cstep = fold_convert (uutype, cstep);
3362 cbase = fold_convert (uutype, cbase);
3363 var = fold_convert (uutype, var);
3364 }
3365
3366 if (!constant_multiple_of (ustep, cstep, &rat))
3367 return false;
3368
3369 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3370 type, we achieve better folding by computing their difference in this
3371 wider type, and cast the result to UUTYPE. We do not need to worry about
3372 overflows, as all the arithmetic will in the end be performed in UUTYPE
3373 anyway. */
3374 common_type = determine_common_wider_type (&ubase, &cbase);
3375
3376 /* use = ubase - ratio * cbase + ratio * var. */
3377 tree_to_aff_combination (ubase, common_type, aff);
3378 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3379 tree_to_aff_combination (var, uutype, &var_aff);
3380
3381 /* We need to shift the value if we are after the increment. */
3382 if (stmt_after_increment (loop, cand, at))
3383 {
3384 aff_tree cstep_aff;
3385
3386 if (common_type != uutype)
3387 cstep_common = fold_convert (common_type, cstep);
3388 else
3389 cstep_common = cstep;
3390
3391 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3392 aff_combination_add (&cbase_aff, &cstep_aff);
3393 }
3394
3395 aff_combination_scale (&cbase_aff, -rat);
3396 aff_combination_add (aff, &cbase_aff);
3397 if (common_type != uutype)
3398 aff_combination_convert (aff, uutype);
3399
3400 aff_combination_scale (&var_aff, rat);
3401 aff_combination_add (aff, &var_aff);
3402
3403 return true;
3404 }
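
/* Worked example (illustrative only): expressing a use with base 0 and
   step 4 from a candidate with base 0 and step 1 gives rat == 4, so the
   affine combination computed above reduces to use = 4 * var, where var is
   the candidate's value at statement AT (var_before or var_after, depending
   on whether AT follows the increment). */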
3405
3406 /* Return the type of USE. */
3407
3408 static tree
3409 get_use_type (struct iv_use *use)
3410 {
3411 tree base_type = TREE_TYPE (use->iv->base);
3412 tree type;
3413
3414 if (use->type == USE_ADDRESS)
3415 {
3416 /* The base_type may be a void pointer. Create a pointer type based on
3417 the mem_ref instead. */
3418 type = build_pointer_type (TREE_TYPE (*use->op_p));
3419 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3420 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3421 }
3422 else
3423 type = base_type;
3424
3425 return type;
3426 }
3427
3428 /* Determines the expression by which USE is expressed from induction variable
3429 CAND at statement AT in LOOP. The computation is unshared. */
3430
3431 static tree
3432 get_computation_at (struct loop *loop,
3433 struct iv_use *use, struct iv_cand *cand, gimple at)
3434 {
3435 aff_tree aff;
3436 tree type = get_use_type (use);
3437
3438 if (!get_computation_aff (loop, use, cand, at, &aff))
3439 return NULL_TREE;
3440 unshare_aff_combination (&aff);
3441 return fold_convert (type, aff_combination_to_tree (&aff));
3442 }
3443
3444 /* Determines the expression by which USE is expressed from induction variable
3445 CAND in LOOP. The computation is unshared. */
3446
3447 static tree
3448 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3449 {
3450 return get_computation_at (loop, use, cand, use->stmt);
3451 }
3452
3453 /* Adjust the cost COST for being in loop setup rather than loop body.
3454 If we're optimizing for space, the loop setup overhead is constant;
3455 if we're optimizing for speed, amortize it over the expected iteration count. */
3456 static unsigned
3457 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3458 {
3459 if (cost == INFTY)
3460 return cost;
3461 else if (optimize_loop_for_speed_p (data->current_loop))
3462 return cost / avg_loop_niter (data->current_loop);
3463 else
3464 return cost;
3465 }
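
/* For instance (hypothetical numbers): a setup cost of 10 in a loop whose
   average iteration count is 5 is accounted as 10 / 5 == 2 when optimizing
   for speed, but stays 10 when optimizing for size, since the setup code
   occupies the same space regardless of the trip count. */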
3466
3467 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3468 validity for a memory reference accessing memory of mode MODE in
3469 address space AS. */
3470
3471
3472 bool
3473 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3474 addr_space_t as)
3475 {
3476 #define MAX_RATIO 128
3477 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3478 static vec<sbitmap> valid_mult_list;
3479 sbitmap valid_mult;
3480
3481 if (data_index >= valid_mult_list.length ())
3482 valid_mult_list.safe_grow_cleared (data_index + 1);
3483
3484 valid_mult = valid_mult_list[data_index];
3485 if (!valid_mult)
3486 {
3487 machine_mode address_mode = targetm.addr_space.address_mode (as);
3488 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3489 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3490 rtx addr, scaled;
3491 HOST_WIDE_INT i;
3492
3493 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3494 bitmap_clear (valid_mult);
3495 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3496 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3497 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3498 {
3499 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3500 if (memory_address_addr_space_p (mode, addr, as)
3501 || memory_address_addr_space_p (mode, scaled, as))
3502 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3503 }
3504
3505 if (dump_file && (dump_flags & TDF_DETAILS))
3506 {
3507 fprintf (dump_file, " allowed multipliers:");
3508 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3509 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3510 fprintf (dump_file, " %d", (int) i);
3511 fprintf (dump_file, "\n");
3512 fprintf (dump_file, "\n");
3513 }
3514
3515 valid_mult_list[data_index] = valid_mult;
3516 }
3517
3518 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3519 return false;
3520
3521 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3522 }
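
/* As a rough illustration (target-dependent, so only a hypothetical
   example): on targets with scaled-index addressing, ratios such as 1, 2,
   4 and 8 typically end up set in the bitmap above, while e.g. 3 usually
   does not, so a use needing index * 3 pays an extra multiplication cost
   elsewhere. */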
3523
3524 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3525 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3526 variable is omitted. Compute the cost for a memory reference that accesses
3527 a memory location of mode MEM_MODE in address space AS.
3528
3529 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3530 size of MEM_MODE / RATIO) is available. To make this determination, we
3531 look at the size of the increment to be made, which is given in CSTEP.
3532 CSTEP may be zero if the step is unknown.
3533 STMT_AFTER_INC is true iff the statement we're looking at is after the
3534 increment of the original biv.
3535
3536 TODO -- there must be some better way. This all is quite crude. */
3537
3538 enum ainc_type
3539 {
3540 AINC_PRE_INC, /* Pre increment. */
3541 AINC_PRE_DEC, /* Pre decrement. */
3542 AINC_POST_INC, /* Post increment. */
3543 AINC_POST_DEC, /* Post decrement. */
3544 AINC_NONE /* Also the number of auto increment types. */
3545 };
3546
3547 typedef struct address_cost_data_s
3548 {
3549 HOST_WIDE_INT min_offset, max_offset;
3550 unsigned costs[2][2][2][2];
3551 unsigned ainc_costs[AINC_NONE];
3552 } *address_cost_data;
3553
3554
3555 static comp_cost
3556 get_address_cost (bool symbol_present, bool var_present,
3557 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3558 HOST_WIDE_INT cstep, machine_mode mem_mode,
3559 addr_space_t as, bool speed,
3560 bool stmt_after_inc, bool *may_autoinc)
3561 {
3562 machine_mode address_mode = targetm.addr_space.address_mode (as);
3563 static vec<address_cost_data> address_cost_data_list;
3564 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3565 address_cost_data data;
3566 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3567 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3568 unsigned cost, acost, complexity;
3569 enum ainc_type autoinc_type;
3570 bool offset_p, ratio_p, autoinc;
3571 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3572 unsigned HOST_WIDE_INT mask;
3573 unsigned bits;
3574
3575 if (data_index >= address_cost_data_list.length ())
3576 address_cost_data_list.safe_grow_cleared (data_index + 1);
3577
3578 data = address_cost_data_list[data_index];
3579 if (!data)
3580 {
3581 HOST_WIDE_INT i;
3582 HOST_WIDE_INT rat, off = 0;
3583 int old_cse_not_expected, width;
3584 unsigned sym_p, var_p, off_p, rat_p, add_c;
3585 rtx_insn *seq;
3586 rtx addr, base;
3587 rtx reg0, reg1;
3588
3589 data = (address_cost_data) xcalloc (1, sizeof (*data));
3590
3591 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3592
3593 width = GET_MODE_BITSIZE (address_mode) - 1;
3594 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3595 width = HOST_BITS_PER_WIDE_INT - 1;
3596 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3597
3598 for (i = width; i >= 0; i--)
3599 {
3600 off = -((unsigned HOST_WIDE_INT) 1 << i);
3601 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3602 if (memory_address_addr_space_p (mem_mode, addr, as))
3603 break;
3604 }
3605 data->min_offset = (i == -1? 0 : off);
3606
3607 for (i = width; i >= 0; i--)
3608 {
3609 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3610 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3611 if (memory_address_addr_space_p (mem_mode, addr, as))
3612 break;
3613 /* For some strict-alignment targets, the offset must be naturally
3614 aligned. Try an aligned offset if mem_mode is not QImode. */
3615 off = mem_mode != QImode
3616 ? ((unsigned HOST_WIDE_INT) 1 << i)
3617 - GET_MODE_SIZE (mem_mode)
3618 : 0;
3619 if (off > 0)
3620 {
3621 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3622 if (memory_address_addr_space_p (mem_mode, addr, as))
3623 break;
3624 }
3625 }
3626 if (i == -1)
3627 off = 0;
3628 data->max_offset = off;
3629
3630 if (dump_file && (dump_flags & TDF_DETAILS))
3631 {
3632 fprintf (dump_file, "get_address_cost:\n");
3633 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3634 GET_MODE_NAME (mem_mode),
3635 data->min_offset);
3636 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3637 GET_MODE_NAME (mem_mode),
3638 data->max_offset);
3639 }
3640
3641 rat = 1;
3642 for (i = 2; i <= MAX_RATIO; i++)
3643 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3644 {
3645 rat = i;
3646 break;
3647 }
3648
3649 /* Compute the cost of various addressing modes. */
3650 acost = 0;
3651 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3652 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3653
3654 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3655 || USE_STORE_PRE_DECREMENT (mem_mode))
3656 {
3657 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3658 has_predec[mem_mode]
3659 = memory_address_addr_space_p (mem_mode, addr, as);
3660
3661 if (has_predec[mem_mode])
3662 data->ainc_costs[AINC_PRE_DEC]
3663 = address_cost (addr, mem_mode, as, speed);
3664 }
3665 if (USE_LOAD_POST_DECREMENT (mem_mode)
3666 || USE_STORE_POST_DECREMENT (mem_mode))
3667 {
3668 addr = gen_rtx_POST_DEC (address_mode, reg0);
3669 has_postdec[mem_mode]
3670 = memory_address_addr_space_p (mem_mode, addr, as);
3671
3672 if (has_postdec[mem_mode])
3673 data->ainc_costs[AINC_POST_DEC]
3674 = address_cost (addr, mem_mode, as, speed);
3675 }
3676 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3677 || USE_STORE_PRE_INCREMENT (mem_mode))
3678 {
3679 addr = gen_rtx_PRE_INC (address_mode, reg0);
3680 has_preinc[mem_mode]
3681 = memory_address_addr_space_p (mem_mode, addr, as);
3682
3683 if (has_preinc[mem_mode])
3684 data->ainc_costs[AINC_PRE_INC]
3685 = address_cost (addr, mem_mode, as, speed);
3686 }
3687 if (USE_LOAD_POST_INCREMENT (mem_mode)
3688 || USE_STORE_POST_INCREMENT (mem_mode))
3689 {
3690 addr = gen_rtx_POST_INC (address_mode, reg0);
3691 has_postinc[mem_mode]
3692 = memory_address_addr_space_p (mem_mode, addr, as);
3693
3694 if (has_postinc[mem_mode])
3695 data->ainc_costs[AINC_POST_INC]
3696 = address_cost (addr, mem_mode, as, speed);
3697 }
3698 for (i = 0; i < 16; i++)
3699 {
3700 sym_p = i & 1;
3701 var_p = (i >> 1) & 1;
3702 off_p = (i >> 2) & 1;
3703 rat_p = (i >> 3) & 1;
3704
3705 addr = reg0;
3706 if (rat_p)
3707 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3708 gen_int_mode (rat, address_mode));
3709
3710 if (var_p)
3711 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3712
3713 if (sym_p)
3714 {
3715 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3716 /* ??? We can run into trouble with some backends by presenting
3717 it with symbols which haven't been properly passed through
3718 targetm.encode_section_info. By setting the local bit, we
3719 enhance the probability of things working. */
3720 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3721
3722 if (off_p)
3723 base = gen_rtx_fmt_e (CONST, address_mode,
3724 gen_rtx_fmt_ee
3725 (PLUS, address_mode, base,
3726 gen_int_mode (off, address_mode)));
3727 }
3728 else if (off_p)
3729 base = gen_int_mode (off, address_mode);
3730 else
3731 base = NULL_RTX;
3732
3733 if (base)
3734 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3735
3736 start_sequence ();
3737 /* To avoid splitting addressing modes, pretend that no cse will
3738 follow. */
3739 old_cse_not_expected = cse_not_expected;
3740 cse_not_expected = true;
3741 addr = memory_address_addr_space (mem_mode, addr, as);
3742 cse_not_expected = old_cse_not_expected;
3743 seq = get_insns ();
3744 end_sequence ();
3745
3746 acost = seq_cost (seq, speed);
3747 acost += address_cost (addr, mem_mode, as, speed);
3748
3749 if (!acost)
3750 acost = 1;
3751 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3752 }
3753
3754 /* On some targets, it is quite expensive to load symbol to a register,
3755 which makes addresses that contain symbols look much more expensive.
3756 However, the symbol will have to be loaded in any case before the
3757 loop (and quite likely we have it in register already), so it does not
3758 make much sense to penalize them too heavily. So make some final
3759 tweaks for the SYMBOL_PRESENT modes:
3760
3761 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3762 var is cheaper, use this mode with small penalty.
3763 If VAR_PRESENT is true, try whether the mode with
3764 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3765 if this is the case, use it. */
3766 add_c = add_cost (speed, address_mode);
3767 for (i = 0; i < 8; i++)
3768 {
3769 var_p = i & 1;
3770 off_p = (i >> 1) & 1;
3771 rat_p = (i >> 2) & 1;
3772
3773 acost = data->costs[0][1][off_p][rat_p] + 1;
3774 if (var_p)
3775 acost += add_c;
3776
3777 if (acost < data->costs[1][var_p][off_p][rat_p])
3778 data->costs[1][var_p][off_p][rat_p] = acost;
3779 }
3780
3781 if (dump_file && (dump_flags & TDF_DETAILS))
3782 {
3783 fprintf (dump_file, "Address costs:\n");
3784
3785 for (i = 0; i < 16; i++)
3786 {
3787 sym_p = i & 1;
3788 var_p = (i >> 1) & 1;
3789 off_p = (i >> 2) & 1;
3790 rat_p = (i >> 3) & 1;
3791
3792 fprintf (dump_file, " ");
3793 if (sym_p)
3794 fprintf (dump_file, "sym + ");
3795 if (var_p)
3796 fprintf (dump_file, "var + ");
3797 if (off_p)
3798 fprintf (dump_file, "cst + ");
3799 if (rat_p)
3800 fprintf (dump_file, "rat * ");
3801
3802 acost = data->costs[sym_p][var_p][off_p][rat_p];
3803 fprintf (dump_file, "index costs %d\n", acost);
3804 }
3805 if (has_predec[mem_mode] || has_postdec[mem_mode]
3806 || has_preinc[mem_mode] || has_postinc[mem_mode])
3807 fprintf (dump_file, " May include autoinc/dec\n");
3808 fprintf (dump_file, "\n");
3809 }
3810
3811 address_cost_data_list[data_index] = data;
3812 }
3813
3814 bits = GET_MODE_BITSIZE (address_mode);
3815 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3816 offset &= mask;
3817 if ((offset >> (bits - 1) & 1))
3818 offset |= ~mask;
3819 s_offset = offset;
3820
3821 autoinc = false;
3822 autoinc_type = AINC_NONE;
3823 msize = GET_MODE_SIZE (mem_mode);
3824 autoinc_offset = offset;
3825 if (stmt_after_inc)
3826 autoinc_offset += ratio * cstep;
3827 if (symbol_present || var_present || ratio != 1)
3828 autoinc = false;
3829 else
3830 {
3831 if (has_postinc[mem_mode] && autoinc_offset == 0
3832 && msize == cstep)
3833 autoinc_type = AINC_POST_INC;
3834 else if (has_postdec[mem_mode] && autoinc_offset == 0
3835 && msize == -cstep)
3836 autoinc_type = AINC_POST_DEC;
3837 else if (has_preinc[mem_mode] && autoinc_offset == msize
3838 && msize == cstep)
3839 autoinc_type = AINC_PRE_INC;
3840 else if (has_predec[mem_mode] && autoinc_offset == -msize
3841 && msize == -cstep)
3842 autoinc_type = AINC_PRE_DEC;
3843
3844 if (autoinc_type != AINC_NONE)
3845 autoinc = true;
3846 }
3847
3848 cost = 0;
3849 offset_p = (s_offset != 0
3850 && data->min_offset <= s_offset
3851 && s_offset <= data->max_offset);
3852 ratio_p = (ratio != 1
3853 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3854
3855 if (ratio != 1 && !ratio_p)
3856 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3857
3858 if (s_offset && !offset_p && !symbol_present)
3859 cost += add_cost (speed, address_mode);
3860
3861 if (may_autoinc)
3862 *may_autoinc = autoinc;
3863 if (autoinc)
3864 acost = data->ainc_costs[autoinc_type];
3865 else
3866 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3867 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3868 return new_cost (cost + acost, complexity);
3869 }
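
/* Summary example (hypothetical): an address of shape symbol + 8 + 4 * index
   with ratio 4 allowed and offset 8 within [min_offset, max_offset] is
   charged data->costs[1][0][1][1] with complexity 3; if the ratio were not
   supported, an extra mult_by_coeff_cost would be added on top. */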
3870
3871 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3872 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3873 calculating the operands of EXPR. Returns true if successful, and returns
3874 the cost in COST. */
3875
3876 static bool
3877 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3878 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3879 {
3880 comp_cost res;
3881 tree op1 = TREE_OPERAND (expr, 1);
3882 tree cst = TREE_OPERAND (mult, 1);
3883 tree multop = TREE_OPERAND (mult, 0);
3884 int m = exact_log2 (int_cst_value (cst));
3885 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3886 int as_cost, sa_cost;
3887 bool mult_in_op1;
3888
3889 if (!(m >= 0 && m < maxm))
3890 return false;
3891
3892 mult_in_op1 = operand_equal_p (op1, mult, 0);
3893
3894 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3895
3896 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3897 use that in preference to a shift insn followed by an add insn. */
3898 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3899 ? shiftadd_cost (speed, mode, m)
3900 : (mult_in_op1
3901 ? shiftsub1_cost (speed, mode, m)
3902 : shiftsub0_cost (speed, mode, m)));
3903
3904 res = new_cost (MIN (as_cost, sa_cost), 0);
3905 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3906
3907 STRIP_NOPS (multop);
3908 if (!is_gimple_val (multop))
3909 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3910
3911 *cost = res;
3912 return true;
3913 }
3914
3915 /* Estimates cost of forcing expression EXPR into a variable. */
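/* For instance, integer_cost[speed] below ends up as roughly the cost of
   loading a constant such as 2000 into a register, and symbol_cost as the
   cost of taking the address of a static variable; these serve as building
   blocks when the function recurses over EXPR.  */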
3916
3917 static comp_cost
3918 force_expr_to_var_cost (tree expr, bool speed)
3919 {
3920 static bool costs_initialized = false;
3921 static unsigned integer_cost [2];
3922 static unsigned symbol_cost [2];
3923 static unsigned address_cost [2];
3924 tree op0, op1;
3925 comp_cost cost0, cost1, cost;
3926 machine_mode mode;
3927
3928 if (!costs_initialized)
3929 {
3930 tree type = build_pointer_type (integer_type_node);
3931 tree var, addr;
3932 rtx x;
3933 int i;
3934
3935 var = create_tmp_var_raw (integer_type_node, "test_var");
3936 TREE_STATIC (var) = 1;
3937 x = produce_memory_decl_rtl (var, NULL);
3938 SET_DECL_RTL (var, x);
3939
3940 addr = build1 (ADDR_EXPR, type, var);
3941
3942
3943 for (i = 0; i < 2; i++)
3944 {
3945 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3946 2000), i);
3947
3948 symbol_cost[i] = computation_cost (addr, i) + 1;
3949
3950 address_cost[i]
3951 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3952 if (dump_file && (dump_flags & TDF_DETAILS))
3953 {
3954 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3955 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3956 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3957 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3958 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3959 fprintf (dump_file, "\n");
3960 }
3961 }
3962
3963 costs_initialized = true;
3964 }
3965
3966 STRIP_NOPS (expr);
3967
3968 if (SSA_VAR_P (expr))
3969 return no_cost;
3970
3971 if (is_gimple_min_invariant (expr))
3972 {
3973 if (TREE_CODE (expr) == INTEGER_CST)
3974 return new_cost (integer_cost [speed], 0);
3975
3976 if (TREE_CODE (expr) == ADDR_EXPR)
3977 {
3978 tree obj = TREE_OPERAND (expr, 0);
3979
3980 if (TREE_CODE (obj) == VAR_DECL
3981 || TREE_CODE (obj) == PARM_DECL
3982 || TREE_CODE (obj) == RESULT_DECL)
3983 return new_cost (symbol_cost [speed], 0);
3984 }
3985
3986 return new_cost (address_cost [speed], 0);
3987 }
3988
3989 switch (TREE_CODE (expr))
3990 {
3991 case POINTER_PLUS_EXPR:
3992 case PLUS_EXPR:
3993 case MINUS_EXPR:
3994 case MULT_EXPR:
3995 op0 = TREE_OPERAND (expr, 0);
3996 op1 = TREE_OPERAND (expr, 1);
3997 STRIP_NOPS (op0);
3998 STRIP_NOPS (op1);
3999 break;
4000
4001 CASE_CONVERT:
4002 case NEGATE_EXPR:
4003 op0 = TREE_OPERAND (expr, 0);
4004 STRIP_NOPS (op0);
4005 op1 = NULL_TREE;
4006 break;
4007
4008 default:
4009 /* Just an arbitrary value, FIXME. */
4010 return new_cost (target_spill_cost[speed], 0);
4011 }
4012
4013 if (op0 == NULL_TREE
4014 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4015 cost0 = no_cost;
4016 else
4017 cost0 = force_expr_to_var_cost (op0, speed);
4018
4019 if (op1 == NULL_TREE
4020 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4021 cost1 = no_cost;
4022 else
4023 cost1 = force_expr_to_var_cost (op1, speed);
4024
4025 mode = TYPE_MODE (TREE_TYPE (expr));
4026 switch (TREE_CODE (expr))
4027 {
4028 case POINTER_PLUS_EXPR:
4029 case PLUS_EXPR:
4030 case MINUS_EXPR:
4031 case NEGATE_EXPR:
4032 cost = new_cost (add_cost (speed, mode), 0);
4033 if (TREE_CODE (expr) != NEGATE_EXPR)
4034 {
4035 tree mult = NULL_TREE;
4036 comp_cost sa_cost;
4037 if (TREE_CODE (op1) == MULT_EXPR)
4038 mult = op1;
4039 else if (TREE_CODE (op0) == MULT_EXPR)
4040 mult = op0;
4041
4042 if (mult != NULL_TREE
4043 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4044 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4045 speed, &sa_cost))
4046 return sa_cost;
4047 }
4048 break;
4049
4050 CASE_CONVERT:
4051 {
4052 tree inner_mode, outer_mode;
4053 outer_mode = TREE_TYPE (expr);
4054 inner_mode = TREE_TYPE (op0);
4055 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4056 TYPE_MODE (inner_mode), speed), 0);
4057 }
4058 break;
4059
4060 case MULT_EXPR:
4061 if (cst_and_fits_in_hwi (op0))
4062 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4063 mode, speed), 0);
4064 else if (cst_and_fits_in_hwi (op1))
4065 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4066 mode, speed), 0);
4067 else
4068 return new_cost (target_spill_cost [speed], 0);
4069 break;
4070
4071 default:
4072 gcc_unreachable ();
4073 }
4074
4075 cost = add_costs (cost, cost0);
4076 cost = add_costs (cost, cost1);
4077
4078 /* Bound the cost by target_spill_cost. The parts of complicated
4079 computations often are either loop invariant or at least can
4080 be shared between several iv uses, so letting this grow without
4081 limits would not give reasonable results. */
4082 if (cost.cost > (int) target_spill_cost [speed])
4083 cost.cost = target_spill_cost [speed];
4084
4085 return cost;
4086 }
4087
4088 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4089 invariants the computation depends on. */
4090
4091 static comp_cost
4092 force_var_cost (struct ivopts_data *data,
4093 tree expr, bitmap *depends_on)
4094 {
4095 if (depends_on)
4096 {
4097 fd_ivopts_data = data;
4098 walk_tree (&expr, find_depends, depends_on, NULL);
4099 }
4100
4101 return force_expr_to_var_cost (expr, data->speed);
4102 }
4103
4104 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4105 value of offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
4106 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4107 invariants the computation depends on. */
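/* For instance, for ADDR == a_static_array[3] the core found below is the
   VAR_DECL itself, so *SYMBOL_PRESENT becomes true and the element offset
   is folded into *OFFSET; a non-constant index instead falls back to the
   target_spill_cost path with *VAR_PRESENT set.  */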
4108
4109 static comp_cost
4110 split_address_cost (struct ivopts_data *data,
4111 tree addr, bool *symbol_present, bool *var_present,
4112 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4113 {
4114 tree core;
4115 HOST_WIDE_INT bitsize;
4116 HOST_WIDE_INT bitpos;
4117 tree toffset;
4118 machine_mode mode;
4119 int unsignedp, volatilep;
4120
4121 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4122 &unsignedp, &volatilep, false);
4123
4124 if (toffset != 0
4125 || bitpos % BITS_PER_UNIT != 0
4126 || TREE_CODE (core) != VAR_DECL)
4127 {
4128 *symbol_present = false;
4129 *var_present = true;
4130 fd_ivopts_data = data;
4131 walk_tree (&addr, find_depends, depends_on, NULL);
4132 return new_cost (target_spill_cost[data->speed], 0);
4133 }
4134
4135 *offset += bitpos / BITS_PER_UNIT;
4136 if (TREE_STATIC (core)
4137 || DECL_EXTERNAL (core))
4138 {
4139 *symbol_present = true;
4140 *var_present = false;
4141 return no_cost;
4142 }
4143
4144 *symbol_present = false;
4145 *var_present = true;
4146 return no_cost;
4147 }
4148
4149 /* Estimates cost of expressing difference of addresses E1 - E2 as
4150 var + symbol + offset. The value of offset is added to OFFSET;
4151 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4152 part is missing. DEPENDS_ON is a set of the invariants the computation
4153 depends on. */
4154
4155 static comp_cost
4156 ptr_difference_cost (struct ivopts_data *data,
4157 tree e1, tree e2, bool *symbol_present, bool *var_present,
4158 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4159 {
4160 HOST_WIDE_INT diff = 0;
4161 aff_tree aff_e1, aff_e2;
4162 tree type;
4163
4164 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4165
4166 if (ptr_difference_const (e1, e2, &diff))
4167 {
4168 *offset += diff;
4169 *symbol_present = false;
4170 *var_present = false;
4171 return no_cost;
4172 }
4173
4174 if (integer_zerop (e2))
4175 return split_address_cost (data, TREE_OPERAND (e1, 0),
4176 symbol_present, var_present, offset, depends_on);
4177
4178 *symbol_present = false;
4179 *var_present = true;
4180
4181 type = signed_type_for (TREE_TYPE (e1));
4182 tree_to_aff_combination (e1, type, &aff_e1);
4183 tree_to_aff_combination (e2, type, &aff_e2);
4184 aff_combination_scale (&aff_e2, -1);
4185 aff_combination_add (&aff_e1, &aff_e2);
4186
4187 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4188 }
4189
4190 /* Estimates cost of expressing difference E1 - E2 as
4191 var + symbol + offset. The value of offset is added to OFFSET;
4192 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4193 part is missing. DEPENDS_ON is a set of the invariants the computation
4194 depends on. */
4195
4196 static comp_cost
4197 difference_cost (struct ivopts_data *data,
4198 tree e1, tree e2, bool *symbol_present, bool *var_present,
4199 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4200 {
4201 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4202 unsigned HOST_WIDE_INT off1, off2;
4203 aff_tree aff_e1, aff_e2;
4204 tree type;
4205
4206 e1 = strip_offset (e1, &off1);
4207 e2 = strip_offset (e2, &off2);
4208 *offset += off1 - off2;
4209
4210 STRIP_NOPS (e1);
4211 STRIP_NOPS (e2);
4212
4213 if (TREE_CODE (e1) == ADDR_EXPR)
4214 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4215 offset, depends_on);
4216 *symbol_present = false;
4217
4218 if (operand_equal_p (e1, e2, 0))
4219 {
4220 *var_present = false;
4221 return no_cost;
4222 }
4223
4224 *var_present = true;
4225
4226 if (integer_zerop (e2))
4227 return force_var_cost (data, e1, depends_on);
4228
4229 if (integer_zerop (e1))
4230 {
4231 comp_cost cost = force_var_cost (data, e2, depends_on);
4232 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4233 return cost;
4234 }
4235
4236 type = signed_type_for (TREE_TYPE (e1));
4237 tree_to_aff_combination (e1, type, &aff_e1);
4238 tree_to_aff_combination (e2, type, &aff_e2);
4239 aff_combination_scale (&aff_e2, -1);
4240 aff_combination_add (&aff_e1, &aff_e2);
4241
4242 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4243 }
4244
4245 /* Returns true if AFF1 and AFF2 are identical. */
4246
4247 static bool
4248 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4249 {
4250 unsigned i;
4251
4252 if (aff1->n != aff2->n)
4253 return false;
4254
4255 for (i = 0; i < aff1->n; i++)
4256 {
4257 if (aff1->elts[i].coef != aff2->elts[i].coef)
4258 return false;
4259
4260 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4261 return false;
4262 }
4263 return true;
4264 }
4265
4266 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4267
4268 static int
4269 get_expr_id (struct ivopts_data *data, tree expr)
4270 {
4271 struct iv_inv_expr_ent ent;
4272 struct iv_inv_expr_ent **slot;
4273
4274 ent.expr = expr;
4275 ent.hash = iterative_hash_expr (expr, 0);
4276 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4277 if (*slot)
4278 return (*slot)->id;
4279
4280 *slot = XNEW (struct iv_inv_expr_ent);
4281 (*slot)->expr = expr;
4282 (*slot)->hash = ent.hash;
4283 (*slot)->id = data->inv_expr_id++;
4284 return (*slot)->id;
4285 }
4286
4287 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4288 requires a new compiler-generated temporary. Returns -1 otherwise.
4289 ADDRESS_P is a flag indicating if the expression is for address
4290 computation. */
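/* For instance, with RATIO == 1 and UBASE == &a[0] + 16, CBASE == &a[0],
   the constant part is stripped and the bases compare equal, so -1 is
   returned; bases built from different symbols typically end up sharing
   an invariant expression via get_expr_id instead.  */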
4291
4292 static int
4293 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4294 tree cbase, HOST_WIDE_INT ratio,
4295 bool address_p)
4296 {
4297 aff_tree ubase_aff, cbase_aff;
4298 tree expr, ub, cb;
4299
4300 STRIP_NOPS (ubase);
4301 STRIP_NOPS (cbase);
4302 ub = ubase;
4303 cb = cbase;
4304
4305 if ((TREE_CODE (ubase) == INTEGER_CST)
4306 && (TREE_CODE (cbase) == INTEGER_CST))
4307 return -1;
4308
4309 /* Strips the constant part. */
4310 if (TREE_CODE (ubase) == PLUS_EXPR
4311 || TREE_CODE (ubase) == MINUS_EXPR
4312 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4313 {
4314 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4315 ubase = TREE_OPERAND (ubase, 0);
4316 }
4317
4318 /* Strips the constant part. */
4319 if (TREE_CODE (cbase) == PLUS_EXPR
4320 || TREE_CODE (cbase) == MINUS_EXPR
4321 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4322 {
4323 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4324 cbase = TREE_OPERAND (cbase, 0);
4325 }
4326
4327 if (address_p)
4328 {
4329 if (((TREE_CODE (ubase) == SSA_NAME)
4330 || (TREE_CODE (ubase) == ADDR_EXPR
4331 && is_gimple_min_invariant (ubase)))
4332 && (TREE_CODE (cbase) == INTEGER_CST))
4333 return -1;
4334
4335 if (((TREE_CODE (cbase) == SSA_NAME)
4336 || (TREE_CODE (cbase) == ADDR_EXPR
4337 && is_gimple_min_invariant (cbase)))
4338 && (TREE_CODE (ubase) == INTEGER_CST))
4339 return -1;
4340 }
4341
4342 if (ratio == 1)
4343 {
4344 if (operand_equal_p (ubase, cbase, 0))
4345 return -1;
4346
4347 if (TREE_CODE (ubase) == ADDR_EXPR
4348 && TREE_CODE (cbase) == ADDR_EXPR)
4349 {
4350 tree usym, csym;
4351
4352 usym = TREE_OPERAND (ubase, 0);
4353 csym = TREE_OPERAND (cbase, 0);
4354 if (TREE_CODE (usym) == ARRAY_REF)
4355 {
4356 tree ind = TREE_OPERAND (usym, 1);
4357 if (TREE_CODE (ind) == INTEGER_CST
4358 && tree_fits_shwi_p (ind)
4359 && tree_to_shwi (ind) == 0)
4360 usym = TREE_OPERAND (usym, 0);
4361 }
4362 if (TREE_CODE (csym) == ARRAY_REF)
4363 {
4364 tree ind = TREE_OPERAND (csym, 1);
4365 if (TREE_CODE (ind) == INTEGER_CST
4366 && tree_fits_shwi_p (ind)
4367 && tree_to_shwi (ind) == 0)
4368 csym = TREE_OPERAND (csym, 0);
4369 }
4370 if (operand_equal_p (usym, csym, 0))
4371 return -1;
4372 }
4373 /* Now do a more complex comparison. */
4374 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4375 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4376 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4377 return -1;
4378 }
4379
4380 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4381 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4382
4383 aff_combination_scale (&cbase_aff, -1 * ratio);
4384 aff_combination_add (&ubase_aff, &cbase_aff);
4385 expr = aff_combination_to_tree (&ubase_aff);
4386 return get_expr_id (data, expr);
4387 }
4388
4389
4390
4391 /* Determines the cost of the computation by which USE is expressed
4392 from induction variable CAND. If ADDRESS_P is true, we just need
4393 to create an address from it, otherwise we want to get it into
4394 a register. A set of invariants we depend on is stored in
4395 DEPENDS_ON. AT is the statement at which the value is computed.
4396 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4397 addressing is likely. */
4398
4399 static comp_cost
4400 get_computation_cost_at (struct ivopts_data *data,
4401 struct iv_use *use, struct iv_cand *cand,
4402 bool address_p, bitmap *depends_on, gimple at,
4403 bool *can_autoinc,
4404 int *inv_expr_id)
4405 {
4406 tree ubase = use->iv->base, ustep = use->iv->step;
4407 tree cbase, cstep;
4408 tree utype = TREE_TYPE (ubase), ctype;
4409 unsigned HOST_WIDE_INT cstepi, offset = 0;
4410 HOST_WIDE_INT ratio, aratio;
4411 bool var_present, symbol_present, stmt_is_after_inc;
4412 comp_cost cost;
4413 widest_int rat;
4414 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4415 machine_mode mem_mode = (address_p
4416 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4417 : VOIDmode);
4418
4419 *depends_on = NULL;
4420
4421 /* Only consider real candidates. */
4422 if (!cand->iv)
4423 return infinite_cost;
4424
4425 cbase = cand->iv->base;
4426 cstep = cand->iv->step;
4427 ctype = TREE_TYPE (cbase);
4428
4429 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4430 {
4431 /* We do not have a precision to express the values of use. */
4432 return infinite_cost;
4433 }
4434
4435 if (address_p
4436 || (use->iv->base_object
4437 && cand->iv->base_object
4438 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4439 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4440 {
4441 /* Do not try to express the address of an object with a computation
4442 based on the address of a different object. This may cause problems
4443 in RTL-level alias analysis (which does not expect this to happen,
4444 as it is illegal in C), and would be unlikely to be useful
4445 anyway. */
4446 if (use->iv->base_object
4447 && cand->iv->base_object
4448 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4449 return infinite_cost;
4450 }
4451
4452 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4453 {
4454 /* TODO -- add direct handling of this case. */
4455 goto fallback;
4456 }
4457
4458 /* CSTEPI is removed from the offset in case the statement is after the
4459 increment. If the step is not constant, we use zero instead.
4460 This is a bit imprecise (there is the extra addition), but
4461 redundancy elimination is likely to transform the code so that
4462 it uses the value of the variable before the increment anyway,
4463 so this is not overly unrealistic. */
4464 if (cst_and_fits_in_hwi (cstep))
4465 cstepi = int_cst_value (cstep);
4466 else
4467 cstepi = 0;
4468
4469 if (!constant_multiple_of (ustep, cstep, &rat))
4470 return infinite_cost;
4471
4472 if (wi::fits_shwi_p (rat))
4473 ratio = rat.to_shwi ();
4474 else
4475 return infinite_cost;
4476
4477 STRIP_NOPS (cbase);
4478 ctype = TREE_TYPE (cbase);
4479
4480 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4481
4482 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4483 or ratio == 1, it is better to handle this like
4484
4485 ubase - ratio * cbase + ratio * var
4486
4487 (this also holds in the case ratio == -1, TODO). */
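  /* For instance, with ratio == 2 and cbase == 8, the constant part
     -ratio * cbase == -16 is folded into OFFSET below and only
     ratio * var remains to be computed at runtime.  */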
4488
4489 if (cst_and_fits_in_hwi (cbase))
4490 {
4491 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4492 cost = difference_cost (data,
4493 ubase, build_int_cst (utype, 0),
4494 &symbol_present, &var_present, &offset,
4495 depends_on);
4496 cost.cost /= avg_loop_niter (data->current_loop);
4497 }
4498 else if (ratio == 1)
4499 {
4500 tree real_cbase = cbase;
4501
4502 /* Check to see if any adjustment is needed. */
4503 if (cstepi == 0 && stmt_is_after_inc)
4504 {
4505 aff_tree real_cbase_aff;
4506 aff_tree cstep_aff;
4507
4508 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4509 &real_cbase_aff);
4510 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4511
4512 aff_combination_add (&real_cbase_aff, &cstep_aff);
4513 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4514 }
4515
4516 cost = difference_cost (data,
4517 ubase, real_cbase,
4518 &symbol_present, &var_present, &offset,
4519 depends_on);
4520 cost.cost /= avg_loop_niter (data->current_loop);
4521 }
4522 else if (address_p
4523 && !POINTER_TYPE_P (ctype)
4524 && multiplier_allowed_in_address_p
4525 (ratio, mem_mode,
4526 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4527 {
4528 cbase
4529 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4530 cost = difference_cost (data,
4531 ubase, cbase,
4532 &symbol_present, &var_present, &offset,
4533 depends_on);
4534 cost.cost /= avg_loop_niter (data->current_loop);
4535 }
4536 else
4537 {
4538 cost = force_var_cost (data, cbase, depends_on);
4539 cost = add_costs (cost,
4540 difference_cost (data,
4541 ubase, build_int_cst (utype, 0),
4542 &symbol_present, &var_present,
4543 &offset, depends_on));
4544 cost.cost /= avg_loop_niter (data->current_loop);
4545 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4546 }
4547
4548 /* Set of invariants depended on by sub use has already been computed
4549 for the first use in the group. */
4550 if (use->sub_id)
4551 {
4552 cost.cost = 0;
4553 if (depends_on && *depends_on)
4554 bitmap_clear (*depends_on);
4555 }
4556 else if (inv_expr_id)
4557 {
4558 *inv_expr_id =
4559 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4560 /* Clear depends on. */
4561 if (*inv_expr_id != -1 && depends_on && *depends_on)
4562 bitmap_clear (*depends_on);
4563 }
4564
4565 /* If we are after the increment, the value of the candidate is higher by
4566 one iteration. */
4567 if (stmt_is_after_inc)
4568 offset -= ratio * cstepi;
4569
4570 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4571 (symbol/var1/const parts may be omitted). If we are looking for an
4572 address, find the cost of addressing this. */
4573 if (address_p)
4574 return add_costs (cost,
4575 get_address_cost (symbol_present, var_present,
4576 offset, ratio, cstepi,
4577 mem_mode,
4578 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4579 speed, stmt_is_after_inc,
4580 can_autoinc));
4581
4582 /* Otherwise estimate the costs for computing the expression. */
4583 if (!symbol_present && !var_present && !offset)
4584 {
4585 if (ratio != 1)
4586 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4587 return cost;
4588 }
4589
4590 /* Symbol + offset should be compile-time computable, so consider that they
4591 are added once to the variable, if present. */
4592 if (var_present && (symbol_present || offset))
4593 cost.cost += adjust_setup_cost (data,
4594 add_cost (speed, TYPE_MODE (ctype)));
4595
4596 /* Having offset does not affect runtime cost in case it is added to
4597 symbol, but it increases complexity. */
4598 if (offset)
4599 cost.complexity++;
4600
4601 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4602
4603 aratio = ratio > 0 ? ratio : -ratio;
4604 if (aratio != 1)
4605 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4606 return cost;
4607
4608 fallback:
4609 if (can_autoinc)
4610 *can_autoinc = false;
4611
4612 {
4613 /* Just get the expression, expand it and measure the cost. */
4614 tree comp = get_computation_at (data->current_loop, use, cand, at);
4615
4616 if (!comp)
4617 return infinite_cost;
4618
4619 if (address_p)
4620 comp = build_simple_mem_ref (comp);
4621
4622 return new_cost (computation_cost (comp, speed), 0);
4623 }
4624 }
4625
4626 /* Determines the cost of the computation by which USE is expressed
4627 from induction variable CAND. If ADDRESS_P is true, we just need
4628 to create an address from it, otherwise we want to get it into
4629 a register. A set of invariants we depend on is stored in
4630 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4631 autoinc addressing is likely. */
4632
4633 static comp_cost
4634 get_computation_cost (struct ivopts_data *data,
4635 struct iv_use *use, struct iv_cand *cand,
4636 bool address_p, bitmap *depends_on,
4637 bool *can_autoinc, int *inv_expr_id)
4638 {
4639 return get_computation_cost_at (data,
4640 use, cand, address_p, depends_on, use->stmt,
4641 can_autoinc, inv_expr_id);
4642 }
4643
4644 /* Determines cost of basing replacement of USE on CAND in a generic
4645 expression. */
4646
4647 static bool
4648 determine_use_iv_cost_generic (struct ivopts_data *data,
4649 struct iv_use *use, struct iv_cand *cand)
4650 {
4651 bitmap depends_on;
4652 comp_cost cost;
4653 int inv_expr_id = -1;
4654
4655 /* The simple case first -- if we need to express the value of the preserved
4656 original biv, the cost is 0. This also prevents us from counting the
4657 cost of the increment twice -- once at this use and once in the cost of
4658 the candidate. */
4659 if (cand->pos == IP_ORIGINAL
4660 && cand->incremented_at == use->stmt)
4661 {
4662 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4663 ERROR_MARK, -1);
4664 return true;
4665 }
4666
4667 cost = get_computation_cost (data, use, cand, false, &depends_on,
4668 NULL, &inv_expr_id);
4669
4670 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4671 inv_expr_id);
4672
4673 return !infinite_cost_p (cost);
4674 }
4675
4676 /* Determines cost of basing replacement of USE on CAND in an address. */
4677
4678 static bool
4679 determine_use_iv_cost_address (struct ivopts_data *data,
4680 struct iv_use *use, struct iv_cand *cand)
4681 {
4682 bitmap depends_on;
4683 bool can_autoinc;
4684 int inv_expr_id = -1;
4685 struct iv_use *sub_use;
4686 comp_cost sub_cost;
4687 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4688 &can_autoinc, &inv_expr_id);
4689
4690 if (cand->ainc_use == use)
4691 {
4692 if (can_autoinc)
4693 cost.cost -= cand->cost_step;
4694 /* If we generated the candidate solely for exploiting autoincrement
4695 opportunities, and it turns out it can't be used, set the cost to
4696 infinity to make sure we ignore it. */
4697 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4698 cost = infinite_cost;
4699 }
4700 for (sub_use = use->next;
4701 sub_use && !infinite_cost_p (cost);
4702 sub_use = sub_use->next)
4703 {
4704 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4705 &can_autoinc, &inv_expr_id);
4706 cost = add_costs (cost, sub_cost);
4707 }
4708
4709 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4710 inv_expr_id);
4711
4712 return !infinite_cost_p (cost);
4713 }
4714
4715 /* Computes value of candidate CAND at position AT in iteration NITER, and
4716 stores it to VAL. */
4717
4718 static void
4719 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4720 aff_tree *val)
4721 {
4722 aff_tree step, delta, nit;
4723 struct iv *iv = cand->iv;
4724 tree type = TREE_TYPE (iv->base);
4725 tree steptype = type;
4726 if (POINTER_TYPE_P (type))
4727 steptype = sizetype;
4728 steptype = unsigned_type_for (type);
4729
4730 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4731 aff_combination_convert (&step, steptype);
4732 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4733 aff_combination_convert (&nit, steptype);
4734 aff_combination_mult (&nit, &step, &delta);
4735 if (stmt_after_increment (loop, cand, at))
4736 aff_combination_add (&delta, &step);
4737
4738 tree_to_aff_combination (iv->base, type, val);
4739 if (!POINTER_TYPE_P (type))
4740 aff_combination_convert (val, steptype);
4741 aff_combination_add (val, &delta);
4742 }
4743
4744 /* Returns period of induction variable iv. */
4745
4746 static tree
4747 iv_period (struct iv *iv)
4748 {
4749 tree step = iv->step, period, type;
4750 tree pow2div;
4751
4752 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4753
4754 type = unsigned_type_for (TREE_TYPE (step));
4755 /* Period of the iv is lcm (step, type_range) / step - 1,
4756 i.e., N * type_range / step - 1. Since the type range is a power
4757 of two, N == (step >> num_of_ending_zeros_binary (step)),
4758 so the final result is
4759
4760 (type_range >> num_of_ending_zeros_binary (step)) - 1
4761
4762 */
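  /* For instance, a 32-bit unsigned iv with step 4 has two trailing
     zero bits, so the period computed below is 2^30 - 1.  */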
4763 pow2div = num_ending_zeros (step);
4764
4765 period = build_low_bits_mask (type,
4766 (TYPE_PRECISION (type)
4767 - tree_to_uhwi (pow2div)));
4768
4769 return period;
4770 }
4771
4772 /* Returns the comparison operator used when eliminating the iv USE. */
4773
4774 static enum tree_code
4775 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4776 {
4777 struct loop *loop = data->current_loop;
4778 basic_block ex_bb;
4779 edge exit;
4780
4781 ex_bb = gimple_bb (use->stmt);
4782 exit = EDGE_SUCC (ex_bb, 0);
4783 if (flow_bb_inside_loop_p (loop, exit->dest))
4784 exit = EDGE_SUCC (ex_bb, 1);
4785
4786 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4787 }
4788
4789 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4790 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4791 calculation is performed in non-wrapping type.
4792
4793 TODO: More generally, we could test for the situation that
4794 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4795 This would require knowing the sign of OFFSET. */
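/* For instance, if BASE is defined as q + n in a non-wrapping type and
   OFFSET expands to the same n, then BASE - OFFSET is simply q, so the
   affine comparison below reports that no overflow is possible.  */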
4796
4797 static bool
4798 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4799 {
4800 enum tree_code code;
4801 tree e1, e2;
4802 aff_tree aff_e1, aff_e2, aff_offset;
4803
4804 if (!nowrap_type_p (TREE_TYPE (base)))
4805 return false;
4806
4807 base = expand_simple_operations (base);
4808
4809 if (TREE_CODE (base) == SSA_NAME)
4810 {
4811 gimple stmt = SSA_NAME_DEF_STMT (base);
4812
4813 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4814 return false;
4815
4816 code = gimple_assign_rhs_code (stmt);
4817 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4818 return false;
4819
4820 e1 = gimple_assign_rhs1 (stmt);
4821 e2 = gimple_assign_rhs2 (stmt);
4822 }
4823 else
4824 {
4825 code = TREE_CODE (base);
4826 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4827 return false;
4828 e1 = TREE_OPERAND (base, 0);
4829 e2 = TREE_OPERAND (base, 1);
4830 }
4831
4832 /* Use affine expansion as deeper inspection to prove the equality. */
4833 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4834 &aff_e2, &data->name_expansion_cache);
4835 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4836 &aff_offset, &data->name_expansion_cache);
4837 aff_combination_scale (&aff_offset, -1);
4838 switch (code)
4839 {
4840 case PLUS_EXPR:
4841 aff_combination_add (&aff_e2, &aff_offset);
4842 if (aff_combination_zero_p (&aff_e2))
4843 return true;
4844
4845 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4846 &aff_e1, &data->name_expansion_cache);
4847 aff_combination_add (&aff_e1, &aff_offset);
4848 return aff_combination_zero_p (&aff_e1);
4849
4850 case POINTER_PLUS_EXPR:
4851 aff_combination_add (&aff_e2, &aff_offset);
4852 return aff_combination_zero_p (&aff_e2);
4853
4854 default:
4855 return false;
4856 }
4857 }
4858
4859 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4860 comparison with CAND. NITER describes the number of iterations of
4861 the loops. If successful, the comparison in COMP_P is altered accordingly.
4862
4863 We aim to handle the following situation:
4864
4865 sometype *base, *p;
4866 int a, b, i;
4867
4868 i = a;
4869 p = p_0 = base + a;
4870
4871 do
4872 {
4873 bla (*p);
4874 p++;
4875 i++;
4876 }
4877 while (i < b);
4878
4879 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4880 We aim to optimize this to
4881
4882 p = p_0 = base + a;
4883 do
4884 {
4885 bla (*p);
4886 p++;
4887 }
4888 while (p < p_0 - a + b);
4889
4890 This preserves the correctness, since the pointer arithmetics does not
4891 overflow. More precisely:
4892
4893 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4894 overflow in computing it or the values of p.
4895 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4896 overflow. To prove this, we use the fact that p_0 = base + a. */
4897
4898 static bool
4899 iv_elimination_compare_lt (struct ivopts_data *data,
4900 struct iv_cand *cand, enum tree_code *comp_p,
4901 struct tree_niter_desc *niter)
4902 {
4903 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4904 struct aff_tree nit, tmpa, tmpb;
4905 enum tree_code comp;
4906 HOST_WIDE_INT step;
4907
4908 /* We need to know that the candidate induction variable does not overflow.
4909 While more complex analysis may be used to prove this, for now just
4910 check that the variable appears in the original program and that it
4911 is computed in a type that guarantees no overflows. */
4912 cand_type = TREE_TYPE (cand->iv->base);
4913 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4914 return false;
4915
4916 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4917 the calculation of the BOUND could overflow, making the comparison
4918 invalid. */
4919 if (!data->loop_single_exit_p)
4920 return false;
4921
4922 /* We need to be able to decide whether candidate is increasing or decreasing
4923 in order to choose the right comparison operator. */
4924 if (!cst_and_fits_in_hwi (cand->iv->step))
4925 return false;
4926 step = int_cst_value (cand->iv->step);
4927
4928 /* Check that the number of iterations matches the expected pattern:
4929 a + 1 > b ? 0 : b - a - 1. */
4930 mbz = niter->may_be_zero;
4931 if (TREE_CODE (mbz) == GT_EXPR)
4932 {
4933 /* Handle a + 1 > b. */
4934 tree op0 = TREE_OPERAND (mbz, 0);
4935 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4936 {
4937 a = TREE_OPERAND (op0, 0);
4938 b = TREE_OPERAND (mbz, 1);
4939 }
4940 else
4941 return false;
4942 }
4943 else if (TREE_CODE (mbz) == LT_EXPR)
4944 {
4945 tree op1 = TREE_OPERAND (mbz, 1);
4946
4947 /* Handle b < a + 1. */
4948 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4949 {
4950 a = TREE_OPERAND (op1, 0);
4951 b = TREE_OPERAND (mbz, 0);
4952 }
4953 else
4954 return false;
4955 }
4956 else
4957 return false;
4958
4959 /* Expected number of iterations is B - A - 1. Check that it matches
4960 the actual number, i.e., that B - A - NITER = 1. */
4961 tree_to_aff_combination (niter->niter, nit_type, &nit);
4962 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4963 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4964 aff_combination_scale (&nit, -1);
4965 aff_combination_scale (&tmpa, -1);
4966 aff_combination_add (&tmpb, &tmpa);
4967 aff_combination_add (&tmpb, &nit);
4968 if (tmpb.n != 0 || tmpb.offset != 1)
4969 return false;
4970
4971 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4972 overflow. */
4973 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4974 cand->iv->step,
4975 fold_convert (TREE_TYPE (cand->iv->step), a));
4976 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4977 return false;
4978
4979 /* Determine the new comparison operator. */
4980 comp = step < 0 ? GT_EXPR : LT_EXPR;
4981 if (*comp_p == NE_EXPR)
4982 *comp_p = comp;
4983 else if (*comp_p == EQ_EXPR)
4984 *comp_p = invert_tree_comparison (comp, false);
4985 else
4986 gcc_unreachable ();
4987
4988 return true;
4989 }
4990
4991 /* Check whether it is possible to express the condition in USE by comparison
4992 of candidate CAND. If so, store the value compared with to BOUND, and the
4993 comparison operator to COMP. */
4994
4995 static bool
4996 may_eliminate_iv (struct ivopts_data *data,
4997 struct iv_use *use, struct iv_cand *cand, tree *bound,
4998 enum tree_code *comp)
4999 {
5000 basic_block ex_bb;
5001 edge exit;
5002 tree period;
5003 struct loop *loop = data->current_loop;
5004 aff_tree bnd;
5005 struct tree_niter_desc *desc = NULL;
5006
5007 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5008 return false;
5009
5010 /* For now this works only for exits that dominate the loop latch.
5011 TODO: extend to other conditions inside the loop body. */
5012 ex_bb = gimple_bb (use->stmt);
5013 if (use->stmt != last_stmt (ex_bb)
5014 || gimple_code (use->stmt) != GIMPLE_COND
5015 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5016 return false;
5017
5018 exit = EDGE_SUCC (ex_bb, 0);
5019 if (flow_bb_inside_loop_p (loop, exit->dest))
5020 exit = EDGE_SUCC (ex_bb, 1);
5021 if (flow_bb_inside_loop_p (loop, exit->dest))
5022 return false;
5023
5024 desc = niter_for_exit (data, exit);
5025 if (!desc)
5026 return false;
5027
5028 /* Determine whether we can use the variable to test the exit condition.
5029 This is the case iff the period of the induction variable is greater
5030 than the number of iterations for which the exit condition is true. */
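  /* For instance, an 8-bit unsigned iv with step 1 has period 255, so it
     can only replace the exit test of a loop known to iterate at most
     255 times; otherwise the candidate would wrap before the bound.  */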
5031 period = iv_period (cand->iv);
5032
5033 /* If the number of iterations is constant, compare against it directly. */
5034 if (TREE_CODE (desc->niter) == INTEGER_CST)
5035 {
5036 /* See cand_value_at. */
5037 if (stmt_after_increment (loop, cand, use->stmt))
5038 {
5039 if (!tree_int_cst_lt (desc->niter, period))
5040 return false;
5041 }
5042 else
5043 {
5044 if (tree_int_cst_lt (period, desc->niter))
5045 return false;
5046 }
5047 }
5048
5049 /* If not, and if this is the only possible exit of the loop, see whether
5050 we can get a conservative estimate on the number of iterations of the
5051 entire loop and compare against that instead. */
5052 else
5053 {
5054 widest_int period_value, max_niter;
5055
5056 max_niter = desc->max;
5057 if (stmt_after_increment (loop, cand, use->stmt))
5058 max_niter += 1;
5059 period_value = wi::to_widest (period);
5060 if (wi::gtu_p (max_niter, period_value))
5061 {
5062 /* See if we can take advantage of inferred loop bound information. */
5063 if (data->loop_single_exit_p)
5064 {
5065 if (!max_loop_iterations (loop, &max_niter))
5066 return false;
5067 /* The loop bound is already adjusted by adding 1. */
5068 if (wi::gtu_p (max_niter, period_value))
5069 return false;
5070 }
5071 else
5072 return false;
5073 }
5074 }
5075
5076 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5077
5078 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5079 aff_combination_to_tree (&bnd));
5080 *comp = iv_elimination_compare (data, use);
5081
5082 /* It is unlikely that computing the number of iterations using division
5083 would be more profitable than keeping the original induction variable. */
5084 if (expression_expensive_p (*bound))
5085 return false;
5086
5087 /* Sometimes, it is possible to handle the situation that the number of
5088 iterations may be zero unless additional assumptions hold, by using <
5089 instead of != in the exit condition.
5090
5091 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5092 base the exit condition on it. However, that is often too
5093 expensive. */
5094 if (!integer_zerop (desc->may_be_zero))
5095 return iv_elimination_compare_lt (data, cand, comp, desc);
5096
5097 return true;
5098 }
5099
5100 /* Calculates the cost of BOUND if it is a PARM_DECL. A PARM_DECL must
5101 be copied if it is used in the loop body and DATA->body_includes_call. */
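/* (A default-definition PARM_DECL lives in its incoming register, so if the
   loop body contains a call the value presumably has to be copied or kept
   in a call-saved register, hence the single-insn cost below.)  */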
5102
5103 static int
5104 parm_decl_cost (struct ivopts_data *data, tree bound)
5105 {
5106 tree sbound = bound;
5107 STRIP_NOPS (sbound);
5108
5109 if (TREE_CODE (sbound) == SSA_NAME
5110 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5111 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5112 && data->body_includes_call)
5113 return COSTS_N_INSNS (1);
5114
5115 return 0;
5116 }
5117
5118 /* Determines cost of basing replacement of USE on CAND in a condition. */
5119
5120 static bool
5121 determine_use_iv_cost_condition (struct ivopts_data *data,
5122 struct iv_use *use, struct iv_cand *cand)
5123 {
5124 tree bound = NULL_TREE;
5125 struct iv *cmp_iv;
5126 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5127 comp_cost elim_cost, express_cost, cost, bound_cost;
5128 bool ok;
5129 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5130 tree *control_var, *bound_cst;
5131 enum tree_code comp = ERROR_MARK;
5132
5133 /* Only consider real candidates. */
5134 if (!cand->iv)
5135 {
5136 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5137 ERROR_MARK, -1);
5138 return false;
5139 }
5140
5141 /* Try iv elimination. */
5142 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5143 {
5144 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5145 if (elim_cost.cost == 0)
5146 elim_cost.cost = parm_decl_cost (data, bound);
5147 else if (TREE_CODE (bound) == INTEGER_CST)
5148 elim_cost.cost = 0;
5149 /* If we replace a loop condition 'i < n' with 'p < base + n',
5150 depends_on_elim will have 'base' and 'n' set, which implies
5151 that both 'base' and 'n' will be live during the loop. More likely,
5152 'base + n' will be loop invariant, resulting in only one live value
5153 during the loop. So in that case we clear depends_on_elim and set
5154 elim_inv_expr_id instead. */
5155 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5156 {
5157 elim_inv_expr_id = get_expr_id (data, bound);
5158 bitmap_clear (depends_on_elim);
5159 }
5160 /* The bound is a loop invariant, so it will be only computed
5161 once. */
5162 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5163 }
5164 else
5165 elim_cost = infinite_cost;
5166
5167 /* Try expressing the original giv. If it is compared with an invariant,
5168 note that we cannot get rid of it. */
5169 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5170 NULL, &cmp_iv);
5171 gcc_assert (ok);
5172
5173 /* When the condition is a comparison of the candidate IV against
5174 zero, prefer this IV.
5175
5176 TODO: The constant that we're subtracting from the cost should
5177 be target-dependent. This information should be added to the
5178 target costs for each backend. */
5179 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5180 && integer_zerop (*bound_cst)
5181 && (operand_equal_p (*control_var, cand->var_after, 0)
5182 || operand_equal_p (*control_var, cand->var_before, 0)))
5183 elim_cost.cost -= 1;
5184
5185 express_cost = get_computation_cost (data, use, cand, false,
5186 &depends_on_express, NULL,
5187 &express_inv_expr_id);
5188 fd_ivopts_data = data;
5189 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5190
5191 /* Count the cost of the original bound as well. */
5192 bound_cost = force_var_cost (data, *bound_cst, NULL);
5193 if (bound_cost.cost == 0)
5194 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5195 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5196 bound_cost.cost = 0;
5197 express_cost.cost += bound_cost.cost;
5198
5199 /* Choose the better approach, preferring the eliminated IV. */
5200 if (compare_costs (elim_cost, express_cost) <= 0)
5201 {
5202 cost = elim_cost;
5203 depends_on = depends_on_elim;
5204 depends_on_elim = NULL;
5205 inv_expr_id = elim_inv_expr_id;
5206 }
5207 else
5208 {
5209 cost = express_cost;
5210 depends_on = depends_on_express;
5211 depends_on_express = NULL;
5212 bound = NULL_TREE;
5213 comp = ERROR_MARK;
5214 inv_expr_id = express_inv_expr_id;
5215 }
5216
5217 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5218
5219 if (depends_on_elim)
5220 BITMAP_FREE (depends_on_elim);
5221 if (depends_on_express)
5222 BITMAP_FREE (depends_on_express);
5223
5224 return !infinite_cost_p (cost);
5225 }
5226
5227 /* Determines cost of basing replacement of USE on CAND. Returns false
5228 if USE cannot be based on CAND. */
5229
5230 static bool
5231 determine_use_iv_cost (struct ivopts_data *data,
5232 struct iv_use *use, struct iv_cand *cand)
5233 {
5234 switch (use->type)
5235 {
5236 case USE_NONLINEAR_EXPR:
5237 return determine_use_iv_cost_generic (data, use, cand);
5238
5239 case USE_ADDRESS:
5240 return determine_use_iv_cost_address (data, use, cand);
5241
5242 case USE_COMPARE:
5243 return determine_use_iv_cost_condition (data, use, cand);
5244
5245 default:
5246 gcc_unreachable ();
5247 }
5248 }
5249
5250 /* Return true if get_computation_cost indicates that autoincrement is
5251 a possibility for the pair of USE and CAND, false otherwise. */
5252
5253 static bool
5254 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5255 struct iv_cand *cand)
5256 {
5257 bitmap depends_on;
5258 bool can_autoinc;
5259 comp_cost cost;
5260
5261 if (use->type != USE_ADDRESS)
5262 return false;
5263
5264 cost = get_computation_cost (data, use, cand, true, &depends_on,
5265 &can_autoinc, NULL);
5266
5267 BITMAP_FREE (depends_on);
5268
5269 return !infinite_cost_p (cost) && can_autoinc;
5270 }
5271
5272 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5273 use that allows autoincrement, and set their AINC_USE if possible. */
5274
5275 static void
5276 set_autoinc_for_original_candidates (struct ivopts_data *data)
5277 {
5278 unsigned i, j;
5279
5280 for (i = 0; i < n_iv_cands (data); i++)
5281 {
5282 struct iv_cand *cand = iv_cand (data, i);
5283 struct iv_use *closest_before = NULL;
5284 struct iv_use *closest_after = NULL;
5285 if (cand->pos != IP_ORIGINAL)
5286 continue;
5287
5288 for (j = 0; j < n_iv_uses (data); j++)
5289 {
5290 struct iv_use *use = iv_use (data, j);
5291 unsigned uid = gimple_uid (use->stmt);
5292
5293 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5294 continue;
5295
5296 if (uid < gimple_uid (cand->incremented_at)
5297 && (closest_before == NULL
5298 || uid > gimple_uid (closest_before->stmt)))
5299 closest_before = use;
5300
5301 if (uid > gimple_uid (cand->incremented_at)
5302 && (closest_after == NULL
5303 || uid < gimple_uid (closest_after->stmt)))
5304 closest_after = use;
5305 }
5306
5307 if (closest_before != NULL
5308 && autoinc_possible_for_pair (data, closest_before, cand))
5309 cand->ainc_use = closest_before;
5310 else if (closest_after != NULL
5311 && autoinc_possible_for_pair (data, closest_after, cand))
5312 cand->ainc_use = closest_after;
5313 }
5314 }
5315
5316 /* Finds the candidates for the induction variables. */
5317
5318 static void
5319 find_iv_candidates (struct ivopts_data *data)
5320 {
5321 /* Add commonly used ivs. */
5322 add_standard_iv_candidates (data);
5323
5324 /* Add old induction variables. */
5325 add_iv_candidate_for_bivs (data);
5326
5327 /* Add induction variables derived from uses. */
5328 add_iv_candidate_for_uses (data);
5329
5330 set_autoinc_for_original_candidates (data);
5331
5332 /* Record the important candidates. */
5333 record_important_candidates (data);
5334 }
5335
5336 /* Determines costs of basing the use of the iv on an iv candidate. */
5337
5338 static void
5339 determine_use_iv_costs (struct ivopts_data *data)
5340 {
5341 unsigned i, j;
5342 struct iv_use *use;
5343 struct iv_cand *cand;
5344 bitmap to_clear = BITMAP_ALLOC (NULL);
5345
5346 alloc_use_cost_map (data);
5347
5348 for (i = 0; i < n_iv_uses (data); i++)
5349 {
5350 use = iv_use (data, i);
5351
5352 if (data->consider_all_candidates)
5353 {
5354 for (j = 0; j < n_iv_cands (data); j++)
5355 {
5356 cand = iv_cand (data, j);
5357 determine_use_iv_cost (data, use, cand);
5358 }
5359 }
5360 else
5361 {
5362 bitmap_iterator bi;
5363
5364 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5365 {
5366 cand = iv_cand (data, j);
5367 if (!determine_use_iv_cost (data, use, cand))
5368 bitmap_set_bit (to_clear, j);
5369 }
5370
5371 /* Remove the candidates for which the cost is infinite from
5372 the list of related candidates. */
5373 bitmap_and_compl_into (use->related_cands, to_clear);
5374 bitmap_clear (to_clear);
5375 }
5376 }
5377
5378 BITMAP_FREE (to_clear);
5379
5380 if (dump_file && (dump_flags & TDF_DETAILS))
5381 {
5382 fprintf (dump_file, "Use-candidate costs:\n");
5383
5384 for (i = 0; i < n_iv_uses (data); i++)
5385 {
5386 use = iv_use (data, i);
5387
5388 fprintf (dump_file, "Use %d:\n", i);
5389 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5390 for (j = 0; j < use->n_map_members; j++)
5391 {
5392 if (!use->cost_map[j].cand
5393 || infinite_cost_p (use->cost_map[j].cost))
5394 continue;
5395
5396 fprintf (dump_file, " %d\t%d\t%d\t",
5397 use->cost_map[j].cand->id,
5398 use->cost_map[j].cost.cost,
5399 use->cost_map[j].cost.complexity);
5400 if (use->cost_map[j].depends_on)
5401 bitmap_print (dump_file,
5402 use->cost_map[j].depends_on, "","");
5403 if (use->cost_map[j].inv_expr_id != -1)
5404 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5405 fprintf (dump_file, "\n");
5406 }
5407
5408 fprintf (dump_file, "\n");
5409 }
5410 fprintf (dump_file, "\n");
5411 }
5412 }
5413
5414 /* Determines cost of the candidate CAND. */
5415
5416 static void
5417 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5418 {
5419 comp_cost cost_base;
5420 unsigned cost, cost_step;
5421 tree base;
5422
5423 if (!cand->iv)
5424 {
5425 cand->cost = 0;
5426 return;
5427 }
5428
5429 /* There are two costs associated with the candidate -- its increment
5430 and its initialization. The second is almost negligible for any loop
5431 that rolls enough, so we take it into account only slightly. */
5432
5433 base = cand->iv->base;
5434 cost_base = force_var_cost (data, base, NULL);
5435 /* It will be exceptional that the iv register happens to be initialized with
5436 the proper value at no cost. In general, there will at least be a regcopy
5437 or a const set. */
5438 if (cost_base.cost == 0)
5439 cost_base.cost = COSTS_N_INSNS (1);
5440 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5441
5442 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5443
5444 /* Prefer the original ivs unless we may gain something by replacing them.
5445 The reason is to make debugging simpler, so this is not relevant for
5446 artificial ivs created by other optimization passes. */
5447 if (cand->pos != IP_ORIGINAL
5448 || !SSA_NAME_VAR (cand->var_before)
5449 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5450 cost++;
5451
5452 /* Prefer not to insert statements into latch unless there are some
5453 already (so that we do not create unnecessary jumps). */
5454 if (cand->pos == IP_END
5455 && empty_block_p (ip_end_pos (data->current_loop)))
5456 cost++;
5457
5458 cand->cost = cost;
5459 cand->cost_step = cost_step;
5460 }
5461
5462 /* Determines costs of computation of the candidates. */
5463
5464 static void
5465 determine_iv_costs (struct ivopts_data *data)
5466 {
5467 unsigned i;
5468
5469 if (dump_file && (dump_flags & TDF_DETAILS))
5470 {
5471 fprintf (dump_file, "Candidate costs:\n");
5472 fprintf (dump_file, " cand\tcost\n");
5473 }
5474
5475 for (i = 0; i < n_iv_cands (data); i++)
5476 {
5477 struct iv_cand *cand = iv_cand (data, i);
5478
5479 determine_iv_cost (data, cand);
5480
5481 if (dump_file && (dump_flags & TDF_DETAILS))
5482 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5483 }
5484
5485 if (dump_file && (dump_flags & TDF_DETAILS))
5486 fprintf (dump_file, "\n");
5487 }
5488
5489 /* Calculates cost for having SIZE induction variables. */
5490
5491 static unsigned
5492 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5493 {
5494 /* We add size to the cost, so that we prefer eliminating ivs
5495 if possible. */
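  /* For instance, once SIZE plus the other registers used by the loop
     exceeds the number of available registers, estimate_reg_pressure_cost
     starts charging spill costs, so large candidate sets quickly become
     unattractive.  */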
5496 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5497 data->body_includes_call);
5498 }
5499
5500 /* For each size of the induction variable set determine the penalty. */
5501
5502 static void
5503 determine_set_costs (struct ivopts_data *data)
5504 {
5505 unsigned j, n;
5506 gphi *phi;
5507 gphi_iterator psi;
5508 tree op;
5509 struct loop *loop = data->current_loop;
5510 bitmap_iterator bi;
5511
5512 if (dump_file && (dump_flags & TDF_DETAILS))
5513 {
5514 fprintf (dump_file, "Global costs:\n");
5515 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5516 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5517 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5518 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5519 }
5520
5521 n = 0;
5522 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5523 {
5524 phi = psi.phi ();
5525 op = PHI_RESULT (phi);
5526
5527 if (virtual_operand_p (op))
5528 continue;
5529
5530 if (get_iv (data, op))
5531 continue;
5532
5533 n++;
5534 }
5535
5536 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5537 {
5538 struct version_info *info = ver_info (data, j);
5539
5540 if (info->inv_id && info->has_nonlin_use)
5541 n++;
5542 }
5543
5544 data->regs_used = n;
5545 if (dump_file && (dump_flags & TDF_DETAILS))
5546 fprintf (dump_file, " regs_used %d\n", n);
5547
5548 if (dump_file && (dump_flags & TDF_DETAILS))
5549 {
5550 fprintf (dump_file, " cost for size:\n");
5551 fprintf (dump_file, " ivs\tcost\n");
5552 for (j = 0; j <= 2 * target_avail_regs; j++)
5553 fprintf (dump_file, " %d\t%d\n", j,
5554 ivopts_global_cost_for_size (data, j));
5555 fprintf (dump_file, "\n");
5556 }
5557 }
5558
5559 /* Returns true if A is a cheaper cost pair than B. */
5560
5561 static bool
5562 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5563 {
5564 int cmp;
5565
5566 if (!a)
5567 return false;
5568
5569 if (!b)
5570 return true;
5571
5572 cmp = compare_costs (a->cost, b->cost);
5573 if (cmp < 0)
5574 return true;
5575
5576 if (cmp > 0)
5577 return false;
5578
5579 /* In case the costs are the same, prefer the cheaper candidate. */
5580 if (a->cand->cost < b->cand->cost)
5581 return true;
5582
5583 return false;
5584 }
5585
5586
5587 /* Returns the candidate by which USE is expressed in IVS. */
5588
5589 static struct cost_pair *
5590 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5591 {
5592 return ivs->cand_for_use[use->id];
5593 }
5594
5595 /* Computes the cost field of IVS structure. */
5596
5597 static void
5598 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5599 {
5600 comp_cost cost = ivs->cand_use_cost;
5601
5602 cost.cost += ivs->cand_cost;
5603
5604 cost.cost += ivopts_global_cost_for_size (data,
5605 ivs->n_regs + ivs->num_used_inv_expr);
5606
5607 ivs->cost = cost;
5608 }
5609
5610 /* Remove invariants in set INVS from set IVS. */
5611
5612 static void
5613 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5614 {
5615 bitmap_iterator bi;
5616 unsigned iid;
5617
5618 if (!invs)
5619 return;
5620
5621 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5622 {
5623 ivs->n_invariant_uses[iid]--;
5624 if (ivs->n_invariant_uses[iid] == 0)
5625 ivs->n_regs--;
5626 }
5627 }
5628
5629 /* Set USE not to be expressed by any candidate in IVS. */
5630
5631 static void
5632 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5633 struct iv_use *use)
5634 {
5635 unsigned uid = use->id, cid;
5636 struct cost_pair *cp;
5637
5638 cp = ivs->cand_for_use[uid];
5639 if (!cp)
5640 return;
5641 cid = cp->cand->id;
5642
5643 ivs->bad_uses++;
5644 ivs->cand_for_use[uid] = NULL;
5645 ivs->n_cand_uses[cid]--;
5646
5647 if (ivs->n_cand_uses[cid] == 0)
5648 {
5649 bitmap_clear_bit (ivs->cands, cid);
5650 /* Do not count the pseudocandidates. */
5651 if (cp->cand->iv)
5652 ivs->n_regs--;
5653 ivs->n_cands--;
5654 ivs->cand_cost -= cp->cand->cost;
5655
5656 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5657 }
5658
5659 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5660
5661 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5662
5663 if (cp->inv_expr_id != -1)
5664 {
5665 ivs->used_inv_expr[cp->inv_expr_id]--;
5666 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5667 ivs->num_used_inv_expr--;
5668 }
5669 iv_ca_recount_cost (data, ivs);
5670 }
5671
5672 /* Add invariants in set INVS to set IVS. */
5673
5674 static void
5675 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5676 {
5677 bitmap_iterator bi;
5678 unsigned iid;
5679
5680 if (!invs)
5681 return;
5682
5683 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5684 {
5685 ivs->n_invariant_uses[iid]++;
5686 if (ivs->n_invariant_uses[iid] == 1)
5687 ivs->n_regs++;
5688 }
5689 }
5690
5691 /* Set cost pair for USE in set IVS to CP. */
5692
5693 static void
5694 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5695 struct iv_use *use, struct cost_pair *cp)
5696 {
5697 unsigned uid = use->id, cid;
5698
5699 if (ivs->cand_for_use[uid] == cp)
5700 return;
5701
5702 if (ivs->cand_for_use[uid])
5703 iv_ca_set_no_cp (data, ivs, use);
5704
5705 if (cp)
5706 {
5707 cid = cp->cand->id;
5708
5709 ivs->bad_uses--;
5710 ivs->cand_for_use[uid] = cp;
5711 ivs->n_cand_uses[cid]++;
5712 if (ivs->n_cand_uses[cid] == 1)
5713 {
5714 bitmap_set_bit (ivs->cands, cid);
5715 /* Do not count the pseudocandidates. */
5716 if (cp->cand->iv)
5717 ivs->n_regs++;
5718 ivs->n_cands++;
5719 ivs->cand_cost += cp->cand->cost;
5720
5721 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5722 }
5723
5724 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5725 iv_ca_set_add_invariants (ivs, cp->depends_on);
5726
5727 if (cp->inv_expr_id != -1)
5728 {
5729 ivs->used_inv_expr[cp->inv_expr_id]++;
5730 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5731 ivs->num_used_inv_expr++;
5732 }
5733 iv_ca_recount_cost (data, ivs);
5734 }
5735 }
5736
5737 /* Extend set IVS by expressing USE by some of the candidates in it
5738 if possible. Consider all important candidates if candidates in
5739 set IVS don't give any result. */
5740
5741 static void
5742 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5743 struct iv_use *use)
5744 {
5745 struct cost_pair *best_cp = NULL, *cp;
5746 bitmap_iterator bi;
5747 unsigned i;
5748 struct iv_cand *cand;
5749
5750 gcc_assert (ivs->upto >= use->id);
5751 ivs->upto++;
5752 ivs->bad_uses++;
5753
5754 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5755 {
5756 cand = iv_cand (data, i);
5757 cp = get_use_iv_cost (data, use, cand);
5758 if (cheaper_cost_pair (cp, best_cp))
5759 best_cp = cp;
5760 }
5761
5762 if (best_cp == NULL)
5763 {
5764 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5765 {
5766 cand = iv_cand (data, i);
5767 cp = get_use_iv_cost (data, use, cand);
5768 if (cheaper_cost_pair (cp, best_cp))
5769 best_cp = cp;
5770 }
5771 }
5772
5773 iv_ca_set_cp (data, ivs, use, best_cp);
5774 }
5775
5776 /* Get cost for assignment IVS. */
5777
5778 static comp_cost
5779 iv_ca_cost (struct iv_ca *ivs)
5780 {
5781 /* This was a conditional expression but it triggered a bug in
5782 Sun C 5.5. */
5783 if (ivs->bad_uses)
5784 return infinite_cost;
5785 else
5786 return ivs->cost;
5787 }
5788
5789 /* Returns true if all dependences of CP are among invariants in IVS. */
5790
5791 static bool
5792 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5793 {
5794 unsigned i;
5795 bitmap_iterator bi;
5796
5797 if (!cp->depends_on)
5798 return true;
5799
5800 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5801 {
5802 if (ivs->n_invariant_uses[i] == 0)
5803 return false;
5804 }
5805
5806 return true;
5807 }
5808
5809 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP and
5810 chains it before NEXT_CHANGE.  */
5811
5812 static struct iv_ca_delta *
5813 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5814 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5815 {
5816 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5817
5818 change->use = use;
5819 change->old_cp = old_cp;
5820 change->new_cp = new_cp;
5821 change->next_change = next_change;
5822
5823 return change;
5824 }
5825
5826 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5827 are rewritten. */
5828
5829 static struct iv_ca_delta *
5830 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5831 {
5832 struct iv_ca_delta *last;
5833
5834 if (!l2)
5835 return l1;
5836
5837 if (!l1)
5838 return l2;
5839
5840 for (last = l1; last->next_change; last = last->next_change)
5841 continue;
5842 last->next_change = l2;
5843
5844 return l1;
5845 }
5846
5847 /* Reverse the list of changes DELTA, forming the inverse to it. */
5848
5849 static struct iv_ca_delta *
5850 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5851 {
5852 struct iv_ca_delta *act, *next, *prev = NULL;
5853
5854 for (act = delta; act; act = next)
5855 {
5856 next = act->next_change;
5857 act->next_change = prev;
5858 prev = act;
5859
5860 std::swap (act->old_cp, act->new_cp);
5861 }
5862
5863 return prev;
5864 }
5865
5866 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5867 reverted instead. */
5868
5869 static void
5870 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5871 struct iv_ca_delta *delta, bool forward)
5872 {
5873 struct cost_pair *from, *to;
5874 struct iv_ca_delta *act;
5875
5876 if (!forward)
5877 delta = iv_ca_delta_reverse (delta);
5878
5879 for (act = delta; act; act = act->next_change)
5880 {
5881 from = act->old_cp;
5882 to = act->new_cp;
5883 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5884 iv_ca_set_cp (data, ivs, act->use, to);
5885 }
5886
5887 if (!forward)
5888 iv_ca_delta_reverse (delta);
5889 }
5890
5891 /* Returns true if CAND is used in IVS. */
5892
5893 static bool
5894 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5895 {
5896 return ivs->n_cand_uses[cand->id] > 0;
5897 }
5898
5899 /* Returns number of induction variable candidates in the set IVS. */
5900
5901 static unsigned
5902 iv_ca_n_cands (struct iv_ca *ivs)
5903 {
5904 return ivs->n_cands;
5905 }
5906
5907 /* Free the list of changes DELTA. */
5908
5909 static void
5910 iv_ca_delta_free (struct iv_ca_delta **delta)
5911 {
5912 struct iv_ca_delta *act, *next;
5913
5914 for (act = *delta; act; act = next)
5915 {
5916 next = act->next_change;
5917 free (act);
5918 }
5919
5920 *delta = NULL;
5921 }
5922
5923 /* Allocates a new iv candidate assignment.  */
5924
5925 static struct iv_ca *
5926 iv_ca_new (struct ivopts_data *data)
5927 {
5928 struct iv_ca *nw = XNEW (struct iv_ca);
5929
5930 nw->upto = 0;
5931 nw->bad_uses = 0;
5932 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5933 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5934 nw->cands = BITMAP_ALLOC (NULL);
5935 nw->n_cands = 0;
5936 nw->n_regs = 0;
5937 nw->cand_use_cost = no_cost;
5938 nw->cand_cost = 0;
5939 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5940 nw->cost = no_cost;
5941 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5942 nw->num_used_inv_expr = 0;
5943
5944 return nw;
5945 }
5946
5947 /* Free memory occupied by the set IVS. */
5948
5949 static void
5950 iv_ca_free (struct iv_ca **ivs)
5951 {
5952 free ((*ivs)->cand_for_use);
5953 free ((*ivs)->n_cand_uses);
5954 BITMAP_FREE ((*ivs)->cands);
5955 free ((*ivs)->n_invariant_uses);
5956 free ((*ivs)->used_inv_expr);
5957 free (*ivs);
5958 *ivs = NULL;
5959 }
5960
5961 /* Dumps IVS to FILE. */
5962
5963 static void
5964 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5965 {
5966 const char *pref = " invariants ";
5967 unsigned i;
5968 comp_cost cost = iv_ca_cost (ivs);
5969
5970 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5971 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5972 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5973 bitmap_print (file, ivs->cands, " candidates: ","\n");
5974
5975 for (i = 0; i < ivs->upto; i++)
5976 {
5977 struct iv_use *use = iv_use (data, i);
5978 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5979 if (cp)
5980 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5981 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5982 else
5983 fprintf (file, " use:%d --> ??\n", use->id);
5984 }
5985
5986 for (i = 1; i <= data->max_inv_id; i++)
5987 if (ivs->n_invariant_uses[i])
5988 {
5989 fprintf (file, "%s%d", pref, i);
5990 pref = ", ";
5991 }
5992 fprintf (file, "\n\n");
5993 }
5994
5995 /* Try changing candidate in IVS to CAND for each use.  Return the cost of
5996 the new set, and store the differences in DELTA.  The number of induction
5997 variables in the new set is stored in N_IVS.  MIN_NCAND is a flag; when it
5998 is true, the function tries to find a solution with minimal iv candidates.  */
5999
6000 static comp_cost
6001 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6002 struct iv_cand *cand, struct iv_ca_delta **delta,
6003 unsigned *n_ivs, bool min_ncand)
6004 {
6005 unsigned i;
6006 comp_cost cost;
6007 struct iv_use *use;
6008 struct cost_pair *old_cp, *new_cp;
6009
6010 *delta = NULL;
6011 for (i = 0; i < ivs->upto; i++)
6012 {
6013 use = iv_use (data, i);
6014 old_cp = iv_ca_cand_for_use (ivs, use);
6015
6016 if (old_cp
6017 && old_cp->cand == cand)
6018 continue;
6019
6020 new_cp = get_use_iv_cost (data, use, cand);
6021 if (!new_cp)
6022 continue;
6023
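/* When MIN_NCAND is true, the use is moved to CAND unconditionally so that
   the resulting set uses as few candidates as possible; otherwise CAND is
   accepted only if its dependences are already in the set and it is locally
   cheaper for this use.  */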
6024 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6025 continue;
6026
6027 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6028 continue;
6029
6030 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6031 }
6032
6033 iv_ca_delta_commit (data, ivs, *delta, true);
6034 cost = iv_ca_cost (ivs);
6035 if (n_ivs)
6036 *n_ivs = iv_ca_n_cands (ivs);
6037 iv_ca_delta_commit (data, ivs, *delta, false);
6038
6039 return cost;
6040 }
6041
6042 /* Try narrowing set IVS by removing CAND. Return the cost of
6043 the new set and store the differences in DELTA. START is
6044 the candidate with which we start narrowing. */
6045
6046 static comp_cost
6047 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6048 struct iv_cand *cand, struct iv_cand *start,
6049 struct iv_ca_delta **delta)
6050 {
6051 unsigned i, ci;
6052 struct iv_use *use;
6053 struct cost_pair *old_cp, *new_cp, *cp;
6054 bitmap_iterator bi;
6055 struct iv_cand *cnd;
6056 comp_cost cost, best_cost, acost;
6057
6058 *delta = NULL;
6059 for (i = 0; i < n_iv_uses (data); i++)
6060 {
6061 use = iv_use (data, i);
6062
6063 old_cp = iv_ca_cand_for_use (ivs, use);
6064 if (old_cp->cand != cand)
6065 continue;
6066
6067 best_cost = iv_ca_cost (ivs);
6068 /* Start narrowing with START. */
6069 new_cp = get_use_iv_cost (data, use, start);
6070
6071 if (data->consider_all_candidates)
6072 {
6073 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6074 {
6075 if (ci == cand->id || (start && ci == start->id))
6076 continue;
6077
6078 cnd = iv_cand (data, ci);
6079
6080 cp = get_use_iv_cost (data, use, cnd);
6081 if (!cp)
6082 continue;
6083
6084 iv_ca_set_cp (data, ivs, use, cp);
6085 acost = iv_ca_cost (ivs);
6086
6087 if (compare_costs (acost, best_cost) < 0)
6088 {
6089 best_cost = acost;
6090 new_cp = cp;
6091 }
6092 }
6093 }
6094 else
6095 {
6096 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6097 {
6098 if (ci == cand->id || (start && ci == start->id))
6099 continue;
6100
6101 cnd = iv_cand (data, ci);
6102
6103 cp = get_use_iv_cost (data, use, cnd);
6104 if (!cp)
6105 continue;
6106
6107 iv_ca_set_cp (data, ivs, use, cp);
6108 acost = iv_ca_cost (ivs);
6109
6110 if (compare_costs (acost, best_cost) < 0)
6111 {
6112 best_cost = acost;
6113 new_cp = cp;
6114 }
6115 }
6116 }
6117 /* Restore to old cp for use. */
6118 iv_ca_set_cp (data, ivs, use, old_cp);
6119
6120 if (!new_cp)
6121 {
6122 iv_ca_delta_free (delta);
6123 return infinite_cost;
6124 }
6125
6126 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6127 }
6128
6129 iv_ca_delta_commit (data, ivs, *delta, true);
6130 cost = iv_ca_cost (ivs);
6131 iv_ca_delta_commit (data, ivs, *delta, false);
6132
6133 return cost;
6134 }
6135
6136 /* Try optimizing the set of candidates IVS by removing candidates other
6137 than EXCEPT_CAND from it.  Return the cost of the new set, and store the
6138 differences in DELTA.  */
6139
6140 static comp_cost
6141 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6142 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6143 {
6144 bitmap_iterator bi;
6145 struct iv_ca_delta *act_delta, *best_delta;
6146 unsigned i;
6147 comp_cost best_cost, acost;
6148 struct iv_cand *cand;
6149
6150 best_delta = NULL;
6151 best_cost = iv_ca_cost (ivs);
6152
6153 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6154 {
6155 cand = iv_cand (data, i);
6156
6157 if (cand == except_cand)
6158 continue;
6159
6160 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6161
6162 if (compare_costs (acost, best_cost) < 0)
6163 {
6164 best_cost = acost;
6165 iv_ca_delta_free (&best_delta);
6166 best_delta = act_delta;
6167 }
6168 else
6169 iv_ca_delta_free (&act_delta);
6170 }
6171
6172 if (!best_delta)
6173 {
6174 *delta = NULL;
6175 return best_cost;
6176 }
6177
6178 /* Recurse to possibly remove other unnecessary ivs. */
6179 iv_ca_delta_commit (data, ivs, best_delta, true);
6180 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6181 iv_ca_delta_commit (data, ivs, best_delta, false);
6182 *delta = iv_ca_delta_join (best_delta, *delta);
6183 return best_cost;
6184 }
6185
6186 /* Check if CAND_IDX is a candidate other than OLD_CAND and has a
6187 cheaper local cost for USE than BEST_CP.  If so, return a pointer to
6188 its cost_pair, otherwise just return BEST_CP.  */
6189
6190 static struct cost_pair*
6191 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6192 unsigned int cand_idx, struct iv_cand *old_cand,
6193 struct cost_pair *best_cp)
6194 {
6195 struct iv_cand *cand;
6196 struct cost_pair *cp;
6197
6198 gcc_assert (old_cand != NULL && best_cp != NULL);
6199 if (cand_idx == old_cand->id)
6200 return best_cp;
6201
6202 cand = iv_cand (data, cand_idx);
6203 cp = get_use_iv_cost (data, use, cand);
6204 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6205 return cp;
6206
6207 return best_cp;
6208 }
6209
6210 /* Try breaking the locally optimal fixed point for IVS by replacing
6211 candidates that are used by more than one iv use.  For each such
6212 candidate, this function tries to represent the iv uses under that
6213 candidate using other candidates with lower local cost, then tries to
6214 prune the new set.  If the new set has lower cost, it returns the new
6215 cost after recording the candidate replacement in the list DELTA.  */
6216
6217 static comp_cost
6218 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6219 struct iv_ca_delta **delta)
6220 {
6221 bitmap_iterator bi, bj;
6222 unsigned int i, j, k;
6223 struct iv_use *use;
6224 struct iv_cand *cand;
6225 comp_cost orig_cost, acost;
6226 struct iv_ca_delta *act_delta, *tmp_delta;
6227 struct cost_pair *old_cp, *best_cp = NULL;
6228
6229 *delta = NULL;
6230 orig_cost = iv_ca_cost (ivs);
6231
6232 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6233 {
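/* Only candidates serving several uses are worth replacing here; a
   candidate with a single use has nothing to redistribute, and candidates
   with more than ALWAYS_PRUNE_CAND_SET_BOUND uses are skipped.  */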
6234 if (ivs->n_cand_uses[i] == 1
6235 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6236 continue;
6237
6238 cand = iv_cand (data, i);
6239
6240 act_delta = NULL;
6241 /* Represent uses under current candidate using other ones with
6242 lower local cost. */
6243 for (j = 0; j < ivs->upto; j++)
6244 {
6245 use = iv_use (data, j);
6246 old_cp = iv_ca_cand_for_use (ivs, use);
6247
6248 if (old_cp->cand != cand)
6249 continue;
6250
6251 best_cp = old_cp;
6252 if (data->consider_all_candidates)
6253 for (k = 0; k < n_iv_cands (data); k++)
6254 best_cp = cheaper_cost_with_cand (data, use, k,
6255 old_cp->cand, best_cp);
6256 else
6257 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6258 best_cp = cheaper_cost_with_cand (data, use, k,
6259 old_cp->cand, best_cp);
6260
6261 if (best_cp == old_cp)
6262 continue;
6263
6264 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6265 }
6266 /* No need for further pruning.  */
6267 if (!act_delta)
6268 continue;
6269
6270 /* Prune the new candidate set. */
6271 iv_ca_delta_commit (data, ivs, act_delta, true);
6272 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6273 iv_ca_delta_commit (data, ivs, act_delta, false);
6274 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6275
6276 if (compare_costs (acost, orig_cost) < 0)
6277 {
6278 *delta = act_delta;
6279 return acost;
6280 }
6281 else
6282 iv_ca_delta_free (&act_delta);
6283 }
6284
6285 return orig_cost;
6286 }
6287
6288 /* Tries to extend the set IVS in the best possible way in order
6289 to express USE.  If ORIGINALP is true, prefer candidates from
6290 the original set of IVs, otherwise favor important candidates not
6291 based on any memory object.  */
6292
6293 static bool
6294 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6295 struct iv_use *use, bool originalp)
6296 {
6297 comp_cost best_cost, act_cost;
6298 unsigned i;
6299 bitmap_iterator bi;
6300 struct iv_cand *cand;
6301 struct iv_ca_delta *best_delta = NULL, *act_delta;
6302 struct cost_pair *cp;
6303
6304 iv_ca_add_use (data, ivs, use);
6305 best_cost = iv_ca_cost (ivs);
6306 cp = iv_ca_cand_for_use (ivs, use);
6307 if (cp)
6308 {
6309 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6310 iv_ca_set_no_cp (data, ivs, use);
6311 }
6312
6313 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6314 first try important candidates not based on any memory object. Only if
6315 this fails, try the specific ones. Rationale -- in loops with many
6316 variables the best choice often is to use just one generic biv. If we
6317 added here many ivs specific to the uses, the optimization algorithm later
6318 would be likely to get stuck in a local minimum, thus causing us to create
6319 too many ivs. The approach from few ivs to more seems more likely to be
6320 successful -- starting from few ivs, replacing an expensive use by a
6321 specific iv should always be a win. */
6322 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6323 {
6324 cand = iv_cand (data, i);
6325
6326 if (originalp && cand->pos != IP_ORIGINAL)
6327 continue;
6328
6329 if (!originalp && cand->iv->base_object != NULL_TREE)
6330 continue;
6331
6332 if (iv_ca_cand_used_p (ivs, cand))
6333 continue;
6334
6335 cp = get_use_iv_cost (data, use, cand);
6336 if (!cp)
6337 continue;
6338
6339 iv_ca_set_cp (data, ivs, use, cp);
6340 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6341 true);
6342 iv_ca_set_no_cp (data, ivs, use);
6343 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6344
6345 if (compare_costs (act_cost, best_cost) < 0)
6346 {
6347 best_cost = act_cost;
6348
6349 iv_ca_delta_free (&best_delta);
6350 best_delta = act_delta;
6351 }
6352 else
6353 iv_ca_delta_free (&act_delta);
6354 }
6355
6356 if (infinite_cost_p (best_cost))
6357 {
6358 for (i = 0; i < use->n_map_members; i++)
6359 {
6360 cp = use->cost_map + i;
6361 cand = cp->cand;
6362 if (!cand)
6363 continue;
6364
6365 /* Skip important candidates that were already tried in the loop above.  */
6366 if (cand->important)
6367 {
6368 if (originalp && cand->pos == IP_ORIGINAL)
6369 continue;
6370 if (!originalp && cand->iv->base_object == NULL_TREE)
6371 continue;
6372 }
6373
6374 if (iv_ca_cand_used_p (ivs, cand))
6375 continue;
6376
6377 act_delta = NULL;
6378 iv_ca_set_cp (data, ivs, use, cp);
6379 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6380 iv_ca_set_no_cp (data, ivs, use);
6381 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6382 cp, act_delta);
6383
6384 if (compare_costs (act_cost, best_cost) < 0)
6385 {
6386 best_cost = act_cost;
6387
6388 if (best_delta)
6389 iv_ca_delta_free (&best_delta);
6390 best_delta = act_delta;
6391 }
6392 else
6393 iv_ca_delta_free (&act_delta);
6394 }
6395 }
6396
6397 iv_ca_delta_commit (data, ivs, best_delta, true);
6398 iv_ca_delta_free (&best_delta);
6399
6400 return !infinite_cost_p (best_cost);
6401 }
6402
6403 /* Finds an initial assignment of candidates to uses. */
6404
6405 static struct iv_ca *
6406 get_initial_solution (struct ivopts_data *data, bool originalp)
6407 {
6408 struct iv_ca *ivs = iv_ca_new (data);
6409 unsigned i;
6410
6411 for (i = 0; i < n_iv_uses (data); i++)
6412 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6413 {
6414 iv_ca_free (&ivs);
6415 return NULL;
6416 }
6417
6418 return ivs;
6419 }
6420
6421 /* Tries to improve the set of induction variables IVS.  TRY_REPLACE_P
6422 points to a bool variable; if it is true, this function tries to break
6423 the locally optimal fixed point by replacing candidates in IVS.  */
6424
6425 static bool
6426 try_improve_iv_set (struct ivopts_data *data,
6427 struct iv_ca *ivs, bool *try_replace_p)
6428 {
6429 unsigned i, n_ivs;
6430 comp_cost acost, best_cost = iv_ca_cost (ivs);
6431 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6432 struct iv_cand *cand;
6433
6434 /* Try extending the set of induction variables by one. */
6435 for (i = 0; i < n_iv_cands (data); i++)
6436 {
6437 cand = iv_cand (data, i);
6438
6439 if (iv_ca_cand_used_p (ivs, cand))
6440 continue;
6441
6442 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6443 if (!act_delta)
6444 continue;
6445
6446 /* If we successfully added the candidate and the set is small enough,
6447 try optimizing it by removing other candidates. */
6448 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6449 {
6450 iv_ca_delta_commit (data, ivs, act_delta, true);
6451 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6452 iv_ca_delta_commit (data, ivs, act_delta, false);
6453 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6454 }
6455
6456 if (compare_costs (acost, best_cost) < 0)
6457 {
6458 best_cost = acost;
6459 iv_ca_delta_free (&best_delta);
6460 best_delta = act_delta;
6461 }
6462 else
6463 iv_ca_delta_free (&act_delta);
6464 }
6465
6466 if (!best_delta)
6467 {
6468 /* Try removing the candidates from the set instead. */
6469 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6470
6471 if (!best_delta && *try_replace_p)
6472 {
6473 *try_replace_p = false;
6474 /* So far the candidate selection algorithm tends to choose fewer
6475 IVs so that it can handle cases in which loops have many variables
6476 but the best choice is often to use only one general biv.  One
6477 weakness is that it can't handle the opposite cases, in which
6478 different candidates should be chosen with respect to each use.
6479 To solve the problem, we replace candidates in the manner described
6480 in the comments of iv_ca_replace, thus giving the general algorithm
6481 a chance to break the locally optimal fixed point in these cases.  */
6482 best_cost = iv_ca_replace (data, ivs, &best_delta);
6483 }
6484
6485 if (!best_delta)
6486 return false;
6487 }
6488
6489 iv_ca_delta_commit (data, ivs, best_delta, true);
6490 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6491 iv_ca_delta_free (&best_delta);
6492 return true;
6493 }
6494
6495 /* Attempts to find the optimal set of induction variables.  We use a simple
6496 greedy heuristic -- we try to replace at most one candidate in the selected
6497 solution and remove the unused ivs while this improves the cost.  */
6498
6499 static struct iv_ca *
6500 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6501 {
6502 struct iv_ca *set;
6503 bool try_replace_p = true;
6504
6505 /* Get the initial solution. */
6506 set = get_initial_solution (data, originalp);
6507 if (!set)
6508 {
6509 if (dump_file && (dump_flags & TDF_DETAILS))
6510 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6511 return NULL;
6512 }
6513
6514 if (dump_file && (dump_flags & TDF_DETAILS))
6515 {
6516 fprintf (dump_file, "Initial set of candidates:\n");
6517 iv_ca_dump (data, dump_file, set);
6518 }
6519
6520 while (try_improve_iv_set (data, set, &try_replace_p))
6521 {
6522 if (dump_file && (dump_flags & TDF_DETAILS))
6523 {
6524 fprintf (dump_file, "Improved to:\n");
6525 iv_ca_dump (data, dump_file, set);
6526 }
6527 }
6528
6529 return set;
6530 }
6531
6532 static struct iv_ca *
6533 find_optimal_iv_set (struct ivopts_data *data)
6534 {
6535 unsigned i;
6536 struct iv_ca *set, *origset;
6537 struct iv_use *use;
6538 comp_cost cost, origcost;
6539
6540 /* Determine the cost based on a strategy that starts with the original IVs,
6541 and try again using a strategy that prefers important candidates not
6542 based on any memory object.  */
6543 origset = find_optimal_iv_set_1 (data, true);
6544 set = find_optimal_iv_set_1 (data, false);
6545
6546 if (!origset && !set)
6547 return NULL;
6548
6549 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6550 cost = set ? iv_ca_cost (set) : infinite_cost;
6551
6552 if (dump_file && (dump_flags & TDF_DETAILS))
6553 {
6554 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6555 origcost.cost, origcost.complexity);
6556 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6557 cost.cost, cost.complexity);
6558 }
6559
6560 /* Choose the one with the best cost. */
6561 if (compare_costs (origcost, cost) <= 0)
6562 {
6563 if (set)
6564 iv_ca_free (&set);
6565 set = origset;
6566 }
6567 else if (origset)
6568 iv_ca_free (&origset);
6569
6570 for (i = 0; i < n_iv_uses (data); i++)
6571 {
6572 use = iv_use (data, i);
6573 use->selected = iv_ca_cand_for_use (set, use)->cand;
6574 }
6575
6576 return set;
6577 }
6578
6579 /* Creates a new induction variable corresponding to CAND. */
6580
6581 static void
6582 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6583 {
6584 gimple_stmt_iterator incr_pos;
6585 tree base;
6586 bool after = false;
6587
6588 if (!cand->iv)
6589 return;
6590
6591 switch (cand->pos)
6592 {
6593 case IP_NORMAL:
6594 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6595 break;
6596
6597 case IP_END:
6598 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6599 after = true;
6600 break;
6601
6602 case IP_AFTER_USE:
6603 after = true;
6604 /* fall through */
6605 case IP_BEFORE_USE:
6606 incr_pos = gsi_for_stmt (cand->incremented_at);
6607 break;
6608
6609 case IP_ORIGINAL:
6610 /* Mark that the iv is preserved. */
6611 name_info (data, cand->var_before)->preserve_biv = true;
6612 name_info (data, cand->var_after)->preserve_biv = true;
6613
6614 /* Rewrite the increment so that it uses var_before directly. */
6615 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6616 return;
6617 }
6618
6619 gimple_add_tmp_var (cand->var_before);
6620
6621 base = unshare_expr (cand->iv->base);
6622
6623 create_iv (base, unshare_expr (cand->iv->step),
6624 cand->var_before, data->current_loop,
6625 &incr_pos, after, &cand->var_before, &cand->var_after);
6626 }
6627
6628 /* Creates new induction variables described in SET. */
6629
6630 static void
6631 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6632 {
6633 unsigned i;
6634 struct iv_cand *cand;
6635 bitmap_iterator bi;
6636
6637 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6638 {
6639 cand = iv_cand (data, i);
6640 create_new_iv (data, cand);
6641 }
6642
6643 if (dump_file && (dump_flags & TDF_DETAILS))
6644 {
6645 fprintf (dump_file, "Selected IV set for loop %d",
6646 data->current_loop->num);
6647 if (data->loop_loc != UNKNOWN_LOCATION)
6648 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6649 LOCATION_LINE (data->loop_loc));
6650 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6651 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6652 {
6653 cand = iv_cand (data, i);
6654 dump_cand (dump_file, cand);
6655 }
6656 fprintf (dump_file, "\n");
6657 }
6658 }
6659
6660 /* Rewrites USE (definition of iv used in a nonlinear expression)
6661 using candidate CAND. */
6662
6663 static void
6664 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6665 struct iv_use *use, struct iv_cand *cand)
6666 {
6667 tree comp;
6668 tree op, tgt;
6669 gassign *ass;
6670 gimple_stmt_iterator bsi;
6671
6672 /* An important special case -- if we are asked to express the value of
6673 the original iv by itself, just exit; there is no need to
6674 introduce a new computation (that might also need casting the
6675 variable to unsigned and back). */
6676 if (cand->pos == IP_ORIGINAL
6677 && cand->incremented_at == use->stmt)
6678 {
6679 enum tree_code stmt_code;
6680
6681 gcc_assert (is_gimple_assign (use->stmt));
6682 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6683
6684 /* Check whether we may leave the computation unchanged.
6685 This is the case only if it does not rely on other
6686 computations in the loop -- otherwise, the computation
6687 we rely upon may be removed in remove_unused_ivs,
6688 thus leading to ICE. */
6689 stmt_code = gimple_assign_rhs_code (use->stmt);
6690 if (stmt_code == PLUS_EXPR
6691 || stmt_code == MINUS_EXPR
6692 || stmt_code == POINTER_PLUS_EXPR)
6693 {
6694 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6695 op = gimple_assign_rhs2 (use->stmt);
6696 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6697 op = gimple_assign_rhs1 (use->stmt);
6698 else
6699 op = NULL_TREE;
6700 }
6701 else
6702 op = NULL_TREE;
6703
6704 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6705 return;
6706 }
6707
6708 comp = get_computation (data->current_loop, use, cand);
6709 gcc_assert (comp != NULL_TREE);
6710
6711 switch (gimple_code (use->stmt))
6712 {
6713 case GIMPLE_PHI:
6714 tgt = PHI_RESULT (use->stmt);
6715
6716 /* If we should keep the biv, do not replace it. */
6717 if (name_info (data, tgt)->preserve_biv)
6718 return;
6719
6720 bsi = gsi_after_labels (gimple_bb (use->stmt));
6721 break;
6722
6723 case GIMPLE_ASSIGN:
6724 tgt = gimple_assign_lhs (use->stmt);
6725 bsi = gsi_for_stmt (use->stmt);
6726 break;
6727
6728 default:
6729 gcc_unreachable ();
6730 }
6731
6732 if (!valid_gimple_rhs_p (comp)
6733 || (gimple_code (use->stmt) != GIMPLE_PHI
6734 /* We can't allow re-allocating the stmt as it might be pointed
6735 to still. */
6736 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6737 >= gimple_num_ops (gsi_stmt (bsi)))))
6738 {
6739 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6740 true, GSI_SAME_STMT);
6741 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6742 {
6743 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6744 /* As this isn't a plain copy we have to reset alignment
6745 information. */
6746 if (SSA_NAME_PTR_INFO (comp))
6747 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6748 }
6749 }
6750
6751 if (gimple_code (use->stmt) == GIMPLE_PHI)
6752 {
6753 ass = gimple_build_assign (tgt, comp);
6754 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6755
6756 bsi = gsi_for_stmt (use->stmt);
6757 remove_phi_node (&bsi, false);
6758 }
6759 else
6760 {
6761 gimple_assign_set_rhs_from_tree (&bsi, comp);
6762 use->stmt = gsi_stmt (bsi);
6763 }
6764 }
6765
6766 /* Performs a peephole optimization to reorder the iv update statement with
6767 a mem ref to enable instruction combining in later phases. The mem ref uses
6768 the iv value before the update, so the reordering transformation requires
6769 adjustment of the offset. CAND is the selected IV_CAND.
6770
6771 Example:
6772
6773 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6774 iv2 = iv1 + 1;
6775
6776 if (t < val) (1)
6777 goto L;
6778 goto Head;
6779
6780
6781 Directly propagating t over to (1) will introduce an overlapping live range
6782 and thus increase register pressure.  This peephole transforms it into:
6783
6784
6785 iv2 = iv1 + 1;
6786 t = MEM_REF (base, iv2, 8, 8);
6787 if (t < val)
6788 goto L;
6789 goto Head;
6790 */
6791
6792 static void
6793 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6794 {
6795 tree var_after;
6796 gimple iv_update, stmt;
6797 basic_block bb;
6798 gimple_stmt_iterator gsi, gsi_iv;
6799
6800 if (cand->pos != IP_NORMAL)
6801 return;
6802
6803 var_after = cand->var_after;
6804 iv_update = SSA_NAME_DEF_STMT (var_after);
6805
6806 bb = gimple_bb (iv_update);
6807 gsi = gsi_last_nondebug_bb (bb);
6808 stmt = gsi_stmt (gsi);
6809
6810 /* Only handle conditional statements for now.  */
6811 if (gimple_code (stmt) != GIMPLE_COND)
6812 return;
6813
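/* The reordering applies only when the iv update is the statement
   immediately before the exit test and the memory-referencing use is the
   assignment immediately before the update; bail out otherwise.  */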
6814 gsi_prev_nondebug (&gsi);
6815 stmt = gsi_stmt (gsi);
6816 if (stmt != iv_update)
6817 return;
6818
6819 gsi_prev_nondebug (&gsi);
6820 if (gsi_end_p (gsi))
6821 return;
6822
6823 stmt = gsi_stmt (gsi);
6824 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6825 return;
6826
6827 if (stmt != use->stmt)
6828 return;
6829
6830 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6831 return;
6832
6833 if (dump_file && (dump_flags & TDF_DETAILS))
6834 {
6835 fprintf (dump_file, "Reordering \n");
6836 print_gimple_stmt (dump_file, iv_update, 0, 0);
6837 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6838 fprintf (dump_file, "\n");
6839 }
6840
6841 gsi = gsi_for_stmt (use->stmt);
6842 gsi_iv = gsi_for_stmt (iv_update);
6843 gsi_move_before (&gsi_iv, &gsi);
6844
6845 cand->pos = IP_BEFORE_USE;
6846 cand->incremented_at = use->stmt;
6847 }
6848
6849 /* Rewrites USE (address that is an iv) using candidate CAND. */
6850
6851 static void
6852 rewrite_use_address_1 (struct ivopts_data *data,
6853 struct iv_use *use, struct iv_cand *cand)
6854 {
6855 aff_tree aff;
6856 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6857 tree base_hint = NULL_TREE;
6858 tree ref, iv;
6859 bool ok;
6860
6861 adjust_iv_update_pos (cand, use);
6862 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6863 gcc_assert (ok);
6864 unshare_aff_combination (&aff);
6865
6866 /* To avoid undefined overflow problems, all IV candidates use unsigned
6867 integer types. The drawback is that this makes it impossible for
6868 create_mem_ref to distinguish an IV that is based on a memory object
6869 from one that represents simply an offset.
6870
6871 To work around this problem, we pass a hint to create_mem_ref that
6872 indicates which variable (if any) in aff is an IV based on a memory
6873 object. Note that we only consider the candidate. If this is not
6874 based on an object, the base of the reference is in some subexpression
6875 of the use -- but these will use pointer types, so they are recognized
6876 by the create_mem_ref heuristics anyway. */
6877 if (cand->iv->base_object)
6878 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6879
6880 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6881 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6882 reference_alias_ptr_type (*use->op_p),
6883 iv, base_hint, data->speed);
6884 copy_ref_info (ref, *use->op_p);
6885 *use->op_p = ref;
6886 }
6887
6888 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6889 first use of a group, rewrites sub uses in the group too. */
6890
6891 static void
6892 rewrite_use_address (struct ivopts_data *data,
6893 struct iv_use *use, struct iv_cand *cand)
6894 {
6895 struct iv_use *next;
6896
6897 gcc_assert (use->sub_id == 0);
6898 rewrite_use_address_1 (data, use, cand);
6899 update_stmt (use->stmt);
6900
6901 for (next = use->next; next != NULL; next = next->next)
6902 {
6903 rewrite_use_address_1 (data, next, cand);
6904 update_stmt (next->stmt);
6905 }
6906
6907 return;
6908 }
6909
6910 /* Rewrites USE (a condition in which one of the arguments is an iv) using
6911 candidate CAND.  */
6912
6913 static void
6914 rewrite_use_compare (struct ivopts_data *data,
6915 struct iv_use *use, struct iv_cand *cand)
6916 {
6917 tree comp, *var_p, op, bound;
6918 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6919 enum tree_code compare;
6920 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6921 bool ok;
6922
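/* If eliminating the use in favor of comparing CAND against a precomputed
   bound is possible, CP->VALUE holds that bound and CP->COMP the comparison
   code; rewrite the exit test directly in that case.  */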
6923 bound = cp->value;
6924 if (bound)
6925 {
6926 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6927 tree var_type = TREE_TYPE (var);
6928 gimple_seq stmts;
6929
6930 if (dump_file && (dump_flags & TDF_DETAILS))
6931 {
6932 fprintf (dump_file, "Replacing exit test: ");
6933 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6934 }
6935 compare = cp->comp;
6936 bound = unshare_expr (fold_convert (var_type, bound));
6937 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6938 if (stmts)
6939 gsi_insert_seq_on_edge_immediate (
6940 loop_preheader_edge (data->current_loop),
6941 stmts);
6942
6943 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6944 gimple_cond_set_lhs (cond_stmt, var);
6945 gimple_cond_set_code (cond_stmt, compare);
6946 gimple_cond_set_rhs (cond_stmt, op);
6947 return;
6948 }
6949
6950 /* The induction variable elimination failed; just express the original
6951 giv. */
6952 comp = get_computation (data->current_loop, use, cand);
6953 gcc_assert (comp != NULL_TREE);
6954
6955 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6956 gcc_assert (ok);
6957
6958 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6959 true, GSI_SAME_STMT);
6960 }
6961
6962 /* Rewrites USE using candidate CAND. */
6963
6964 static void
6965 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6966 {
6967 switch (use->type)
6968 {
6969 case USE_NONLINEAR_EXPR:
6970 rewrite_use_nonlinear_expr (data, use, cand);
6971 break;
6972
6973 case USE_ADDRESS:
6974 rewrite_use_address (data, use, cand);
6975 break;
6976
6977 case USE_COMPARE:
6978 rewrite_use_compare (data, use, cand);
6979 break;
6980
6981 default:
6982 gcc_unreachable ();
6983 }
6984
6985 update_stmt (use->stmt);
6986 }
6987
6988 /* Rewrite the uses using the selected induction variables. */
6989
6990 static void
6991 rewrite_uses (struct ivopts_data *data)
6992 {
6993 unsigned i;
6994 struct iv_cand *cand;
6995 struct iv_use *use;
6996
6997 for (i = 0; i < n_iv_uses (data); i++)
6998 {
6999 use = iv_use (data, i);
7000 cand = use->selected;
7001 gcc_assert (cand);
7002
7003 rewrite_use (data, use, cand);
7004 }
7005 }
7006
7007 /* Removes the ivs that are not used after rewriting. */
7008
7009 static void
7010 remove_unused_ivs (struct ivopts_data *data)
7011 {
7012 unsigned j;
7013 bitmap_iterator bi;
7014 bitmap toremove = BITMAP_ALLOC (NULL);
7015
7016 /* Figure out an order in which to release SSA DEFs so that we don't
7017 release something that we'd have to propagate into a debug stmt
7018 afterwards. */
7019 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7020 {
7021 struct version_info *info;
7022
7023 info = ver_info (data, j);
7024 if (info->iv
7025 && !integer_zerop (info->iv->step)
7026 && !info->inv_id
7027 && !info->iv->have_use_for
7028 && !info->preserve_biv)
7029 {
7030 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7031
7032 tree def = info->iv->ssa_name;
7033
7034 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7035 {
7036 imm_use_iterator imm_iter;
7037 use_operand_p use_p;
7038 gimple stmt;
7039 int count = 0;
7040
7041 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7042 {
7043 if (!gimple_debug_bind_p (stmt))
7044 continue;
7045
7046 /* We just want to determine whether to do nothing
7047 (count == 0), to substitute the computed
7048 expression into a single use of the SSA DEF by
7049 itself (count == 1), or to use a debug temp
7050 because the SSA DEF is used multiple times or as
7051 part of a larger expression (count > 1). */
7052 count++;
7053 if (gimple_debug_bind_get_value (stmt) != def)
7054 count++;
7055
7056 if (count > 1)
7057 BREAK_FROM_IMM_USE_STMT (imm_iter);
7058 }
7059
7060 if (!count)
7061 continue;
7062
7063 struct iv_use dummy_use;
7064 struct iv_cand *best_cand = NULL, *cand;
7065 unsigned i, best_pref = 0, cand_pref;
7066
7067 memset (&dummy_use, 0, sizeof (dummy_use));
7068 dummy_use.iv = info->iv;
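/* Pick the candidate that makes the debug expression for DEF simplest:
   prefer a candidate with the same step (weight 4), the same mode of the
   base (weight 2) and a constant base (weight 1).  Only the first 64 uses
   are examined.  */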
7069 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7070 {
7071 cand = iv_use (data, i)->selected;
7072 if (cand == best_cand)
7073 continue;
7074 cand_pref = operand_equal_p (cand->iv->step,
7075 info->iv->step, 0)
7076 ? 4 : 0;
7077 cand_pref
7078 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7079 == TYPE_MODE (TREE_TYPE (info->iv->base))
7080 ? 2 : 0;
7081 cand_pref
7082 += TREE_CODE (cand->iv->base) == INTEGER_CST
7083 ? 1 : 0;
7084 if (best_cand == NULL || best_pref < cand_pref)
7085 {
7086 best_cand = cand;
7087 best_pref = cand_pref;
7088 }
7089 }
7090
7091 if (!best_cand)
7092 continue;
7093
7094 tree comp = get_computation_at (data->current_loop,
7095 &dummy_use, best_cand,
7096 SSA_NAME_DEF_STMT (def));
7097 if (!comp)
7098 continue;
7099
7100 if (count > 1)
7101 {
7102 tree vexpr = make_node (DEBUG_EXPR_DECL);
7103 DECL_ARTIFICIAL (vexpr) = 1;
7104 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7105 if (SSA_NAME_VAR (def))
7106 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7107 else
7108 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7109 gdebug *def_temp
7110 = gimple_build_debug_bind (vexpr, comp, NULL);
7111 gimple_stmt_iterator gsi;
7112
7113 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7114 gsi = gsi_after_labels (gimple_bb
7115 (SSA_NAME_DEF_STMT (def)));
7116 else
7117 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7118
7119 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7120 comp = vexpr;
7121 }
7122
7123 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7124 {
7125 if (!gimple_debug_bind_p (stmt))
7126 continue;
7127
7128 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7129 SET_USE (use_p, comp);
7130
7131 update_stmt (stmt);
7132 }
7133 }
7134 }
7135 }
7136
7137 release_defs_bitset (toremove);
7138
7139 BITMAP_FREE (toremove);
7140 }
7141
7142 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7143 for hash_map::traverse. */
7144
7145 bool
7146 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7147 {
7148 free (value);
7149 return true;
7150 }
7151
7152 /* Frees data allocated by the optimization of a single loop. */
7153
7154 static void
7155 free_loop_data (struct ivopts_data *data)
7156 {
7157 unsigned i, j;
7158 bitmap_iterator bi;
7159 tree obj;
7160
7161 if (data->niters)
7162 {
7163 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7164 delete data->niters;
7165 data->niters = NULL;
7166 }
7167
7168 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7169 {
7170 struct version_info *info;
7171
7172 info = ver_info (data, i);
7173 info->iv = NULL;
7174 info->has_nonlin_use = false;
7175 info->preserve_biv = false;
7176 info->inv_id = 0;
7177 }
7178 bitmap_clear (data->relevant);
7179 bitmap_clear (data->important_candidates);
7180
7181 for (i = 0; i < n_iv_uses (data); i++)
7182 {
7183 struct iv_use *use = iv_use (data, i);
7184 struct iv_use *pre = use, *sub = use->next;
7185
7186 while (sub)
7187 {
7188 gcc_assert (sub->related_cands == NULL);
7189 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7190
7191 pre = sub;
7192 sub = sub->next;
7193 free (pre);
7194 }
7195
7196 BITMAP_FREE (use->related_cands);
7197 for (j = 0; j < use->n_map_members; j++)
7198 if (use->cost_map[j].depends_on)
7199 BITMAP_FREE (use->cost_map[j].depends_on);
7200 free (use->cost_map);
7201 free (use);
7202 }
7203 data->iv_uses.truncate (0);
7204
7205 for (i = 0; i < n_iv_cands (data); i++)
7206 {
7207 struct iv_cand *cand = iv_cand (data, i);
7208
7209 if (cand->depends_on)
7210 BITMAP_FREE (cand->depends_on);
7211 free (cand);
7212 }
7213 data->iv_candidates.truncate (0);
7214
7215 if (data->version_info_size < num_ssa_names)
7216 {
7217 data->version_info_size = 2 * num_ssa_names;
7218 free (data->version_info);
7219 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7220 }
7221
7222 data->max_inv_id = 0;
7223
7224 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7225 SET_DECL_RTL (obj, NULL_RTX);
7226
7227 decl_rtl_to_reset.truncate (0);
7228
7229 data->inv_expr_tab->empty ();
7230 data->inv_expr_id = 0;
7231 }
7232
7233 /* Finalizes the data structures used by the iv optimization pass,
7234 stored in DATA.  */
7235
7236 static void
7237 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7238 {
7239 free_loop_data (data);
7240 free (data->version_info);
7241 BITMAP_FREE (data->relevant);
7242 BITMAP_FREE (data->important_candidates);
7243
7244 decl_rtl_to_reset.release ();
7245 data->iv_uses.release ();
7246 data->iv_candidates.release ();
7247 delete data->inv_expr_tab;
7248 data->inv_expr_tab = NULL;
7249 free_affine_expand_cache (&data->name_expansion_cache);
7250 obstack_free (&data->iv_obstack, NULL);
7251 }
7252
7253 /* Returns true if the loop body BODY includes any function calls. */
7254
7255 static bool
7256 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7257 {
7258 gimple_stmt_iterator gsi;
7259 unsigned i;
7260
7261 for (i = 0; i < num_nodes; i++)
7262 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7263 {
7264 gimple stmt = gsi_stmt (gsi);
7265 if (is_gimple_call (stmt)
7266 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7267 return true;
7268 }
7269 return false;
7270 }
7271
7272 /* Optimizes the LOOP. Returns true if anything changed. */
7273
7274 static bool
7275 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7276 {
7277 bool changed = false;
7278 struct iv_ca *iv_ca;
7279 edge exit = single_dom_exit (loop);
7280 basic_block *body;
7281
7282 gcc_assert (!data->niters);
7283 data->current_loop = loop;
7284 data->loop_loc = find_loop_location (loop);
7285 data->speed = optimize_loop_for_speed_p (loop);
7286
7287 if (dump_file && (dump_flags & TDF_DETAILS))
7288 {
7289 fprintf (dump_file, "Processing loop %d", loop->num);
7290 if (data->loop_loc != UNKNOWN_LOCATION)
7291 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7292 LOCATION_LINE (data->loop_loc));
7293 fprintf (dump_file, "\n");
7294
7295 if (exit)
7296 {
7297 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7298 exit->src->index, exit->dest->index);
7299 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7300 fprintf (dump_file, "\n");
7301 }
7302
7303 fprintf (dump_file, "\n");
7304 }
7305
7306 body = get_loop_body (loop);
7307 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7308 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7309 free (body);
7310
7311 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7312
7313 /* For each ssa name determines whether it behaves as an induction variable
7314 in some loop. */
7315 if (!find_induction_variables (data))
7316 goto finish;
7317
7318 /* Finds interesting uses (item 1). */
7319 find_interesting_uses (data);
7320 group_address_uses (data);
7321 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7322 goto finish;
7323
7324 /* Finds candidates for the induction variables (item 2). */
7325 find_iv_candidates (data);
7326
7327 /* Calculates the costs (item 3, part 1). */
7328 determine_iv_costs (data);
7329 determine_use_iv_costs (data);
7330 determine_set_costs (data);
7331
7332 /* Find the optimal set of induction variables (item 3, part 2). */
7333 iv_ca = find_optimal_iv_set (data);
7334 if (!iv_ca)
7335 goto finish;
7336 changed = true;
7337
7338 /* Create the new induction variables (item 4, part 1). */
7339 create_new_ivs (data, iv_ca);
7340 iv_ca_free (&iv_ca);
7341
7342 /* Rewrite the uses (item 4, part 2). */
7343 rewrite_uses (data);
7344
7345 /* Remove the ivs that are unused after rewriting. */
7346 remove_unused_ivs (data);
7347
7348 /* We have changed the structure of induction variables; it might happen
7349 that definitions in the scev database refer to some of them that were
7350 eliminated. */
7351 scev_reset ();
7352
7353 finish:
7354 free_loop_data (data);
7355
7356 return changed;
7357 }
7358
7359 /* Main entry point. Optimizes induction variables in loops. */
7360
7361 void
7362 tree_ssa_iv_optimize (void)
7363 {
7364 struct loop *loop;
7365 struct ivopts_data data;
7366
7367 tree_ssa_iv_optimize_init (&data);
7368
7369 /* Optimize the loops starting with the innermost ones. */
7370 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7371 {
7372 if (dump_file && (dump_flags & TDF_DETAILS))
7373 flow_loop_dump (loop, dump_file, NULL, 1);
7374
7375 tree_ssa_iv_optimize_loop (&data, loop);
7376 }
7377
7378 tree_ssa_iv_optimize_finalize (&data);
7379 }