]> git.ipfire.org Git - thirdparty/gcc.git/blob - gcc/tree-ssa-loop-ivopts.c
tree-ssa-loop-ivopts.c (get_shiftadd_cost): Strip unnecessary type conversion in...
[thirdparty/gcc.git] / gcc / tree-ssa-loop-ivopts.c
1 /* Induction variable optimizations.
2 Copyright (C) 2003-2015 Free Software Foundation, Inc.
3
4 This file is part of GCC.
5
6 GCC is free software; you can redistribute it and/or modify it
7 under the terms of the GNU General Public License as published by the
8 Free Software Foundation; either version 3, or (at your option) any
9 later version.
10
11 GCC is distributed in the hope that it will be useful, but WITHOUT
12 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GCC; see the file COPYING3. If not see
18 <http://www.gnu.org/licenses/>. */
19
20 /* This pass tries to find the optimal set of induction variables for the loop.
21 It optimizes just the basic linear induction variables (although adding
22 support for other types should not be too hard). It includes the
23 optimizations commonly known as strength reduction, induction variable
24 coalescing and induction variable elimination. It does it in the
25 following steps:
26
27 1) The interesting uses of induction variables are found. This includes
28
29 -- uses of induction variables in non-linear expressions
30 -- addresses of arrays
31 -- comparisons of induction variables
32
33 2) Candidates for the induction variables are found. This includes
34
35 -- old induction variables
36 -- the variables defined by expressions derived from the "interesting
37 uses" above
38
39 3) The optimal (w.r. to a cost function) set of variables is chosen. The
40 cost function assigns a cost to sets of induction variables and consists
41 of three parts:
42
43 -- The use costs. Each of the interesting uses chooses the best induction
44 variable in the set and adds its cost to the sum. The cost reflects
45 the time spent on modifying the induction variables value to be usable
46 for the given purpose (adding base and offset for arrays, etc.).
47 -- The variable costs. Each of the variables has a cost assigned that
48 reflects the costs associated with incrementing the value of the
49 variable. The original variables are somewhat preferred.
50 -- The set cost. Depending on the size of the set, extra cost may be
51 added to reflect register pressure.
52
53 All the costs are defined in a machine-specific way, using the target
54 hooks and machine descriptions to determine them.
55
56 4) The trees are transformed to use the new variables, the dead code is
57 removed.
58
59 All of this is done loop by loop. Doing it globally is theoretically
60 possible, it might give a better performance and it might enable us
61 to decide costs more precisely, but getting all the interactions right
62 would be complicated. */
63
64 #include "config.h"
65 #include "system.h"
66 #include "coretypes.h"
67 #include "backend.h"
68 #include "cfghooks.h"
69 #include "tree.h"
70 #include "gimple.h"
71 #include "rtl.h"
72 #include "ssa.h"
73 #include "alias.h"
74 #include "fold-const.h"
75 #include "stor-layout.h"
76 #include "tm_p.h"
77 #include "gimple-pretty-print.h"
78 #include "internal-fn.h"
79 #include "tree-eh.h"
80 #include "gimplify.h"
81 #include "gimple-iterator.h"
82 #include "gimplify-me.h"
83 #include "cgraph.h"
84 #include "tree-cfg.h"
85 #include "tree-ssa-loop-ivopts.h"
86 #include "tree-ssa-loop-manip.h"
87 #include "tree-ssa-loop-niter.h"
88 #include "tree-ssa-loop.h"
89 #include "flags.h"
90 #include "insn-config.h"
91 #include "expmed.h"
92 #include "dojump.h"
93 #include "explow.h"
94 #include "calls.h"
95 #include "emit-rtl.h"
96 #include "varasm.h"
97 #include "stmt.h"
98 #include "expr.h"
99 #include "tree-dfa.h"
100 #include "tree-ssa.h"
101 #include "cfgloop.h"
102 #include "tree-pass.h"
103 #include "tree-chrec.h"
104 #include "tree-scalar-evolution.h"
105 #include "params.h"
106 #include "langhooks.h"
107 #include "tree-affine.h"
108 #include "target.h"
109 #include "tree-inline.h"
110 #include "tree-ssa-propagate.h"
111 #include "tree-ssa-address.h"
112 #include "builtins.h"
113 #include "tree-vectorizer.h"
114
115 /* FIXME: Expressions are expanded to RTL in this pass to determine the
116 cost of different addressing modes. This should be moved to a TBD
117 interface between the GIMPLE and RTL worlds. */
118 #include "recog.h"
119
120 /* The infinite cost. */
121 #define INFTY 10000000
122
123 #define AVG_LOOP_NITER(LOOP) 5
124
125 /* Returns the expected number of loop iterations for LOOP.
126 The average trip count is computed from profile data if it
127 exists. */
128
129 static inline HOST_WIDE_INT
130 avg_loop_niter (struct loop *loop)
131 {
132 HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
133 if (niter == -1)
134 return AVG_LOOP_NITER (loop);
135
136 return niter;
137 }
138
139 /* Representation of the induction variable. */
140 struct iv
141 {
142 tree base; /* Initial value of the iv. */
143 tree base_object; /* A memory object to that the induction variable points. */
144 tree step; /* Step of the iv (constant only). */
145 tree ssa_name; /* The ssa name with the value. */
146 unsigned use_id; /* The identifier in the use if it is the case. */
147 bool biv_p; /* Is it a biv? */
148 bool have_use_for; /* Do we already have a use for it? */
149 bool no_overflow; /* True if the iv doesn't overflow. */
150 };
151
152 /* Per-ssa version information (induction variable descriptions, etc.). */
153 struct version_info
154 {
155 tree name; /* The ssa name. */
156 struct iv *iv; /* Induction variable description. */
157 bool has_nonlin_use; /* For a loop-level invariant, whether it is used in
158 an expression that is not an induction variable. */
159 bool preserve_biv; /* For the original biv, whether to preserve it. */
160 unsigned inv_id; /* Id of an invariant. */
161 };
162
163 /* Types of uses. */
164 enum use_type
165 {
166 USE_NONLINEAR_EXPR, /* Use in a nonlinear expression. */
167 USE_ADDRESS, /* Use in an address. */
168 USE_COMPARE /* Use is a compare. */
169 };
170
171 /* Cost of a computation. */
172 struct comp_cost
173 {
174 int cost; /* The runtime cost. */
175 unsigned complexity; /* The estimate of the complexity of the code for
176 the computation (in no concrete units --
177 complexity field should be larger for more
178 complex expressions and addressing modes). */
179 };
180
181 static const comp_cost no_cost = {0, 0};
182 static const comp_cost infinite_cost = {INFTY, INFTY};
183
184 /* The candidate - cost pair. */
185 struct cost_pair
186 {
187 struct iv_cand *cand; /* The candidate. */
188 comp_cost cost; /* The cost. */
189 bitmap depends_on; /* The list of invariants that have to be
190 preserved. */
191 tree value; /* For final value elimination, the expression for
192 the final value of the iv. For iv elimination,
193 the new bound to compare with. */
194 enum tree_code comp; /* For iv elimination, the comparison. */
195 int inv_expr_id; /* Loop invariant expression id. */
196 };
197
198 /* Use. */
199 struct iv_use
200 {
201 unsigned id; /* The id of the use. */
202 unsigned sub_id; /* The id of the sub use. */
203 enum use_type type; /* Type of the use. */
204 struct iv *iv; /* The induction variable it is based on. */
205 gimple stmt; /* Statement in that it occurs. */
206 tree *op_p; /* The place where it occurs. */
207 bitmap related_cands; /* The set of "related" iv candidates, plus the common
208 important ones. */
209
210 unsigned n_map_members; /* Number of candidates in the cost_map list. */
211 struct cost_pair *cost_map;
212 /* The costs wrto the iv candidates. */
213
214 struct iv_cand *selected;
215 /* The selected candidate. */
216
217 struct iv_use *next; /* The next sub use. */
218 tree addr_base; /* Base address with const offset stripped. */
219 unsigned HOST_WIDE_INT addr_offset;
220 /* Const offset stripped from base address. */
221 };
222
223 /* The position where the iv is computed. */
224 enum iv_position
225 {
226 IP_NORMAL, /* At the end, just before the exit condition. */
227 IP_END, /* At the end of the latch block. */
228 IP_BEFORE_USE, /* Immediately before a specific use. */
229 IP_AFTER_USE, /* Immediately after a specific use. */
230 IP_ORIGINAL /* The original biv. */
231 };
232
233 /* The induction variable candidate. */
234 struct iv_cand
235 {
236 unsigned id; /* The number of the candidate. */
237 bool important; /* Whether this is an "important" candidate, i.e. such
238 that it should be considered by all uses. */
239 ENUM_BITFIELD(iv_position) pos : 8; /* Where it is computed. */
240 gimple incremented_at;/* For original biv, the statement where it is
241 incremented. */
242 tree var_before; /* The variable used for it before increment. */
243 tree var_after; /* The variable used for it after increment. */
244 struct iv *iv; /* The value of the candidate. NULL for
245 "pseudocandidate" used to indicate the possibility
246 to replace the final value of an iv by direct
247 computation of the value. */
248 unsigned cost; /* Cost of the candidate. */
249 unsigned cost_step; /* Cost of the candidate's increment operation. */
250 struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
251 where it is incremented. */
252 bitmap depends_on; /* The list of invariants that are used in step of the
253 biv. */
254 };
255
256 /* Loop invariant expression hashtable entry. */
257 struct iv_inv_expr_ent
258 {
259 tree expr;
260 int id;
261 hashval_t hash;
262 };
263
264 /* The data used by the induction variable optimizations. */
265
266 typedef struct iv_use *iv_use_p;
267
268 typedef struct iv_cand *iv_cand_p;
269
270 /* Hashtable helpers. */
271
272 struct iv_inv_expr_hasher : free_ptr_hash <iv_inv_expr_ent>
273 {
274 static inline hashval_t hash (const iv_inv_expr_ent *);
275 static inline bool equal (const iv_inv_expr_ent *, const iv_inv_expr_ent *);
276 };
277
278 /* Hash function for loop invariant expressions. */
279
280 inline hashval_t
281 iv_inv_expr_hasher::hash (const iv_inv_expr_ent *expr)
282 {
283 return expr->hash;
284 }
285
286 /* Hash table equality function for expressions. */
287
288 inline bool
289 iv_inv_expr_hasher::equal (const iv_inv_expr_ent *expr1,
290 const iv_inv_expr_ent *expr2)
291 {
292 return expr1->hash == expr2->hash
293 && operand_equal_p (expr1->expr, expr2->expr, 0);
294 }
295
296 struct ivopts_data
297 {
298 /* The currently optimized loop. */
299 struct loop *current_loop;
300 source_location loop_loc;
301
302 /* Numbers of iterations for all exits of the current loop. */
303 hash_map<edge, tree_niter_desc *> *niters;
304
305 /* Number of registers used in it. */
306 unsigned regs_used;
307
308 /* The size of version_info array allocated. */
309 unsigned version_info_size;
310
311 /* The array of information for the ssa names. */
312 struct version_info *version_info;
313
314 /* The hashtable of loop invariant expressions created
315 by ivopt. */
316 hash_table<iv_inv_expr_hasher> *inv_expr_tab;
317
318 /* Loop invariant expression id. */
319 int inv_expr_id;
320
321 /* The bitmap of indices in version_info whose value was changed. */
322 bitmap relevant;
323
324 /* The uses of induction variables. */
325 vec<iv_use_p> iv_uses;
326
327 /* The candidates. */
328 vec<iv_cand_p> iv_candidates;
329
330 /* A bitmap of important candidates. */
331 bitmap important_candidates;
332
333 /* Cache used by tree_to_aff_combination_expand. */
334 hash_map<tree, name_expansion *> *name_expansion_cache;
335
336 /* The maximum invariant id. */
337 unsigned max_inv_id;
338
339 /* Obstack for iv structure. */
340 struct obstack iv_obstack;
341
342 /* Whether to consider just related and important candidates when replacing a
343 use. */
344 bool consider_all_candidates;
345
346 /* Are we optimizing for speed? */
347 bool speed;
348
349 /* Whether the loop body includes any function calls. */
350 bool body_includes_call;
351
352 /* Whether the loop body can only be exited via single exit. */
353 bool loop_single_exit_p;
354 };
355
356 /* An assignment of iv candidates to uses. */
357
358 struct iv_ca
359 {
360 /* The number of uses covered by the assignment. */
361 unsigned upto;
362
363 /* Number of uses that cannot be expressed by the candidates in the set. */
364 unsigned bad_uses;
365
366 /* Candidate assigned to a use, together with the related costs. */
367 struct cost_pair **cand_for_use;
368
369 /* Number of times each candidate is used. */
370 unsigned *n_cand_uses;
371
372 /* The candidates used. */
373 bitmap cands;
374
375 /* The number of candidates in the set. */
376 unsigned n_cands;
377
378 /* Total number of registers needed. */
379 unsigned n_regs;
380
381 /* Total cost of expressing uses. */
382 comp_cost cand_use_cost;
383
384 /* Total cost of candidates. */
385 unsigned cand_cost;
386
387 /* Number of times each invariant is used. */
388 unsigned *n_invariant_uses;
389
390 /* The array holding the number of uses of each loop
391 invariant expressions created by ivopt. */
392 unsigned *used_inv_expr;
393
394 /* The number of created loop invariants. */
395 unsigned num_used_inv_expr;
396
397 /* Total cost of the assignment. */
398 comp_cost cost;
399 };
400
401 /* Difference of two iv candidate assignments. */
402
403 struct iv_ca_delta
404 {
405 /* Changed use. */
406 struct iv_use *use;
407
408 /* An old assignment (for rollback purposes). */
409 struct cost_pair *old_cp;
410
411 /* A new assignment. */
412 struct cost_pair *new_cp;
413
414 /* Next change in the list. */
415 struct iv_ca_delta *next_change;
416 };
417
418 /* Bound on number of candidates below that all candidates are considered. */
419
420 #define CONSIDER_ALL_CANDIDATES_BOUND \
421 ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))
422
423 /* If there are more iv occurrences, we just give up (it is quite unlikely that
424 optimizing such a loop would help, and it would take ages). */
425
426 #define MAX_CONSIDERED_USES \
427 ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))
428
429 /* If there are at most this number of ivs in the set, try removing unnecessary
430 ivs from the set always. */
431
432 #define ALWAYS_PRUNE_CAND_SET_BOUND \
433 ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))
434
435 /* The list of trees for that the decl_rtl field must be reset is stored
436 here. */
437
438 static vec<tree> decl_rtl_to_reset;
439
440 static comp_cost force_expr_to_var_cost (tree, bool);
441
442 /* Number of uses recorded in DATA. */
443
444 static inline unsigned
445 n_iv_uses (struct ivopts_data *data)
446 {
447 return data->iv_uses.length ();
448 }
449
450 /* Ith use recorded in DATA. */
451
452 static inline struct iv_use *
453 iv_use (struct ivopts_data *data, unsigned i)
454 {
455 return data->iv_uses[i];
456 }
457
458 /* Number of candidates recorded in DATA. */
459
460 static inline unsigned
461 n_iv_cands (struct ivopts_data *data)
462 {
463 return data->iv_candidates.length ();
464 }
465
466 /* Ith candidate recorded in DATA. */
467
468 static inline struct iv_cand *
469 iv_cand (struct ivopts_data *data, unsigned i)
470 {
471 return data->iv_candidates[i];
472 }
473
474 /* The single loop exit if it dominates the latch, NULL otherwise. */
475
476 edge
477 single_dom_exit (struct loop *loop)
478 {
479 edge exit = single_exit (loop);
480
481 if (!exit)
482 return NULL;
483
484 if (!just_once_each_iteration_p (loop, exit->src))
485 return NULL;
486
487 return exit;
488 }
489
490 /* Dumps information about the induction variable IV to FILE. */
491
492 void
493 dump_iv (FILE *file, struct iv *iv, bool dump_name)
494 {
495 if (iv->ssa_name && dump_name)
496 {
497 fprintf (file, "ssa name ");
498 print_generic_expr (file, iv->ssa_name, TDF_SLIM);
499 fprintf (file, "\n");
500 }
501
502 fprintf (file, " type ");
503 print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
504 fprintf (file, "\n");
505
506 if (iv->step)
507 {
508 fprintf (file, " base ");
509 print_generic_expr (file, iv->base, TDF_SLIM);
510 fprintf (file, "\n");
511
512 fprintf (file, " step ");
513 print_generic_expr (file, iv->step, TDF_SLIM);
514 fprintf (file, "\n");
515 }
516 else
517 {
518 fprintf (file, " invariant ");
519 print_generic_expr (file, iv->base, TDF_SLIM);
520 fprintf (file, "\n");
521 }
522
523 if (iv->base_object)
524 {
525 fprintf (file, " base object ");
526 print_generic_expr (file, iv->base_object, TDF_SLIM);
527 fprintf (file, "\n");
528 }
529
530 if (iv->biv_p)
531 fprintf (file, " is a biv\n");
532 }
533
534 /* Dumps information about the USE to FILE. */
535
536 void
537 dump_use (FILE *file, struct iv_use *use)
538 {
539 fprintf (file, "use %d", use->id);
540 if (use->sub_id)
541 fprintf (file, ".%d", use->sub_id);
542
543 fprintf (file, "\n");
544
545 switch (use->type)
546 {
547 case USE_NONLINEAR_EXPR:
548 fprintf (file, " generic\n");
549 break;
550
551 case USE_ADDRESS:
552 fprintf (file, " address\n");
553 break;
554
555 case USE_COMPARE:
556 fprintf (file, " compare\n");
557 break;
558
559 default:
560 gcc_unreachable ();
561 }
562
563 fprintf (file, " in statement ");
564 print_gimple_stmt (file, use->stmt, 0, 0);
565 fprintf (file, "\n");
566
567 fprintf (file, " at position ");
568 if (use->op_p)
569 print_generic_expr (file, *use->op_p, TDF_SLIM);
570 fprintf (file, "\n");
571
572 dump_iv (file, use->iv, false);
573
574 if (use->related_cands)
575 {
576 fprintf (file, " related candidates ");
577 dump_bitmap (file, use->related_cands);
578 }
579 }
580
581 /* Dumps information about the uses to FILE. */
582
583 void
584 dump_uses (FILE *file, struct ivopts_data *data)
585 {
586 unsigned i;
587 struct iv_use *use;
588
589 for (i = 0; i < n_iv_uses (data); i++)
590 {
591 use = iv_use (data, i);
592 do
593 {
594 dump_use (file, use);
595 use = use->next;
596 }
597 while (use);
598 fprintf (file, "\n");
599 }
600 }
601
602 /* Dumps information about induction variable candidate CAND to FILE. */
603
604 void
605 dump_cand (FILE *file, struct iv_cand *cand)
606 {
607 struct iv *iv = cand->iv;
608
609 fprintf (file, "candidate %d%s\n",
610 cand->id, cand->important ? " (important)" : "");
611
612 if (cand->depends_on)
613 {
614 fprintf (file, " depends on ");
615 dump_bitmap (file, cand->depends_on);
616 }
617
618 if (!iv)
619 {
620 fprintf (file, " final value replacement\n");
621 return;
622 }
623
624 if (cand->var_before)
625 {
626 fprintf (file, " var_before ");
627 print_generic_expr (file, cand->var_before, TDF_SLIM);
628 fprintf (file, "\n");
629 }
630 if (cand->var_after)
631 {
632 fprintf (file, " var_after ");
633 print_generic_expr (file, cand->var_after, TDF_SLIM);
634 fprintf (file, "\n");
635 }
636
637 switch (cand->pos)
638 {
639 case IP_NORMAL:
640 fprintf (file, " incremented before exit test\n");
641 break;
642
643 case IP_BEFORE_USE:
644 fprintf (file, " incremented before use %d\n", cand->ainc_use->id);
645 break;
646
647 case IP_AFTER_USE:
648 fprintf (file, " incremented after use %d\n", cand->ainc_use->id);
649 break;
650
651 case IP_END:
652 fprintf (file, " incremented at end\n");
653 break;
654
655 case IP_ORIGINAL:
656 fprintf (file, " original biv\n");
657 break;
658 }
659
660 dump_iv (file, iv, false);
661 }
662
663 /* Returns the info for ssa version VER. */
664
665 static inline struct version_info *
666 ver_info (struct ivopts_data *data, unsigned ver)
667 {
668 return data->version_info + ver;
669 }
670
671 /* Returns the info for ssa name NAME. */
672
673 static inline struct version_info *
674 name_info (struct ivopts_data *data, tree name)
675 {
676 return ver_info (data, SSA_NAME_VERSION (name));
677 }
678
679 /* Returns true if STMT is after the place where the IP_NORMAL ivs will be
680 emitted in LOOP. */
681
682 static bool
683 stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
684 {
685 basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);
686
687 gcc_assert (bb);
688
689 if (sbb == loop->latch)
690 return true;
691
692 if (sbb != bb)
693 return false;
694
695 return stmt == last_stmt (bb);
696 }
697
698 /* Returns true if STMT if after the place where the original induction
699 variable CAND is incremented. If TRUE_IF_EQUAL is set, we return true
700 if the positions are identical. */
701
702 static bool
703 stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
704 {
705 basic_block cand_bb = gimple_bb (cand->incremented_at);
706 basic_block stmt_bb = gimple_bb (stmt);
707
708 if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
709 return false;
710
711 if (stmt_bb != cand_bb)
712 return true;
713
714 if (true_if_equal
715 && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
716 return true;
717 return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
718 }
719
720 /* Returns true if STMT if after the place where the induction variable
721 CAND is incremented in LOOP. */
722
723 static bool
724 stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
725 {
726 switch (cand->pos)
727 {
728 case IP_END:
729 return false;
730
731 case IP_NORMAL:
732 return stmt_after_ip_normal_pos (loop, stmt);
733
734 case IP_ORIGINAL:
735 case IP_AFTER_USE:
736 return stmt_after_inc_pos (cand, stmt, false);
737
738 case IP_BEFORE_USE:
739 return stmt_after_inc_pos (cand, stmt, true);
740
741 default:
742 gcc_unreachable ();
743 }
744 }
745
746 /* Returns true if EXP is a ssa name that occurs in an abnormal phi node. */
747
748 static bool
749 abnormal_ssa_name_p (tree exp)
750 {
751 if (!exp)
752 return false;
753
754 if (TREE_CODE (exp) != SSA_NAME)
755 return false;
756
757 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
758 }
759
760 /* Returns false if BASE or INDEX contains a ssa name that occurs in an
761 abnormal phi node. Callback for for_each_index. */
762
763 static bool
764 idx_contains_abnormal_ssa_name_p (tree base, tree *index,
765 void *data ATTRIBUTE_UNUSED)
766 {
767 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
768 {
769 if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
770 return false;
771 if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
772 return false;
773 }
774
775 return !abnormal_ssa_name_p (*index);
776 }
777
778 /* Returns true if EXPR contains a ssa name that occurs in an
779 abnormal phi node. */
780
781 bool
782 contains_abnormal_ssa_name_p (tree expr)
783 {
784 enum tree_code code;
785 enum tree_code_class codeclass;
786
787 if (!expr)
788 return false;
789
790 code = TREE_CODE (expr);
791 codeclass = TREE_CODE_CLASS (code);
792
793 if (code == SSA_NAME)
794 return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;
795
796 if (code == INTEGER_CST
797 || is_gimple_min_invariant (expr))
798 return false;
799
800 if (code == ADDR_EXPR)
801 return !for_each_index (&TREE_OPERAND (expr, 0),
802 idx_contains_abnormal_ssa_name_p,
803 NULL);
804
805 if (code == COND_EXPR)
806 return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
807 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
808 || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));
809
810 switch (codeclass)
811 {
812 case tcc_binary:
813 case tcc_comparison:
814 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
815 return true;
816
817 /* Fallthru. */
818 case tcc_unary:
819 if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
820 return true;
821
822 break;
823
824 default:
825 gcc_unreachable ();
826 }
827
828 return false;
829 }
830
831 /* Returns the structure describing number of iterations determined from
832 EXIT of DATA->current_loop, or NULL if something goes wrong. */
833
834 static struct tree_niter_desc *
835 niter_for_exit (struct ivopts_data *data, edge exit)
836 {
837 struct tree_niter_desc *desc;
838 tree_niter_desc **slot;
839
840 if (!data->niters)
841 {
842 data->niters = new hash_map<edge, tree_niter_desc *>;
843 slot = NULL;
844 }
845 else
846 slot = data->niters->get (exit);
847
848 if (!slot)
849 {
850 /* Try to determine number of iterations. We cannot safely work with ssa
851 names that appear in phi nodes on abnormal edges, so that we do not
852 create overlapping life ranges for them (PR 27283). */
853 desc = XNEW (struct tree_niter_desc);
854 if (!number_of_iterations_exit (data->current_loop,
855 exit, desc, true)
856 || contains_abnormal_ssa_name_p (desc->niter))
857 {
858 XDELETE (desc);
859 desc = NULL;
860 }
861 data->niters->put (exit, desc);
862 }
863 else
864 desc = *slot;
865
866 return desc;
867 }
868
869 /* Returns the structure describing number of iterations determined from
870 single dominating exit of DATA->current_loop, or NULL if something
871 goes wrong. */
872
873 static struct tree_niter_desc *
874 niter_for_single_dom_exit (struct ivopts_data *data)
875 {
876 edge exit = single_dom_exit (data->current_loop);
877
878 if (!exit)
879 return NULL;
880
881 return niter_for_exit (data, exit);
882 }
883
884 /* Initializes data structures used by the iv optimization pass, stored
885 in DATA. */
886
887 static void
888 tree_ssa_iv_optimize_init (struct ivopts_data *data)
889 {
890 data->version_info_size = 2 * num_ssa_names;
891 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
892 data->relevant = BITMAP_ALLOC (NULL);
893 data->important_candidates = BITMAP_ALLOC (NULL);
894 data->max_inv_id = 0;
895 data->niters = NULL;
896 data->iv_uses.create (20);
897 data->iv_candidates.create (20);
898 data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
899 data->inv_expr_id = 0;
900 data->name_expansion_cache = NULL;
901 decl_rtl_to_reset.create (20);
902 gcc_obstack_init (&data->iv_obstack);
903 }
904
905 /* Returns a memory object to that EXPR points. In case we are able to
906 determine that it does not point to any such object, NULL is returned. */
907
908 static tree
909 determine_base_object (tree expr)
910 {
911 enum tree_code code = TREE_CODE (expr);
912 tree base, obj;
913
914 /* If this is a pointer casted to any type, we need to determine
915 the base object for the pointer; so handle conversions before
916 throwing away non-pointer expressions. */
917 if (CONVERT_EXPR_P (expr))
918 return determine_base_object (TREE_OPERAND (expr, 0));
919
920 if (!POINTER_TYPE_P (TREE_TYPE (expr)))
921 return NULL_TREE;
922
923 switch (code)
924 {
925 case INTEGER_CST:
926 return NULL_TREE;
927
928 case ADDR_EXPR:
929 obj = TREE_OPERAND (expr, 0);
930 base = get_base_address (obj);
931
932 if (!base)
933 return expr;
934
935 if (TREE_CODE (base) == MEM_REF)
936 return determine_base_object (TREE_OPERAND (base, 0));
937
938 return fold_convert (ptr_type_node,
939 build_fold_addr_expr (base));
940
941 case POINTER_PLUS_EXPR:
942 return determine_base_object (TREE_OPERAND (expr, 0));
943
944 case PLUS_EXPR:
945 case MINUS_EXPR:
946 /* Pointer addition is done solely using POINTER_PLUS_EXPR. */
947 gcc_unreachable ();
948
949 default:
950 return fold_convert (ptr_type_node, expr);
951 }
952 }
953
954 /* Return true if address expression with non-DECL_P operand appears
955 in EXPR. */
956
957 static bool
958 contain_complex_addr_expr (tree expr)
959 {
960 bool res = false;
961
962 STRIP_NOPS (expr);
963 switch (TREE_CODE (expr))
964 {
965 case POINTER_PLUS_EXPR:
966 case PLUS_EXPR:
967 case MINUS_EXPR:
968 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
969 res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
970 break;
971
972 case ADDR_EXPR:
973 return (!DECL_P (TREE_OPERAND (expr, 0)));
974
975 default:
976 return false;
977 }
978
979 return res;
980 }
981
982 /* Allocates an induction variable with given initial value BASE and step STEP
983 for loop LOOP. NO_OVERFLOW implies the iv doesn't overflow. */
984
985 static struct iv *
986 alloc_iv (struct ivopts_data *data, tree base, tree step,
987 bool no_overflow = false)
988 {
989 tree expr = base;
990 struct iv *iv = (struct iv*) obstack_alloc (&data->iv_obstack,
991 sizeof (struct iv));
992 gcc_assert (step != NULL_TREE);
993
994 /* Lower address expression in base except ones with DECL_P as operand.
995 By doing this:
996 1) More accurate cost can be computed for address expressions;
997 2) Duplicate candidates won't be created for bases in different
998 forms, like &a[0] and &a. */
999 STRIP_NOPS (expr);
1000 if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
1001 || contain_complex_addr_expr (expr))
1002 {
1003 aff_tree comb;
1004 tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
1005 base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
1006 }
1007
1008 iv->base = base;
1009 iv->base_object = determine_base_object (base);
1010 iv->step = step;
1011 iv->biv_p = false;
1012 iv->have_use_for = false;
1013 iv->use_id = 0;
1014 iv->ssa_name = NULL_TREE;
1015 iv->no_overflow = no_overflow;
1016
1017 return iv;
1018 }
1019
1020 /* Sets STEP and BASE for induction variable IV. NO_OVERFLOW implies the IV
1021 doesn't overflow. */
1022
1023 static void
1024 set_iv (struct ivopts_data *data, tree iv, tree base, tree step,
1025 bool no_overflow)
1026 {
1027 struct version_info *info = name_info (data, iv);
1028
1029 gcc_assert (!info->iv);
1030
1031 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
1032 info->iv = alloc_iv (data, base, step, no_overflow);
1033 info->iv->ssa_name = iv;
1034 }
1035
1036 /* Finds induction variable declaration for VAR. */
1037
1038 static struct iv *
1039 get_iv (struct ivopts_data *data, tree var)
1040 {
1041 basic_block bb;
1042 tree type = TREE_TYPE (var);
1043
1044 if (!POINTER_TYPE_P (type)
1045 && !INTEGRAL_TYPE_P (type))
1046 return NULL;
1047
1048 if (!name_info (data, var)->iv)
1049 {
1050 bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1051
1052 if (!bb
1053 || !flow_bb_inside_loop_p (data->current_loop, bb))
1054 set_iv (data, var, var, build_int_cst (type, 0), true);
1055 }
1056
1057 return name_info (data, var)->iv;
1058 }
1059
1060 /* Return the first non-invariant ssa var found in EXPR. */
1061
1062 static tree
1063 extract_single_var_from_expr (tree expr)
1064 {
1065 int i, n;
1066 tree tmp;
1067 enum tree_code code;
1068
1069 if (!expr || is_gimple_min_invariant (expr))
1070 return NULL;
1071
1072 code = TREE_CODE (expr);
1073 if (IS_EXPR_CODE_CLASS (TREE_CODE_CLASS (code)))
1074 {
1075 n = TREE_OPERAND_LENGTH (expr);
1076 for (i = 0; i < n; i++)
1077 {
1078 tmp = extract_single_var_from_expr (TREE_OPERAND (expr, i));
1079
1080 if (tmp)
1081 return tmp;
1082 }
1083 }
1084 return (TREE_CODE (expr) == SSA_NAME) ? expr : NULL;
1085 }
1086
1087 /* Finds basic ivs. */
1088
1089 static bool
1090 find_bivs (struct ivopts_data *data)
1091 {
1092 gphi *phi;
1093 affine_iv iv;
1094 tree step, type, base, stop;
1095 bool found = false;
1096 struct loop *loop = data->current_loop;
1097 gphi_iterator psi;
1098
1099 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1100 {
1101 phi = psi.phi ();
1102
1103 if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
1104 continue;
1105
1106 if (virtual_operand_p (PHI_RESULT (phi)))
1107 continue;
1108
1109 if (!simple_iv (loop, loop, PHI_RESULT (phi), &iv, true))
1110 continue;
1111
1112 if (integer_zerop (iv.step))
1113 continue;
1114
1115 step = iv.step;
1116 base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
1117 /* Stop expanding iv base at the first ssa var referred by iv step.
1118 Ideally we should stop at any ssa var, because that's expensive
1119 and unusual to happen, we just do it on the first one.
1120
1121 See PR64705 for the rationale. */
1122 stop = extract_single_var_from_expr (step);
1123 base = expand_simple_operations (base, stop);
1124 if (contains_abnormal_ssa_name_p (base)
1125 || contains_abnormal_ssa_name_p (step))
1126 continue;
1127
1128 type = TREE_TYPE (PHI_RESULT (phi));
1129 base = fold_convert (type, base);
1130 if (step)
1131 {
1132 if (POINTER_TYPE_P (type))
1133 step = convert_to_ptrofftype (step);
1134 else
1135 step = fold_convert (type, step);
1136 }
1137
1138 set_iv (data, PHI_RESULT (phi), base, step, iv.no_overflow);
1139 found = true;
1140 }
1141
1142 return found;
1143 }
1144
1145 /* Marks basic ivs. */
1146
1147 static void
1148 mark_bivs (struct ivopts_data *data)
1149 {
1150 gphi *phi;
1151 gimple def;
1152 tree var;
1153 struct iv *iv, *incr_iv;
1154 struct loop *loop = data->current_loop;
1155 basic_block incr_bb;
1156 gphi_iterator psi;
1157
1158 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
1159 {
1160 phi = psi.phi ();
1161
1162 iv = get_iv (data, PHI_RESULT (phi));
1163 if (!iv)
1164 continue;
1165
1166 var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
1167 def = SSA_NAME_DEF_STMT (var);
1168 /* Don't mark iv peeled from other one as biv. */
1169 if (def
1170 && gimple_code (def) == GIMPLE_PHI
1171 && gimple_bb (def) == loop->header)
1172 continue;
1173
1174 incr_iv = get_iv (data, var);
1175 if (!incr_iv)
1176 continue;
1177
1178 /* If the increment is in the subloop, ignore it. */
1179 incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
1180 if (incr_bb->loop_father != data->current_loop
1181 || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
1182 continue;
1183
1184 iv->biv_p = true;
1185 incr_iv->biv_p = true;
1186 }
1187 }
1188
1189 /* Checks whether STMT defines a linear induction variable and stores its
1190 parameters to IV. */
1191
1192 static bool
1193 find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
1194 {
1195 tree lhs, stop;
1196 struct loop *loop = data->current_loop;
1197
1198 iv->base = NULL_TREE;
1199 iv->step = NULL_TREE;
1200
1201 if (gimple_code (stmt) != GIMPLE_ASSIGN)
1202 return false;
1203
1204 lhs = gimple_assign_lhs (stmt);
1205 if (TREE_CODE (lhs) != SSA_NAME)
1206 return false;
1207
1208 if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
1209 return false;
1210
1211 /* Stop expanding iv base at the first ssa var referred by iv step.
1212 Ideally we should stop at any ssa var, because that's expensive
1213 and unusual to happen, we just do it on the first one.
1214
1215 See PR64705 for the rationale. */
1216 stop = extract_single_var_from_expr (iv->step);
1217 iv->base = expand_simple_operations (iv->base, stop);
1218 if (contains_abnormal_ssa_name_p (iv->base)
1219 || contains_abnormal_ssa_name_p (iv->step))
1220 return false;
1221
1222 /* If STMT could throw, then do not consider STMT as defining a GIV.
1223 While this will suppress optimizations, we can not safely delete this
1224 GIV and associated statements, even if it appears it is not used. */
1225 if (stmt_could_throw_p (stmt))
1226 return false;
1227
1228 return true;
1229 }
1230
1231 /* Finds general ivs in statement STMT. */
1232
1233 static void
1234 find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
1235 {
1236 affine_iv iv;
1237
1238 if (!find_givs_in_stmt_scev (data, stmt, &iv))
1239 return;
1240
1241 set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step, iv.no_overflow);
1242 }
1243
1244 /* Finds general ivs in basic block BB. */
1245
1246 static void
1247 find_givs_in_bb (struct ivopts_data *data, basic_block bb)
1248 {
1249 gimple_stmt_iterator bsi;
1250
1251 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
1252 find_givs_in_stmt (data, gsi_stmt (bsi));
1253 }
1254
1255 /* Finds general ivs. */
1256
1257 static void
1258 find_givs (struct ivopts_data *data)
1259 {
1260 struct loop *loop = data->current_loop;
1261 basic_block *body = get_loop_body_in_dom_order (loop);
1262 unsigned i;
1263
1264 for (i = 0; i < loop->num_nodes; i++)
1265 find_givs_in_bb (data, body[i]);
1266 free (body);
1267 }
1268
1269 /* For each ssa name defined in LOOP determines whether it is an induction
1270 variable and if so, its initial value and step. */
1271
1272 static bool
1273 find_induction_variables (struct ivopts_data *data)
1274 {
1275 unsigned i;
1276 bitmap_iterator bi;
1277
1278 if (!find_bivs (data))
1279 return false;
1280
1281 find_givs (data);
1282 mark_bivs (data);
1283
1284 if (dump_file && (dump_flags & TDF_DETAILS))
1285 {
1286 struct tree_niter_desc *niter = niter_for_single_dom_exit (data);
1287
1288 if (niter)
1289 {
1290 fprintf (dump_file, " number of iterations ");
1291 print_generic_expr (dump_file, niter->niter, TDF_SLIM);
1292 if (!integer_zerop (niter->may_be_zero))
1293 {
1294 fprintf (dump_file, "; zero if ");
1295 print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
1296 }
1297 fprintf (dump_file, "\n\n");
1298 };
1299
1300 fprintf (dump_file, "Induction variables:\n\n");
1301
1302 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
1303 {
1304 if (ver_info (data, i)->iv)
1305 dump_iv (dump_file, ver_info (data, i)->iv, true);
1306 }
1307 }
1308
1309 return true;
1310 }
1311
1312 /* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1313 For address type use, ADDR_BASE is the stripped IV base, ADDR_OFFSET
1314 is the const offset stripped from IV base. For uses of other types,
1315 ADDR_BASE and ADDR_OFFSET are zero by default. */
1316
1317 static struct iv_use *
1318 record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
1319 gimple stmt, enum use_type use_type, tree addr_base = NULL,
1320 unsigned HOST_WIDE_INT addr_offset = 0)
1321 {
1322 struct iv_use *use = XCNEW (struct iv_use);
1323
1324 use->id = n_iv_uses (data);
1325 use->sub_id = 0;
1326 use->type = use_type;
1327 use->iv = iv;
1328 use->stmt = stmt;
1329 use->op_p = use_p;
1330 use->related_cands = BITMAP_ALLOC (NULL);
1331 use->next = NULL;
1332 use->addr_base = addr_base;
1333 use->addr_offset = addr_offset;
1334
1335 data->iv_uses.safe_push (use);
1336
1337 return use;
1338 }
1339
1340 /* Records a sub use of type USE_TYPE at *USE_P in STMT whose value is IV.
1341 The sub use is recorded under the one whose use id is ID_GROUP. */
1342
1343 static struct iv_use *
1344 record_sub_use (struct ivopts_data *data, tree *use_p,
1345 struct iv *iv, gimple stmt, enum use_type use_type,
1346 tree addr_base, unsigned HOST_WIDE_INT addr_offset,
1347 unsigned int id_group)
1348 {
1349 struct iv_use *use = XCNEW (struct iv_use);
1350 struct iv_use *group = iv_use (data, id_group);
1351
1352 use->id = group->id;
1353 use->sub_id = 0;
1354 use->type = use_type;
1355 use->iv = iv;
1356 use->stmt = stmt;
1357 use->op_p = use_p;
1358 use->related_cands = NULL;
1359 use->addr_base = addr_base;
1360 use->addr_offset = addr_offset;
1361
1362 /* Sub use list is maintained in offset ascending order. */
1363 if (addr_offset <= group->addr_offset)
1364 {
1365 use->related_cands = group->related_cands;
1366 group->related_cands = NULL;
1367 use->next = group;
1368 data->iv_uses[id_group] = use;
1369 }
1370 else
1371 {
1372 struct iv_use *pre;
1373 do
1374 {
1375 pre = group;
1376 group = group->next;
1377 }
1378 while (group && addr_offset > group->addr_offset);
1379 use->next = pre->next;
1380 pre->next = use;
1381 }
1382
1383 return use;
1384 }
1385
1386 /* Checks whether OP is a loop-level invariant and if so, records it.
1387 NONLINEAR_USE is true if the invariant is used in a way we do not
1388 handle specially. */
1389
1390 static void
1391 record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
1392 {
1393 basic_block bb;
1394 struct version_info *info;
1395
1396 if (TREE_CODE (op) != SSA_NAME
1397 || virtual_operand_p (op))
1398 return;
1399
1400 bb = gimple_bb (SSA_NAME_DEF_STMT (op));
1401 if (bb
1402 && flow_bb_inside_loop_p (data->current_loop, bb))
1403 return;
1404
1405 info = name_info (data, op);
1406 info->name = op;
1407 info->has_nonlin_use |= nonlinear_use;
1408 if (!info->inv_id)
1409 info->inv_id = ++data->max_inv_id;
1410 bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
1411 }
1412
1413 /* Checks whether the use OP is interesting and if so, records it. */
1414
1415 static struct iv_use *
1416 find_interesting_uses_op (struct ivopts_data *data, tree op)
1417 {
1418 struct iv *iv;
1419 gimple stmt;
1420 struct iv_use *use;
1421
1422 if (TREE_CODE (op) != SSA_NAME)
1423 return NULL;
1424
1425 iv = get_iv (data, op);
1426 if (!iv)
1427 return NULL;
1428
1429 if (iv->have_use_for)
1430 {
1431 use = iv_use (data, iv->use_id);
1432
1433 gcc_assert (use->type == USE_NONLINEAR_EXPR);
1434 return use;
1435 }
1436
1437 if (integer_zerop (iv->step))
1438 {
1439 record_invariant (data, op, true);
1440 return NULL;
1441 }
1442 iv->have_use_for = true;
1443
1444 stmt = SSA_NAME_DEF_STMT (op);
1445 gcc_assert (gimple_code (stmt) == GIMPLE_PHI
1446 || is_gimple_assign (stmt));
1447
1448 use = record_use (data, NULL, iv, stmt, USE_NONLINEAR_EXPR);
1449 iv->use_id = use->id;
1450
1451 return use;
1452 }
1453
1454 /* Given a condition in statement STMT, checks whether it is a compare
1455 of an induction variable and an invariant. If this is the case,
1456 CONTROL_VAR is set to location of the iv, BOUND to the location of
1457 the invariant, IV_VAR and IV_BOUND are set to the corresponding
1458 induction variable descriptions, and true is returned. If this is not
1459 the case, CONTROL_VAR and BOUND are set to the arguments of the
1460 condition and false is returned. */
1461
1462 static bool
1463 extract_cond_operands (struct ivopts_data *data, gimple stmt,
1464 tree **control_var, tree **bound,
1465 struct iv **iv_var, struct iv **iv_bound)
1466 {
1467 /* The objects returned when COND has constant operands. */
1468 static struct iv const_iv;
1469 static tree zero;
1470 tree *op0 = &zero, *op1 = &zero;
1471 struct iv *iv0 = &const_iv, *iv1 = &const_iv;
1472 bool ret = false;
1473
1474 if (gimple_code (stmt) == GIMPLE_COND)
1475 {
1476 gcond *cond_stmt = as_a <gcond *> (stmt);
1477 op0 = gimple_cond_lhs_ptr (cond_stmt);
1478 op1 = gimple_cond_rhs_ptr (cond_stmt);
1479 }
1480 else
1481 {
1482 op0 = gimple_assign_rhs1_ptr (stmt);
1483 op1 = gimple_assign_rhs2_ptr (stmt);
1484 }
1485
1486 zero = integer_zero_node;
1487 const_iv.step = integer_zero_node;
1488
1489 if (TREE_CODE (*op0) == SSA_NAME)
1490 iv0 = get_iv (data, *op0);
1491 if (TREE_CODE (*op1) == SSA_NAME)
1492 iv1 = get_iv (data, *op1);
1493
1494 /* Exactly one of the compared values must be an iv, and the other one must
1495 be an invariant. */
1496 if (!iv0 || !iv1)
1497 goto end;
1498
1499 if (integer_zerop (iv0->step))
1500 {
1501 /* Control variable may be on the other side. */
1502 std::swap (op0, op1);
1503 std::swap (iv0, iv1);
1504 }
1505 ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);
1506
1507 end:
1508 if (control_var)
1509 *control_var = op0;
1510 if (iv_var)
1511 *iv_var = iv0;
1512 if (bound)
1513 *bound = op1;
1514 if (iv_bound)
1515 *iv_bound = iv1;
1516
1517 return ret;
1518 }
1519
1520 /* Checks whether the condition in STMT is interesting and if so,
1521 records it. */
1522
1523 static void
1524 find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
1525 {
1526 tree *var_p, *bound_p;
1527 struct iv *var_iv;
1528
1529 if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
1530 {
1531 find_interesting_uses_op (data, *var_p);
1532 find_interesting_uses_op (data, *bound_p);
1533 return;
1534 }
1535
1536 record_use (data, NULL, var_iv, stmt, USE_COMPARE);
1537 }
1538
1539 /* Returns the outermost loop EXPR is obviously invariant in
1540 relative to the loop LOOP, i.e. if all its operands are defined
1541 outside of the returned loop. Returns NULL if EXPR is not
1542 even obviously invariant in LOOP. */
1543
1544 struct loop *
1545 outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
1546 {
1547 basic_block def_bb;
1548 unsigned i, len;
1549
1550 if (is_gimple_min_invariant (expr))
1551 return current_loops->tree_root;
1552
1553 if (TREE_CODE (expr) == SSA_NAME)
1554 {
1555 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1556 if (def_bb)
1557 {
1558 if (flow_bb_inside_loop_p (loop, def_bb))
1559 return NULL;
1560 return superloop_at_depth (loop,
1561 loop_depth (def_bb->loop_father) + 1);
1562 }
1563
1564 return current_loops->tree_root;
1565 }
1566
1567 if (!EXPR_P (expr))
1568 return NULL;
1569
1570 unsigned maxdepth = 0;
1571 len = TREE_OPERAND_LENGTH (expr);
1572 for (i = 0; i < len; i++)
1573 {
1574 struct loop *ivloop;
1575 if (!TREE_OPERAND (expr, i))
1576 continue;
1577
1578 ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
1579 if (!ivloop)
1580 return NULL;
1581 maxdepth = MAX (maxdepth, loop_depth (ivloop));
1582 }
1583
1584 return superloop_at_depth (loop, maxdepth);
1585 }
1586
1587 /* Returns true if expression EXPR is obviously invariant in LOOP,
1588 i.e. if all its operands are defined outside of the LOOP. LOOP
1589 should not be the function body. */
1590
1591 bool
1592 expr_invariant_in_loop_p (struct loop *loop, tree expr)
1593 {
1594 basic_block def_bb;
1595 unsigned i, len;
1596
1597 gcc_assert (loop_depth (loop) > 0);
1598
1599 if (is_gimple_min_invariant (expr))
1600 return true;
1601
1602 if (TREE_CODE (expr) == SSA_NAME)
1603 {
1604 def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
1605 if (def_bb
1606 && flow_bb_inside_loop_p (loop, def_bb))
1607 return false;
1608
1609 return true;
1610 }
1611
1612 if (!EXPR_P (expr))
1613 return false;
1614
1615 len = TREE_OPERAND_LENGTH (expr);
1616 for (i = 0; i < len; i++)
1617 if (TREE_OPERAND (expr, i)
1618 && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
1619 return false;
1620
1621 return true;
1622 }
1623
1624 /* Cumulates the steps of indices into DATA and replaces their values with the
1625 initial ones. Returns false when the value of the index cannot be determined.
1626 Callback for for_each_index. */
1627
1628 struct ifs_ivopts_data
1629 {
1630 struct ivopts_data *ivopts_data;
1631 gimple stmt;
1632 tree step;
1633 };
1634
1635 static bool
1636 idx_find_step (tree base, tree *idx, void *data)
1637 {
1638 struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
1639 struct iv *iv;
1640 bool use_overflow_semantics = false;
1641 tree step, iv_base, iv_step, lbound, off;
1642 struct loop *loop = dta->ivopts_data->current_loop;
1643
1644 /* If base is a component ref, require that the offset of the reference
1645 be invariant. */
1646 if (TREE_CODE (base) == COMPONENT_REF)
1647 {
1648 off = component_ref_field_offset (base);
1649 return expr_invariant_in_loop_p (loop, off);
1650 }
1651
1652 /* If base is array, first check whether we will be able to move the
1653 reference out of the loop (in order to take its address in strength
1654 reduction). In order for this to work we need both lower bound
1655 and step to be loop invariants. */
1656 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1657 {
1658 /* Moreover, for a range, the size needs to be invariant as well. */
1659 if (TREE_CODE (base) == ARRAY_RANGE_REF
1660 && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
1661 return false;
1662
1663 step = array_ref_element_size (base);
1664 lbound = array_ref_low_bound (base);
1665
1666 if (!expr_invariant_in_loop_p (loop, step)
1667 || !expr_invariant_in_loop_p (loop, lbound))
1668 return false;
1669 }
1670
1671 if (TREE_CODE (*idx) != SSA_NAME)
1672 return true;
1673
1674 iv = get_iv (dta->ivopts_data, *idx);
1675 if (!iv)
1676 return false;
1677
1678 /* XXX We produce for a base of *D42 with iv->base being &x[0]
1679 *&x[0], which is not folded and does not trigger the
1680 ARRAY_REF path below. */
1681 *idx = iv->base;
1682
1683 if (integer_zerop (iv->step))
1684 return true;
1685
1686 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1687 {
1688 step = array_ref_element_size (base);
1689
1690 /* We only handle addresses whose step is an integer constant. */
1691 if (TREE_CODE (step) != INTEGER_CST)
1692 return false;
1693 }
1694 else
1695 /* The step for pointer arithmetics already is 1 byte. */
1696 step = size_one_node;
1697
1698 iv_base = iv->base;
1699 iv_step = iv->step;
1700 if (iv->no_overflow && nowrap_type_p (TREE_TYPE (iv_step)))
1701 use_overflow_semantics = true;
1702
1703 if (!convert_affine_scev (dta->ivopts_data->current_loop,
1704 sizetype, &iv_base, &iv_step, dta->stmt,
1705 use_overflow_semantics))
1706 {
1707 /* The index might wrap. */
1708 return false;
1709 }
1710
1711 step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
1712 dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);
1713
1714 return true;
1715 }
1716
1717 /* Records use in index IDX. Callback for for_each_index. Ivopts data
1718 object is passed to it in DATA. */
1719
1720 static bool
1721 idx_record_use (tree base, tree *idx,
1722 void *vdata)
1723 {
1724 struct ivopts_data *data = (struct ivopts_data *) vdata;
1725 find_interesting_uses_op (data, *idx);
1726 if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
1727 {
1728 find_interesting_uses_op (data, array_ref_element_size (base));
1729 find_interesting_uses_op (data, array_ref_low_bound (base));
1730 }
1731 return true;
1732 }
1733
1734 /* If we can prove that TOP = cst * BOT for some constant cst,
1735 store cst to MUL and return true. Otherwise return false.
1736 The returned value is always sign-extended, regardless of the
1737 signedness of TOP and BOT. */
1738
1739 static bool
1740 constant_multiple_of (tree top, tree bot, widest_int *mul)
1741 {
1742 tree mby;
1743 enum tree_code code;
1744 unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
1745 widest_int res, p0, p1;
1746
1747 STRIP_NOPS (top);
1748 STRIP_NOPS (bot);
1749
1750 if (operand_equal_p (top, bot, 0))
1751 {
1752 *mul = 1;
1753 return true;
1754 }
1755
1756 code = TREE_CODE (top);
1757 switch (code)
1758 {
1759 case MULT_EXPR:
1760 mby = TREE_OPERAND (top, 1);
1761 if (TREE_CODE (mby) != INTEGER_CST)
1762 return false;
1763
1764 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
1765 return false;
1766
1767 *mul = wi::sext (res * wi::to_widest (mby), precision);
1768 return true;
1769
1770 case PLUS_EXPR:
1771 case MINUS_EXPR:
1772 if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
1773 || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
1774 return false;
1775
1776 if (code == MINUS_EXPR)
1777 p1 = -p1;
1778 *mul = wi::sext (p0 + p1, precision);
1779 return true;
1780
1781 case INTEGER_CST:
1782 if (TREE_CODE (bot) != INTEGER_CST)
1783 return false;
1784
1785 p0 = widest_int::from (top, SIGNED);
1786 p1 = widest_int::from (bot, SIGNED);
1787 if (p1 == 0)
1788 return false;
1789 *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
1790 return res == 0;
1791
1792 default:
1793 return false;
1794 }
1795 }
1796
1797 /* Return true if memory reference REF with step STEP may be unaligned. */
1798
1799 static bool
1800 may_be_unaligned_p (tree ref, tree step)
1801 {
1802 /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
1803 thus they are not misaligned. */
1804 if (TREE_CODE (ref) == TARGET_MEM_REF)
1805 return false;
1806
1807 unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
1808 if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
1809 align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));
1810
1811 unsigned HOST_WIDE_INT bitpos;
1812 unsigned int ref_align;
1813 get_object_alignment_1 (ref, &ref_align, &bitpos);
1814 if (ref_align < align
1815 || (bitpos % align) != 0
1816 || (bitpos % BITS_PER_UNIT) != 0)
1817 return true;
1818
1819 unsigned int trailing_zeros = tree_ctz (step);
1820 if (trailing_zeros < HOST_BITS_PER_INT
1821 && (1U << trailing_zeros) * BITS_PER_UNIT < align)
1822 return true;
1823
1824 return false;
1825 }
1826
1827 /* Return true if EXPR may be non-addressable. */
1828
1829 bool
1830 may_be_nonaddressable_p (tree expr)
1831 {
1832 switch (TREE_CODE (expr))
1833 {
1834 case TARGET_MEM_REF:
1835 /* TARGET_MEM_REFs are translated directly to valid MEMs on the
1836 target, thus they are always addressable. */
1837 return false;
1838
1839 case COMPONENT_REF:
1840 return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
1841 || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1842
1843 case VIEW_CONVERT_EXPR:
1844 /* This kind of view-conversions may wrap non-addressable objects
1845 and make them look addressable. After some processing the
1846 non-addressability may be uncovered again, causing ADDR_EXPRs
1847 of inappropriate objects to be built. */
1848 if (is_gimple_reg (TREE_OPERAND (expr, 0))
1849 || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
1850 return true;
1851
1852 /* ... fall through ... */
1853
1854 case ARRAY_REF:
1855 case ARRAY_RANGE_REF:
1856 return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));
1857
1858 CASE_CONVERT:
1859 return true;
1860
1861 default:
1862 break;
1863 }
1864
1865 return false;
1866 }
1867
1868 static tree
1869 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset);
1870
1871 /* Record a use of type USE_TYPE at *USE_P in STMT whose value is IV.
1872 If there is an existing use which has same stripped iv base and step,
1873 this function records this one as a sub use to that; otherwise records
1874 it as a normal one. */
1875
1876 static struct iv_use *
1877 record_group_use (struct ivopts_data *data, tree *use_p,
1878 struct iv *iv, gimple stmt, enum use_type use_type)
1879 {
1880 unsigned int i;
1881 struct iv_use *use;
1882 tree addr_base;
1883 unsigned HOST_WIDE_INT addr_offset;
1884
1885 /* Only support sub use for address type uses, that is, with base
1886 object. */
1887 if (!iv->base_object)
1888 return record_use (data, use_p, iv, stmt, use_type);
1889
1890 addr_base = strip_offset (iv->base, &addr_offset);
1891 for (i = 0; i < n_iv_uses (data); i++)
1892 {
1893 use = iv_use (data, i);
1894 if (use->type != USE_ADDRESS || !use->iv->base_object)
1895 continue;
1896
1897 /* Check if it has the same stripped base and step. */
1898 if (operand_equal_p (iv->base_object, use->iv->base_object, 0)
1899 && operand_equal_p (iv->step, use->iv->step, 0)
1900 && operand_equal_p (addr_base, use->addr_base, 0))
1901 break;
1902 }
1903
1904 if (i == n_iv_uses (data))
1905 return record_use (data, use_p, iv, stmt,
1906 use_type, addr_base, addr_offset);
1907 else
1908 return record_sub_use (data, use_p, iv, stmt,
1909 use_type, addr_base, addr_offset, i);
1910 }
1911
1912 /* Finds addresses in *OP_P inside STMT. */
1913
1914 static void
1915 find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
1916 {
1917 tree base = *op_p, step = size_zero_node;
1918 struct iv *civ;
1919 struct ifs_ivopts_data ifs_ivopts_data;
1920
1921 /* Do not play with volatile memory references. A bit too conservative,
1922 perhaps, but safe. */
1923 if (gimple_has_volatile_ops (stmt))
1924 goto fail;
1925
1926 /* Ignore bitfields for now. Not really something terribly complicated
1927 to handle. TODO. */
1928 if (TREE_CODE (base) == BIT_FIELD_REF)
1929 goto fail;
1930
1931 base = unshare_expr (base);
1932
1933 if (TREE_CODE (base) == TARGET_MEM_REF)
1934 {
1935 tree type = build_pointer_type (TREE_TYPE (base));
1936 tree astep;
1937
1938 if (TMR_BASE (base)
1939 && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
1940 {
1941 civ = get_iv (data, TMR_BASE (base));
1942 if (!civ)
1943 goto fail;
1944
1945 TMR_BASE (base) = civ->base;
1946 step = civ->step;
1947 }
1948 if (TMR_INDEX2 (base)
1949 && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
1950 {
1951 civ = get_iv (data, TMR_INDEX2 (base));
1952 if (!civ)
1953 goto fail;
1954
1955 TMR_INDEX2 (base) = civ->base;
1956 step = civ->step;
1957 }
1958 if (TMR_INDEX (base)
1959 && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
1960 {
1961 civ = get_iv (data, TMR_INDEX (base));
1962 if (!civ)
1963 goto fail;
1964
1965 TMR_INDEX (base) = civ->base;
1966 astep = civ->step;
1967
1968 if (astep)
1969 {
1970 if (TMR_STEP (base))
1971 astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);
1972
1973 step = fold_build2 (PLUS_EXPR, type, step, astep);
1974 }
1975 }
1976
1977 if (integer_zerop (step))
1978 goto fail;
1979 base = tree_mem_ref_addr (type, base);
1980 }
1981 else
1982 {
1983 ifs_ivopts_data.ivopts_data = data;
1984 ifs_ivopts_data.stmt = stmt;
1985 ifs_ivopts_data.step = size_zero_node;
1986 if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
1987 || integer_zerop (ifs_ivopts_data.step))
1988 goto fail;
1989 step = ifs_ivopts_data.step;
1990
1991 /* Check that the base expression is addressable. This needs
1992 to be done after substituting bases of IVs into it. */
1993 if (may_be_nonaddressable_p (base))
1994 goto fail;
1995
1996 /* Moreover, on strict alignment platforms, check that it is
1997 sufficiently aligned. */
1998 if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
1999 goto fail;
2000
2001 base = build_fold_addr_expr (base);
2002
2003 /* Substituting bases of IVs into the base expression might
2004 have caused folding opportunities. */
2005 if (TREE_CODE (base) == ADDR_EXPR)
2006 {
2007 tree *ref = &TREE_OPERAND (base, 0);
2008 while (handled_component_p (*ref))
2009 ref = &TREE_OPERAND (*ref, 0);
2010 if (TREE_CODE (*ref) == MEM_REF)
2011 {
2012 tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
2013 TREE_OPERAND (*ref, 0),
2014 TREE_OPERAND (*ref, 1));
2015 if (tem)
2016 *ref = tem;
2017 }
2018 }
2019 }
2020
2021 civ = alloc_iv (data, base, step);
2022 record_group_use (data, op_p, civ, stmt, USE_ADDRESS);
2023 return;
2024
2025 fail:
2026 for_each_index (op_p, idx_record_use, data);
2027 }
2028
2029 /* Finds and records invariants used in STMT. */
2030
2031 static void
2032 find_invariants_stmt (struct ivopts_data *data, gimple stmt)
2033 {
2034 ssa_op_iter iter;
2035 use_operand_p use_p;
2036 tree op;
2037
2038 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2039 {
2040 op = USE_FROM_PTR (use_p);
2041 record_invariant (data, op, false);
2042 }
2043 }
2044
2045 /* Finds interesting uses of induction variables in the statement STMT. */
2046
2047 static void
2048 find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
2049 {
2050 struct iv *iv;
2051 tree op, *lhs, *rhs;
2052 ssa_op_iter iter;
2053 use_operand_p use_p;
2054 enum tree_code code;
2055
2056 find_invariants_stmt (data, stmt);
2057
2058 if (gimple_code (stmt) == GIMPLE_COND)
2059 {
2060 find_interesting_uses_cond (data, stmt);
2061 return;
2062 }
2063
2064 if (is_gimple_assign (stmt))
2065 {
2066 lhs = gimple_assign_lhs_ptr (stmt);
2067 rhs = gimple_assign_rhs1_ptr (stmt);
2068
2069 if (TREE_CODE (*lhs) == SSA_NAME)
2070 {
2071 /* If the statement defines an induction variable, the uses are not
2072 interesting by themselves. */
2073
2074 iv = get_iv (data, *lhs);
2075
2076 if (iv && !integer_zerop (iv->step))
2077 return;
2078 }
2079
2080 code = gimple_assign_rhs_code (stmt);
2081 if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
2082 && (REFERENCE_CLASS_P (*rhs)
2083 || is_gimple_val (*rhs)))
2084 {
2085 if (REFERENCE_CLASS_P (*rhs))
2086 find_interesting_uses_address (data, stmt, rhs);
2087 else
2088 find_interesting_uses_op (data, *rhs);
2089
2090 if (REFERENCE_CLASS_P (*lhs))
2091 find_interesting_uses_address (data, stmt, lhs);
2092 return;
2093 }
2094 else if (TREE_CODE_CLASS (code) == tcc_comparison)
2095 {
2096 find_interesting_uses_cond (data, stmt);
2097 return;
2098 }
2099
2100 /* TODO -- we should also handle address uses of type
2101
2102 memory = call (whatever);
2103
2104 and
2105
2106 call (memory). */
2107 }
2108
2109 if (gimple_code (stmt) == GIMPLE_PHI
2110 && gimple_bb (stmt) == data->current_loop->header)
2111 {
2112 iv = get_iv (data, PHI_RESULT (stmt));
2113
2114 if (iv && !integer_zerop (iv->step))
2115 return;
2116 }
2117
2118 FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
2119 {
2120 op = USE_FROM_PTR (use_p);
2121
2122 if (TREE_CODE (op) != SSA_NAME)
2123 continue;
2124
2125 iv = get_iv (data, op);
2126 if (!iv)
2127 continue;
2128
2129 find_interesting_uses_op (data, op);
2130 }
2131 }
2132
2133 /* Finds interesting uses of induction variables outside of loops
2134 on loop exit edge EXIT. */
2135
2136 static void
2137 find_interesting_uses_outside (struct ivopts_data *data, edge exit)
2138 {
2139 gphi *phi;
2140 gphi_iterator psi;
2141 tree def;
2142
2143 for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
2144 {
2145 phi = psi.phi ();
2146 def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
2147 if (!virtual_operand_p (def))
2148 find_interesting_uses_op (data, def);
2149 }
2150 }
2151
2152 /* Finds uses of the induction variables that are interesting. */
2153
2154 static void
2155 find_interesting_uses (struct ivopts_data *data)
2156 {
2157 basic_block bb;
2158 gimple_stmt_iterator bsi;
2159 basic_block *body = get_loop_body (data->current_loop);
2160 unsigned i;
2161 struct version_info *info;
2162 edge e;
2163
2164 if (dump_file && (dump_flags & TDF_DETAILS))
2165 fprintf (dump_file, "Uses:\n\n");
2166
2167 for (i = 0; i < data->current_loop->num_nodes; i++)
2168 {
2169 edge_iterator ei;
2170 bb = body[i];
2171
2172 FOR_EACH_EDGE (e, ei, bb->succs)
2173 if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
2174 && !flow_bb_inside_loop_p (data->current_loop, e->dest))
2175 find_interesting_uses_outside (data, e);
2176
2177 for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2178 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2179 for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
2180 if (!is_gimple_debug (gsi_stmt (bsi)))
2181 find_interesting_uses_stmt (data, gsi_stmt (bsi));
2182 }
2183
2184 if (dump_file && (dump_flags & TDF_DETAILS))
2185 {
2186 bitmap_iterator bi;
2187
2188 fprintf (dump_file, "\n");
2189
2190 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2191 {
2192 info = ver_info (data, i);
2193 if (info->inv_id)
2194 {
2195 fprintf (dump_file, " ");
2196 print_generic_expr (dump_file, info->name, TDF_SLIM);
2197 fprintf (dump_file, " is invariant (%d)%s\n",
2198 info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
2199 }
2200 }
2201
2202 fprintf (dump_file, "\n");
2203 }
2204
2205 free (body);
2206 }
2207
2208 /* Compute maximum offset of [base + offset] addressing mode
2209 for memory reference represented by USE. */
2210
2211 static HOST_WIDE_INT
2212 compute_max_addr_offset (struct iv_use *use)
2213 {
2214 int width;
2215 rtx reg, addr;
2216 HOST_WIDE_INT i, off;
2217 unsigned list_index, num;
2218 addr_space_t as;
2219 machine_mode mem_mode, addr_mode;
2220 static vec<HOST_WIDE_INT> max_offset_list;
2221
2222 as = TYPE_ADDR_SPACE (TREE_TYPE (use->iv->base));
2223 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2224
2225 num = max_offset_list.length ();
2226 list_index = (unsigned) as * MAX_MACHINE_MODE + (unsigned) mem_mode;
2227 if (list_index >= num)
2228 {
2229 max_offset_list.safe_grow (list_index + MAX_MACHINE_MODE);
2230 for (; num < max_offset_list.length (); num++)
2231 max_offset_list[num] = -1;
2232 }
2233
2234 off = max_offset_list[list_index];
2235 if (off != -1)
2236 return off;
2237
2238 addr_mode = targetm.addr_space.address_mode (as);
2239 reg = gen_raw_REG (addr_mode, LAST_VIRTUAL_REGISTER + 1);
2240 addr = gen_rtx_fmt_ee (PLUS, addr_mode, reg, NULL_RTX);
2241
2242 width = GET_MODE_BITSIZE (addr_mode) - 1;
2243 if (width > (HOST_BITS_PER_WIDE_INT - 1))
2244 width = HOST_BITS_PER_WIDE_INT - 1;
2245
2246 for (i = width; i > 0; i--)
2247 {
2248 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
2249 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2250 if (memory_address_addr_space_p (mem_mode, addr, as))
2251 break;
2252
2253 /* For some strict-alignment targets, the offset must be naturally
2254 aligned. Try an aligned offset if mem_mode is not QImode. */
2255 off = ((unsigned HOST_WIDE_INT) 1 << i);
2256 if (off > GET_MODE_SIZE (mem_mode) && mem_mode != QImode)
2257 {
2258 off -= GET_MODE_SIZE (mem_mode);
2259 XEXP (addr, 1) = gen_int_mode (off, addr_mode);
2260 if (memory_address_addr_space_p (mem_mode, addr, as))
2261 break;
2262 }
2263 }
2264 if (i == 0)
2265 off = 0;
2266
2267 max_offset_list[list_index] = off;
2268 return off;
2269 }
2270
2271 /* Check if all small groups should be split. Return true if and
2272 only if:
2273
2274 1) At least one groups contain two uses with different offsets.
2275 2) No group contains more than two uses with different offsets.
2276
2277 Return false otherwise. We want to split such groups because:
2278
2279 1) Small groups don't have much benefit and may interfer with
2280 general candidate selection.
2281 2) Size for problem with only small groups is usually small and
2282 general algorithm can handle it well.
2283
2284 TODO -- Above claim may not hold when auto increment is supported. */
2285
2286 static bool
2287 split_all_small_groups (struct ivopts_data *data)
2288 {
2289 bool split_p = false;
2290 unsigned int i, n, distinct;
2291 struct iv_use *pre, *use;
2292
2293 n = n_iv_uses (data);
2294 for (i = 0; i < n; i++)
2295 {
2296 use = iv_use (data, i);
2297 if (!use->next)
2298 continue;
2299
2300 distinct = 1;
2301 gcc_assert (use->type == USE_ADDRESS);
2302 for (pre = use, use = use->next; use; pre = use, use = use->next)
2303 {
2304 if (pre->addr_offset != use->addr_offset)
2305 distinct++;
2306
2307 if (distinct > 2)
2308 return false;
2309 }
2310 if (distinct == 2)
2311 split_p = true;
2312 }
2313
2314 return split_p;
2315 }
2316
2317 /* For each group of address type uses, this function further groups
2318 these uses according to the maximum offset supported by target's
2319 [base + offset] addressing mode. */
2320
2321 static void
2322 group_address_uses (struct ivopts_data *data)
2323 {
2324 HOST_WIDE_INT max_offset = -1;
2325 unsigned int i, n, sub_id;
2326 struct iv_use *pre, *use;
2327 unsigned HOST_WIDE_INT addr_offset_first;
2328
2329 /* Reset max offset to split all small groups. */
2330 if (split_all_small_groups (data))
2331 max_offset = 0;
2332
2333 n = n_iv_uses (data);
2334 for (i = 0; i < n; i++)
2335 {
2336 use = iv_use (data, i);
2337 if (!use->next)
2338 continue;
2339
2340 gcc_assert (use->type == USE_ADDRESS);
2341 if (max_offset != 0)
2342 max_offset = compute_max_addr_offset (use);
2343
2344 while (use)
2345 {
2346 sub_id = 0;
2347 addr_offset_first = use->addr_offset;
2348 /* Only uses with offset that can fit in offset part against
2349 the first use can be grouped together. */
2350 for (pre = use, use = use->next;
2351 use && (use->addr_offset - addr_offset_first
2352 <= (unsigned HOST_WIDE_INT) max_offset);
2353 pre = use, use = use->next)
2354 {
2355 use->id = pre->id;
2356 use->sub_id = ++sub_id;
2357 }
2358
2359 /* Break the list and create new group. */
2360 if (use)
2361 {
2362 pre->next = NULL;
2363 use->id = n_iv_uses (data);
2364 use->related_cands = BITMAP_ALLOC (NULL);
2365 data->iv_uses.safe_push (use);
2366 }
2367 }
2368 }
2369
2370 if (dump_file && (dump_flags & TDF_DETAILS))
2371 dump_uses (dump_file, data);
2372 }
2373
2374 /* Strips constant offsets from EXPR and stores them to OFFSET. If INSIDE_ADDR
2375 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2376 we are at the top-level of the processed address. */
2377
2378 static tree
2379 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2380 HOST_WIDE_INT *offset)
2381 {
2382 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2383 enum tree_code code;
2384 tree type, orig_type = TREE_TYPE (expr);
2385 HOST_WIDE_INT off0, off1, st;
2386 tree orig_expr = expr;
2387
2388 STRIP_NOPS (expr);
2389
2390 type = TREE_TYPE (expr);
2391 code = TREE_CODE (expr);
2392 *offset = 0;
2393
2394 switch (code)
2395 {
2396 case INTEGER_CST:
2397 if (!cst_and_fits_in_hwi (expr)
2398 || integer_zerop (expr))
2399 return orig_expr;
2400
2401 *offset = int_cst_value (expr);
2402 return build_int_cst (orig_type, 0);
2403
2404 case POINTER_PLUS_EXPR:
2405 case PLUS_EXPR:
2406 case MINUS_EXPR:
2407 op0 = TREE_OPERAND (expr, 0);
2408 op1 = TREE_OPERAND (expr, 1);
2409
2410 op0 = strip_offset_1 (op0, false, false, &off0);
2411 op1 = strip_offset_1 (op1, false, false, &off1);
2412
2413 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2414 if (op0 == TREE_OPERAND (expr, 0)
2415 && op1 == TREE_OPERAND (expr, 1))
2416 return orig_expr;
2417
2418 if (integer_zerop (op1))
2419 expr = op0;
2420 else if (integer_zerop (op0))
2421 {
2422 if (code == MINUS_EXPR)
2423 expr = fold_build1 (NEGATE_EXPR, type, op1);
2424 else
2425 expr = op1;
2426 }
2427 else
2428 expr = fold_build2 (code, type, op0, op1);
2429
2430 return fold_convert (orig_type, expr);
2431
2432 case MULT_EXPR:
2433 op1 = TREE_OPERAND (expr, 1);
2434 if (!cst_and_fits_in_hwi (op1))
2435 return orig_expr;
2436
2437 op0 = TREE_OPERAND (expr, 0);
2438 op0 = strip_offset_1 (op0, false, false, &off0);
2439 if (op0 == TREE_OPERAND (expr, 0))
2440 return orig_expr;
2441
2442 *offset = off0 * int_cst_value (op1);
2443 if (integer_zerop (op0))
2444 expr = op0;
2445 else
2446 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2447
2448 return fold_convert (orig_type, expr);
2449
2450 case ARRAY_REF:
2451 case ARRAY_RANGE_REF:
2452 if (!inside_addr)
2453 return orig_expr;
2454
2455 step = array_ref_element_size (expr);
2456 if (!cst_and_fits_in_hwi (step))
2457 break;
2458
2459 st = int_cst_value (step);
2460 op1 = TREE_OPERAND (expr, 1);
2461 op1 = strip_offset_1 (op1, false, false, &off1);
2462 *offset = off1 * st;
2463
2464 if (top_compref
2465 && integer_zerop (op1))
2466 {
2467 /* Strip the component reference completely. */
2468 op0 = TREE_OPERAND (expr, 0);
2469 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2470 *offset += off0;
2471 return op0;
2472 }
2473 break;
2474
2475 case COMPONENT_REF:
2476 {
2477 tree field;
2478
2479 if (!inside_addr)
2480 return orig_expr;
2481
2482 tmp = component_ref_field_offset (expr);
2483 field = TREE_OPERAND (expr, 1);
2484 if (top_compref
2485 && cst_and_fits_in_hwi (tmp)
2486 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2487 {
2488 HOST_WIDE_INT boffset, abs_off;
2489
2490 /* Strip the component reference completely. */
2491 op0 = TREE_OPERAND (expr, 0);
2492 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2493 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2494 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2495 if (boffset < 0)
2496 abs_off = -abs_off;
2497
2498 *offset = off0 + int_cst_value (tmp) + abs_off;
2499 return op0;
2500 }
2501 }
2502 break;
2503
2504 case ADDR_EXPR:
2505 op0 = TREE_OPERAND (expr, 0);
2506 op0 = strip_offset_1 (op0, true, true, &off0);
2507 *offset += off0;
2508
2509 if (op0 == TREE_OPERAND (expr, 0))
2510 return orig_expr;
2511
2512 expr = build_fold_addr_expr (op0);
2513 return fold_convert (orig_type, expr);
2514
2515 case MEM_REF:
2516 /* ??? Offset operand? */
2517 inside_addr = false;
2518 break;
2519
2520 default:
2521 return orig_expr;
2522 }
2523
2524 /* Default handling of expressions for that we want to recurse into
2525 the first operand. */
2526 op0 = TREE_OPERAND (expr, 0);
2527 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2528 *offset += off0;
2529
2530 if (op0 == TREE_OPERAND (expr, 0)
2531 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2532 return orig_expr;
2533
2534 expr = copy_node (expr);
2535 TREE_OPERAND (expr, 0) = op0;
2536 if (op1)
2537 TREE_OPERAND (expr, 1) = op1;
2538
2539 /* Inside address, we might strip the top level component references,
2540 thus changing type of the expression. Handling of ADDR_EXPR
2541 will fix that. */
2542 expr = fold_convert (orig_type, expr);
2543
2544 return expr;
2545 }
2546
2547 /* Strips constant offsets from EXPR and stores them to OFFSET. */
2548
2549 static tree
2550 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2551 {
2552 HOST_WIDE_INT off;
2553 tree core = strip_offset_1 (expr, false, false, &off);
2554 *offset = off;
2555 return core;
2556 }
2557
2558 /* Returns variant of TYPE that can be used as base for different uses.
2559 We return unsigned type with the same precision, which avoids problems
2560 with overflows. */
2561
2562 static tree
2563 generic_type_for (tree type)
2564 {
2565 if (POINTER_TYPE_P (type))
2566 return unsigned_type_for (type);
2567
2568 if (TYPE_UNSIGNED (type))
2569 return type;
2570
2571 return unsigned_type_for (type);
2572 }
2573
2574 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2575 the bitmap to that we should store it. */
2576
2577 static struct ivopts_data *fd_ivopts_data;
2578 static tree
2579 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2580 {
2581 bitmap *depends_on = (bitmap *) data;
2582 struct version_info *info;
2583
2584 if (TREE_CODE (*expr_p) != SSA_NAME)
2585 return NULL_TREE;
2586 info = name_info (fd_ivopts_data, *expr_p);
2587
2588 if (!info->inv_id || info->has_nonlin_use)
2589 return NULL_TREE;
2590
2591 if (!*depends_on)
2592 *depends_on = BITMAP_ALLOC (NULL);
2593 bitmap_set_bit (*depends_on, info->inv_id);
2594
2595 return NULL_TREE;
2596 }
2597
2598 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2599 position to POS. If USE is not NULL, the candidate is set as related to
2600 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2601 replacement of the final value of the iv by a direct computation. */
2602
2603 static struct iv_cand *
2604 add_candidate_1 (struct ivopts_data *data,
2605 tree base, tree step, bool important, enum iv_position pos,
2606 struct iv_use *use, gimple incremented_at)
2607 {
2608 unsigned i;
2609 struct iv_cand *cand = NULL;
2610 tree type, orig_type;
2611
2612 /* For non-original variables, make sure their values are computed in a type
2613 that does not invoke undefined behavior on overflows (since in general,
2614 we cannot prove that these induction variables are non-wrapping). */
2615 if (pos != IP_ORIGINAL)
2616 {
2617 orig_type = TREE_TYPE (base);
2618 type = generic_type_for (orig_type);
2619 if (type != orig_type)
2620 {
2621 base = fold_convert (type, base);
2622 step = fold_convert (type, step);
2623 }
2624 }
2625
2626 for (i = 0; i < n_iv_cands (data); i++)
2627 {
2628 cand = iv_cand (data, i);
2629
2630 if (cand->pos != pos)
2631 continue;
2632
2633 if (cand->incremented_at != incremented_at
2634 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2635 && cand->ainc_use != use))
2636 continue;
2637
2638 if (!cand->iv)
2639 {
2640 if (!base && !step)
2641 break;
2642
2643 continue;
2644 }
2645
2646 if (!base && !step)
2647 continue;
2648
2649 if (operand_equal_p (base, cand->iv->base, 0)
2650 && operand_equal_p (step, cand->iv->step, 0)
2651 && (TYPE_PRECISION (TREE_TYPE (base))
2652 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2653 break;
2654 }
2655
2656 if (i == n_iv_cands (data))
2657 {
2658 cand = XCNEW (struct iv_cand);
2659 cand->id = i;
2660
2661 if (!base && !step)
2662 cand->iv = NULL;
2663 else
2664 cand->iv = alloc_iv (data, base, step);
2665
2666 cand->pos = pos;
2667 if (pos != IP_ORIGINAL && cand->iv)
2668 {
2669 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2670 cand->var_after = cand->var_before;
2671 }
2672 cand->important = important;
2673 cand->incremented_at = incremented_at;
2674 data->iv_candidates.safe_push (cand);
2675
2676 if (step
2677 && TREE_CODE (step) != INTEGER_CST)
2678 {
2679 fd_ivopts_data = data;
2680 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2681 }
2682
2683 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2684 cand->ainc_use = use;
2685 else
2686 cand->ainc_use = NULL;
2687
2688 if (dump_file && (dump_flags & TDF_DETAILS))
2689 dump_cand (dump_file, cand);
2690 }
2691
2692 if (important && !cand->important)
2693 {
2694 cand->important = true;
2695 if (dump_file && (dump_flags & TDF_DETAILS))
2696 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2697 }
2698
2699 if (use)
2700 {
2701 bitmap_set_bit (use->related_cands, i);
2702 if (dump_file && (dump_flags & TDF_DETAILS))
2703 fprintf (dump_file, "Candidate %d is related to use %d\n",
2704 cand->id, use->id);
2705 }
2706
2707 return cand;
2708 }
2709
2710 /* Returns true if incrementing the induction variable at the end of the LOOP
2711 is allowed.
2712
2713 The purpose is to avoid splitting latch edge with a biv increment, thus
2714 creating a jump, possibly confusing other optimization passes and leaving
2715 less freedom to scheduler. So we allow IP_END_POS only if IP_NORMAL_POS
2716 is not available (so we do not have a better alternative), or if the latch
2717 edge is already nonempty. */
2718
2719 static bool
2720 allow_ip_end_pos_p (struct loop *loop)
2721 {
2722 if (!ip_normal_pos (loop))
2723 return true;
2724
2725 if (!empty_block_p (ip_end_pos (loop)))
2726 return true;
2727
2728 return false;
2729 }
2730
2731 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2732 Important field is set to IMPORTANT. */
2733
2734 static void
2735 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2736 bool important, struct iv_use *use)
2737 {
2738 basic_block use_bb = gimple_bb (use->stmt);
2739 machine_mode mem_mode;
2740 unsigned HOST_WIDE_INT cstepi;
2741
2742 /* If we insert the increment in any position other than the standard
2743 ones, we must ensure that it is incremented once per iteration.
2744 It must not be in an inner nested loop, or one side of an if
2745 statement. */
2746 if (use_bb->loop_father != data->current_loop
2747 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2748 || stmt_could_throw_p (use->stmt)
2749 || !cst_and_fits_in_hwi (step))
2750 return;
2751
2752 cstepi = int_cst_value (step);
2753
2754 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2755 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2756 || USE_STORE_PRE_INCREMENT (mem_mode))
2757 && GET_MODE_SIZE (mem_mode) == cstepi)
2758 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2759 || USE_STORE_PRE_DECREMENT (mem_mode))
2760 && GET_MODE_SIZE (mem_mode) == -cstepi))
2761 {
2762 enum tree_code code = MINUS_EXPR;
2763 tree new_base;
2764 tree new_step = step;
2765
2766 if (POINTER_TYPE_P (TREE_TYPE (base)))
2767 {
2768 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2769 code = POINTER_PLUS_EXPR;
2770 }
2771 else
2772 new_step = fold_convert (TREE_TYPE (base), new_step);
2773 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2774 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2775 use->stmt);
2776 }
2777 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2778 || USE_STORE_POST_INCREMENT (mem_mode))
2779 && GET_MODE_SIZE (mem_mode) == cstepi)
2780 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2781 || USE_STORE_POST_DECREMENT (mem_mode))
2782 && GET_MODE_SIZE (mem_mode) == -cstepi))
2783 {
2784 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2785 use->stmt);
2786 }
2787 }
2788
2789 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2790 position to POS. If USE is not NULL, the candidate is set as related to
2791 it. The candidate computation is scheduled before exit condition and at
2792 the end of loop. */
2793
2794 static void
2795 add_candidate (struct ivopts_data *data,
2796 tree base, tree step, bool important, struct iv_use *use)
2797 {
2798 gcc_assert (use == NULL || use->sub_id == 0);
2799
2800 if (ip_normal_pos (data->current_loop))
2801 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2802 if (ip_end_pos (data->current_loop)
2803 && allow_ip_end_pos_p (data->current_loop))
2804 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2805 }
2806
2807 /* Adds standard iv candidates. */
2808
2809 static void
2810 add_standard_iv_candidates (struct ivopts_data *data)
2811 {
2812 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2813
2814 /* The same for a double-integer type if it is still fast enough. */
2815 if (TYPE_PRECISION
2816 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2817 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2818 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2819 build_int_cst (long_integer_type_node, 1), true, NULL);
2820
2821 /* The same for a double-integer type if it is still fast enough. */
2822 if (TYPE_PRECISION
2823 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2824 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2825 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2826 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2827 }
2828
2829
2830 /* Adds candidates bases on the old induction variable IV. */
2831
2832 static void
2833 add_iv_candidate_for_biv (struct ivopts_data *data, struct iv *iv)
2834 {
2835 gimple phi;
2836 tree def;
2837 struct iv_cand *cand;
2838
2839 add_candidate (data, iv->base, iv->step, true, NULL);
2840
2841 /* The same, but with initial value zero. */
2842 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2843 add_candidate (data, size_int (0), iv->step, true, NULL);
2844 else
2845 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2846 iv->step, true, NULL);
2847
2848 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2849 if (gimple_code (phi) == GIMPLE_PHI)
2850 {
2851 /* Additionally record the possibility of leaving the original iv
2852 untouched. */
2853 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2854 /* Don't add candidate if it's from another PHI node because
2855 it's an affine iv appearing in the form of PEELED_CHREC. */
2856 phi = SSA_NAME_DEF_STMT (def);
2857 if (gimple_code (phi) != GIMPLE_PHI)
2858 {
2859 cand = add_candidate_1 (data,
2860 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2861 SSA_NAME_DEF_STMT (def));
2862 cand->var_before = iv->ssa_name;
2863 cand->var_after = def;
2864 }
2865 else
2866 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2867 }
2868 }
2869
2870 /* Adds candidates based on the old induction variables. */
2871
2872 static void
2873 add_iv_candidate_for_bivs (struct ivopts_data *data)
2874 {
2875 unsigned i;
2876 struct iv *iv;
2877 bitmap_iterator bi;
2878
2879 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2880 {
2881 iv = ver_info (data, i)->iv;
2882 if (iv && iv->biv_p && !integer_zerop (iv->step))
2883 add_iv_candidate_for_biv (data, iv);
2884 }
2885 }
2886
2887 /* Adds candidates based on the value of USE's iv. */
2888
2889 static void
2890 add_iv_candidate_for_use (struct ivopts_data *data, struct iv_use *use)
2891 {
2892 unsigned HOST_WIDE_INT offset;
2893 tree base;
2894 tree basetype;
2895 struct iv *iv = use->iv;
2896
2897 add_candidate (data, iv->base, iv->step, false, use);
2898
2899 /* The same, but with initial value zero. Make such variable important,
2900 since it is generic enough so that possibly many uses may be based
2901 on it. */
2902 basetype = TREE_TYPE (iv->base);
2903 if (POINTER_TYPE_P (basetype))
2904 basetype = sizetype;
2905 add_candidate (data, build_int_cst (basetype, 0), iv->step, true, use);
2906
2907 /* Third, try removing the constant offset. Make sure to even
2908 add a candidate for &a[0] vs. (T *)&a. */
2909 base = strip_offset (iv->base, &offset);
2910 if (offset || base != iv->base)
2911 add_candidate (data, base, iv->step, false, use);
2912
2913 /* At last, add auto-incremental candidates. Make such variables
2914 important since other iv uses with same base object may be based
2915 on it. */
2916 if (use != NULL && use->type == USE_ADDRESS)
2917 add_autoinc_candidates (data, iv->base, iv->step, true, use);
2918 }
2919
2920 /* Adds candidates based on the uses. */
2921
2922 static void
2923 add_iv_candidate_for_uses (struct ivopts_data *data)
2924 {
2925 unsigned i;
2926
2927 for (i = 0; i < n_iv_uses (data); i++)
2928 {
2929 struct iv_use *use = iv_use (data, i);
2930
2931 if (!use)
2932 continue;
2933
2934 switch (use->type)
2935 {
2936 case USE_NONLINEAR_EXPR:
2937 case USE_COMPARE:
2938 case USE_ADDRESS:
2939 /* Just add the ivs based on the value of the iv used here. */
2940 add_iv_candidate_for_use (data, use);
2941 break;
2942
2943 default:
2944 gcc_unreachable ();
2945 }
2946 }
2947 }
2948
2949 /* Record important candidates and add them to related_cands bitmaps
2950 if needed. */
2951
2952 static void
2953 record_important_candidates (struct ivopts_data *data)
2954 {
2955 unsigned i;
2956 struct iv_use *use;
2957
2958 for (i = 0; i < n_iv_cands (data); i++)
2959 {
2960 struct iv_cand *cand = iv_cand (data, i);
2961
2962 if (cand->important)
2963 bitmap_set_bit (data->important_candidates, i);
2964 }
2965
2966 data->consider_all_candidates = (n_iv_cands (data)
2967 <= CONSIDER_ALL_CANDIDATES_BOUND);
2968
2969 if (data->consider_all_candidates)
2970 {
2971 /* We will not need "related_cands" bitmaps in this case,
2972 so release them to decrease peak memory consumption. */
2973 for (i = 0; i < n_iv_uses (data); i++)
2974 {
2975 use = iv_use (data, i);
2976 BITMAP_FREE (use->related_cands);
2977 }
2978 }
2979 else
2980 {
2981 /* Add important candidates to the related_cands bitmaps. */
2982 for (i = 0; i < n_iv_uses (data); i++)
2983 bitmap_ior_into (iv_use (data, i)->related_cands,
2984 data->important_candidates);
2985 }
2986 }
2987
2988 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2989 If consider_all_candidates is true, we use a two-dimensional array, otherwise
2990 we allocate a simple list to every use. */
2991
2992 static void
2993 alloc_use_cost_map (struct ivopts_data *data)
2994 {
2995 unsigned i, size, s;
2996
2997 for (i = 0; i < n_iv_uses (data); i++)
2998 {
2999 struct iv_use *use = iv_use (data, i);
3000
3001 if (data->consider_all_candidates)
3002 size = n_iv_cands (data);
3003 else
3004 {
3005 s = bitmap_count_bits (use->related_cands);
3006
3007 /* Round up to the power of two, so that moduling by it is fast. */
3008 size = s ? (1 << ceil_log2 (s)) : 1;
3009 }
3010
3011 use->n_map_members = size;
3012 use->cost_map = XCNEWVEC (struct cost_pair, size);
3013 }
3014 }
3015
3016 /* Returns description of computation cost of expression whose runtime
3017 cost is RUNTIME and complexity corresponds to COMPLEXITY. */
3018
3019 static comp_cost
3020 new_cost (unsigned runtime, unsigned complexity)
3021 {
3022 comp_cost cost;
3023
3024 cost.cost = runtime;
3025 cost.complexity = complexity;
3026
3027 return cost;
3028 }
3029
3030 /* Returns true if COST is infinite. */
3031
3032 static bool
3033 infinite_cost_p (comp_cost cost)
3034 {
3035 return cost.cost == INFTY;
3036 }
3037
3038 /* Adds costs COST1 and COST2. */
3039
3040 static comp_cost
3041 add_costs (comp_cost cost1, comp_cost cost2)
3042 {
3043 if (infinite_cost_p (cost1) || infinite_cost_p (cost2))
3044 return infinite_cost;
3045
3046 cost1.cost += cost2.cost;
3047 cost1.complexity += cost2.complexity;
3048
3049 return cost1;
3050 }
3051 /* Subtracts costs COST1 and COST2. */
3052
3053 static comp_cost
3054 sub_costs (comp_cost cost1, comp_cost cost2)
3055 {
3056 cost1.cost -= cost2.cost;
3057 cost1.complexity -= cost2.complexity;
3058
3059 return cost1;
3060 }
3061
3062 /* Returns a negative number if COST1 < COST2, a positive number if
3063 COST1 > COST2, and 0 if COST1 = COST2. */
3064
3065 static int
3066 compare_costs (comp_cost cost1, comp_cost cost2)
3067 {
3068 if (cost1.cost == cost2.cost)
3069 return cost1.complexity - cost2.complexity;
3070
3071 return cost1.cost - cost2.cost;
3072 }
3073
3074 /* Sets cost of (USE, CANDIDATE) pair to COST and record that it depends
3075 on invariants DEPENDS_ON and that the value used in expressing it
3076 is VALUE, and in case of iv elimination the comparison operator is COMP. */
3077
3078 static void
3079 set_use_iv_cost (struct ivopts_data *data,
3080 struct iv_use *use, struct iv_cand *cand,
3081 comp_cost cost, bitmap depends_on, tree value,
3082 enum tree_code comp, int inv_expr_id)
3083 {
3084 unsigned i, s;
3085
3086 if (infinite_cost_p (cost))
3087 {
3088 BITMAP_FREE (depends_on);
3089 return;
3090 }
3091
3092 if (data->consider_all_candidates)
3093 {
3094 use->cost_map[cand->id].cand = cand;
3095 use->cost_map[cand->id].cost = cost;
3096 use->cost_map[cand->id].depends_on = depends_on;
3097 use->cost_map[cand->id].value = value;
3098 use->cost_map[cand->id].comp = comp;
3099 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
3100 return;
3101 }
3102
3103 /* n_map_members is a power of two, so this computes modulo. */
3104 s = cand->id & (use->n_map_members - 1);
3105 for (i = s; i < use->n_map_members; i++)
3106 if (!use->cost_map[i].cand)
3107 goto found;
3108 for (i = 0; i < s; i++)
3109 if (!use->cost_map[i].cand)
3110 goto found;
3111
3112 gcc_unreachable ();
3113
3114 found:
3115 use->cost_map[i].cand = cand;
3116 use->cost_map[i].cost = cost;
3117 use->cost_map[i].depends_on = depends_on;
3118 use->cost_map[i].value = value;
3119 use->cost_map[i].comp = comp;
3120 use->cost_map[i].inv_expr_id = inv_expr_id;
3121 }
3122
3123 /* Gets cost of (USE, CANDIDATE) pair. */
3124
3125 static struct cost_pair *
3126 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
3127 struct iv_cand *cand)
3128 {
3129 unsigned i, s;
3130 struct cost_pair *ret;
3131
3132 if (!cand)
3133 return NULL;
3134
3135 if (data->consider_all_candidates)
3136 {
3137 ret = use->cost_map + cand->id;
3138 if (!ret->cand)
3139 return NULL;
3140
3141 return ret;
3142 }
3143
3144 /* n_map_members is a power of two, so this computes modulo. */
3145 s = cand->id & (use->n_map_members - 1);
3146 for (i = s; i < use->n_map_members; i++)
3147 if (use->cost_map[i].cand == cand)
3148 return use->cost_map + i;
3149 else if (use->cost_map[i].cand == NULL)
3150 return NULL;
3151 for (i = 0; i < s; i++)
3152 if (use->cost_map[i].cand == cand)
3153 return use->cost_map + i;
3154 else if (use->cost_map[i].cand == NULL)
3155 return NULL;
3156
3157 return NULL;
3158 }
3159
3160 /* Produce DECL_RTL for object obj so it looks like it is stored in memory. */
3161 static rtx
3162 produce_memory_decl_rtl (tree obj, int *regno)
3163 {
3164 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
3165 machine_mode address_mode = targetm.addr_space.address_mode (as);
3166 rtx x;
3167
3168 gcc_assert (obj);
3169 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
3170 {
3171 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
3172 x = gen_rtx_SYMBOL_REF (address_mode, name);
3173 SET_SYMBOL_REF_DECL (x, obj);
3174 x = gen_rtx_MEM (DECL_MODE (obj), x);
3175 set_mem_addr_space (x, as);
3176 targetm.encode_section_info (obj, x, true);
3177 }
3178 else
3179 {
3180 x = gen_raw_REG (address_mode, (*regno)++);
3181 x = gen_rtx_MEM (DECL_MODE (obj), x);
3182 set_mem_addr_space (x, as);
3183 }
3184
3185 return x;
3186 }
3187
3188 /* Prepares decl_rtl for variables referred in *EXPR_P. Callback for
3189 walk_tree. DATA contains the actual fake register number. */
3190
3191 static tree
3192 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
3193 {
3194 tree obj = NULL_TREE;
3195 rtx x = NULL_RTX;
3196 int *regno = (int *) data;
3197
3198 switch (TREE_CODE (*expr_p))
3199 {
3200 case ADDR_EXPR:
3201 for (expr_p = &TREE_OPERAND (*expr_p, 0);
3202 handled_component_p (*expr_p);
3203 expr_p = &TREE_OPERAND (*expr_p, 0))
3204 continue;
3205 obj = *expr_p;
3206 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
3207 x = produce_memory_decl_rtl (obj, regno);
3208 break;
3209
3210 case SSA_NAME:
3211 *ws = 0;
3212 obj = SSA_NAME_VAR (*expr_p);
3213 /* Defer handling of anonymous SSA_NAMEs to the expander. */
3214 if (!obj)
3215 return NULL_TREE;
3216 if (!DECL_RTL_SET_P (obj))
3217 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3218 break;
3219
3220 case VAR_DECL:
3221 case PARM_DECL:
3222 case RESULT_DECL:
3223 *ws = 0;
3224 obj = *expr_p;
3225
3226 if (DECL_RTL_SET_P (obj))
3227 break;
3228
3229 if (DECL_MODE (obj) == BLKmode)
3230 x = produce_memory_decl_rtl (obj, regno);
3231 else
3232 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
3233
3234 break;
3235
3236 default:
3237 break;
3238 }
3239
3240 if (x)
3241 {
3242 decl_rtl_to_reset.safe_push (obj);
3243 SET_DECL_RTL (obj, x);
3244 }
3245
3246 return NULL_TREE;
3247 }
3248
3249 /* Determines cost of the computation of EXPR. */
3250
3251 static unsigned
3252 computation_cost (tree expr, bool speed)
3253 {
3254 rtx_insn *seq;
3255 rtx rslt;
3256 tree type = TREE_TYPE (expr);
3257 unsigned cost;
3258 /* Avoid using hard regs in ways which may be unsupported. */
3259 int regno = LAST_VIRTUAL_REGISTER + 1;
3260 struct cgraph_node *node = cgraph_node::get (current_function_decl);
3261 enum node_frequency real_frequency = node->frequency;
3262
3263 node->frequency = NODE_FREQUENCY_NORMAL;
3264 crtl->maybe_hot_insn_p = speed;
3265 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
3266 start_sequence ();
3267 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
3268 seq = get_insns ();
3269 end_sequence ();
3270 default_rtl_profile ();
3271 node->frequency = real_frequency;
3272
3273 cost = seq_cost (seq, speed);
3274 if (MEM_P (rslt))
3275 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
3276 TYPE_ADDR_SPACE (type), speed);
3277 else if (!REG_P (rslt))
3278 cost += set_src_cost (rslt, TYPE_MODE (type), speed);
3279
3280 return cost;
3281 }
3282
3283 /* Returns variable containing the value of candidate CAND at statement AT. */
3284
3285 static tree
3286 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
3287 {
3288 if (stmt_after_increment (loop, cand, stmt))
3289 return cand->var_after;
3290 else
3291 return cand->var_before;
3292 }
3293
3294 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
3295 same precision that is at least as wide as the precision of TYPE, stores
3296 BA to A and BB to B, and returns the type of BA. Otherwise, returns the
3297 type of A and B. */
3298
3299 static tree
3300 determine_common_wider_type (tree *a, tree *b)
3301 {
3302 tree wider_type = NULL;
3303 tree suba, subb;
3304 tree atype = TREE_TYPE (*a);
3305
3306 if (CONVERT_EXPR_P (*a))
3307 {
3308 suba = TREE_OPERAND (*a, 0);
3309 wider_type = TREE_TYPE (suba);
3310 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3311 return atype;
3312 }
3313 else
3314 return atype;
3315
3316 if (CONVERT_EXPR_P (*b))
3317 {
3318 subb = TREE_OPERAND (*b, 0);
3319 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3320 return atype;
3321 }
3322 else
3323 return atype;
3324
3325 *a = suba;
3326 *b = subb;
3327 return wider_type;
3328 }
3329
3330 /* Determines the expression by that USE is expressed from induction variable
3331 CAND at statement AT in LOOP. The expression is stored in a decomposed
3332 form into AFF. Returns false if USE cannot be expressed using CAND. */
3333
3334 static bool
3335 get_computation_aff (struct loop *loop,
3336 struct iv_use *use, struct iv_cand *cand, gimple at,
3337 struct aff_tree *aff)
3338 {
3339 tree ubase = use->iv->base;
3340 tree ustep = use->iv->step;
3341 tree cbase = cand->iv->base;
3342 tree cstep = cand->iv->step, cstep_common;
3343 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3344 tree common_type, var;
3345 tree uutype;
3346 aff_tree cbase_aff, var_aff;
3347 widest_int rat;
3348
3349 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3350 {
3351 /* We do not have a precision to express the values of use. */
3352 return false;
3353 }
3354
3355 var = var_at_stmt (loop, cand, at);
3356 uutype = unsigned_type_for (utype);
3357
3358 /* If the conversion is not noop, perform it. */
3359 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3360 {
3361 cstep = fold_convert (uutype, cstep);
3362 cbase = fold_convert (uutype, cbase);
3363 var = fold_convert (uutype, var);
3364 }
3365
3366 if (!constant_multiple_of (ustep, cstep, &rat))
3367 return false;
3368
3369 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3370 type, we achieve better folding by computing their difference in this
3371 wider type, and cast the result to UUTYPE. We do not need to worry about
3372 overflows, as all the arithmetics will in the end be performed in UUTYPE
3373 anyway. */
3374 common_type = determine_common_wider_type (&ubase, &cbase);
3375
3376 /* use = ubase - ratio * cbase + ratio * var. */
3377 tree_to_aff_combination (ubase, common_type, aff);
3378 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3379 tree_to_aff_combination (var, uutype, &var_aff);
3380
3381 /* We need to shift the value if we are after the increment. */
3382 if (stmt_after_increment (loop, cand, at))
3383 {
3384 aff_tree cstep_aff;
3385
3386 if (common_type != uutype)
3387 cstep_common = fold_convert (common_type, cstep);
3388 else
3389 cstep_common = cstep;
3390
3391 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3392 aff_combination_add (&cbase_aff, &cstep_aff);
3393 }
3394
3395 aff_combination_scale (&cbase_aff, -rat);
3396 aff_combination_add (aff, &cbase_aff);
3397 if (common_type != uutype)
3398 aff_combination_convert (aff, uutype);
3399
3400 aff_combination_scale (&var_aff, rat);
3401 aff_combination_add (aff, &var_aff);
3402
3403 return true;
3404 }
3405
3406 /* Return the type of USE. */
3407
3408 static tree
3409 get_use_type (struct iv_use *use)
3410 {
3411 tree base_type = TREE_TYPE (use->iv->base);
3412 tree type;
3413
3414 if (use->type == USE_ADDRESS)
3415 {
3416 /* The base_type may be a void pointer. Create a pointer type based on
3417 the mem_ref instead. */
3418 type = build_pointer_type (TREE_TYPE (*use->op_p));
3419 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3420 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3421 }
3422 else
3423 type = base_type;
3424
3425 return type;
3426 }
3427
3428 /* Determines the expression by that USE is expressed from induction variable
3429 CAND at statement AT in LOOP. The computation is unshared. */
3430
3431 static tree
3432 get_computation_at (struct loop *loop,
3433 struct iv_use *use, struct iv_cand *cand, gimple at)
3434 {
3435 aff_tree aff;
3436 tree type = get_use_type (use);
3437
3438 if (!get_computation_aff (loop, use, cand, at, &aff))
3439 return NULL_TREE;
3440 unshare_aff_combination (&aff);
3441 return fold_convert (type, aff_combination_to_tree (&aff));
3442 }
3443
3444 /* Determines the expression by that USE is expressed from induction variable
3445 CAND in LOOP. The computation is unshared. */
3446
3447 static tree
3448 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3449 {
3450 return get_computation_at (loop, use, cand, use->stmt);
3451 }
3452
3453 /* Adjust the cost COST for being in loop setup rather than loop body.
3454 If we're optimizing for space, the loop setup overhead is constant;
3455 if we're optimizing for speed, amortize it over the per-iteration cost. */
3456 static unsigned
3457 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3458 {
3459 if (cost == INFTY)
3460 return cost;
3461 else if (optimize_loop_for_speed_p (data->current_loop))
3462 return cost / avg_loop_niter (data->current_loop);
3463 else
3464 return cost;
3465 }
3466
3467 /* Returns true if multiplying by RATIO is allowed in an address. Test the
3468 validity for a memory reference accessing memory of mode MODE in
3469 address space AS. */
3470
3471
3472 bool
3473 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, machine_mode mode,
3474 addr_space_t as)
3475 {
3476 #define MAX_RATIO 128
3477 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3478 static vec<sbitmap> valid_mult_list;
3479 sbitmap valid_mult;
3480
3481 if (data_index >= valid_mult_list.length ())
3482 valid_mult_list.safe_grow_cleared (data_index + 1);
3483
3484 valid_mult = valid_mult_list[data_index];
3485 if (!valid_mult)
3486 {
3487 machine_mode address_mode = targetm.addr_space.address_mode (as);
3488 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3489 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3490 rtx addr, scaled;
3491 HOST_WIDE_INT i;
3492
3493 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3494 bitmap_clear (valid_mult);
3495 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3496 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3497 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3498 {
3499 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3500 if (memory_address_addr_space_p (mode, addr, as)
3501 || memory_address_addr_space_p (mode, scaled, as))
3502 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3503 }
3504
3505 if (dump_file && (dump_flags & TDF_DETAILS))
3506 {
3507 fprintf (dump_file, " allowed multipliers:");
3508 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3509 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3510 fprintf (dump_file, " %d", (int) i);
3511 fprintf (dump_file, "\n");
3512 fprintf (dump_file, "\n");
3513 }
3514
3515 valid_mult_list[data_index] = valid_mult;
3516 }
3517
3518 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3519 return false;
3520
3521 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3522 }
3523
3524 /* Returns cost of address in shape symbol + var + OFFSET + RATIO * index.
3525 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3526 variable is omitted. Compute the cost for a memory reference that accesses
3527 a memory location of mode MEM_MODE in address space AS.
3528
3529 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3530 size of MEM_MODE / RATIO) is available. To make this determination, we
3531 look at the size of the increment to be made, which is given in CSTEP.
3532 CSTEP may be zero if the step is unknown.
3533 STMT_AFTER_INC is true iff the statement we're looking at is after the
3534 increment of the original biv.
3535
3536 TODO -- there must be some better way. This all is quite crude. */
3537
3538 enum ainc_type
3539 {
3540 AINC_PRE_INC, /* Pre increment. */
3541 AINC_PRE_DEC, /* Pre decrement. */
3542 AINC_POST_INC, /* Post increment. */
3543 AINC_POST_DEC, /* Post decrement. */
3544 AINC_NONE /* Also the number of auto increment types. */
3545 };
3546
3547 typedef struct address_cost_data_s
3548 {
3549 HOST_WIDE_INT min_offset, max_offset;
3550 unsigned costs[2][2][2][2];
3551 unsigned ainc_costs[AINC_NONE];
3552 } *address_cost_data;
3553
3554
3555 static comp_cost
3556 get_address_cost (bool symbol_present, bool var_present,
3557 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3558 HOST_WIDE_INT cstep, machine_mode mem_mode,
3559 addr_space_t as, bool speed,
3560 bool stmt_after_inc, bool *may_autoinc)
3561 {
3562 machine_mode address_mode = targetm.addr_space.address_mode (as);
3563 static vec<address_cost_data> address_cost_data_list;
3564 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3565 address_cost_data data;
3566 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3567 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3568 unsigned cost, acost, complexity;
3569 enum ainc_type autoinc_type;
3570 bool offset_p, ratio_p, autoinc;
3571 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3572 unsigned HOST_WIDE_INT mask;
3573 unsigned bits;
3574
3575 if (data_index >= address_cost_data_list.length ())
3576 address_cost_data_list.safe_grow_cleared (data_index + 1);
3577
3578 data = address_cost_data_list[data_index];
3579 if (!data)
3580 {
3581 HOST_WIDE_INT i;
3582 HOST_WIDE_INT rat, off = 0;
3583 int old_cse_not_expected, width;
3584 unsigned sym_p, var_p, off_p, rat_p, add_c;
3585 rtx_insn *seq;
3586 rtx addr, base;
3587 rtx reg0, reg1;
3588
3589 data = (address_cost_data) xcalloc (1, sizeof (*data));
3590
3591 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3592
3593 width = GET_MODE_BITSIZE (address_mode) - 1;
3594 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3595 width = HOST_BITS_PER_WIDE_INT - 1;
3596 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3597
3598 for (i = width; i >= 0; i--)
3599 {
3600 off = -((unsigned HOST_WIDE_INT) 1 << i);
3601 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3602 if (memory_address_addr_space_p (mem_mode, addr, as))
3603 break;
3604 }
3605 data->min_offset = (i == -1? 0 : off);
3606
3607 for (i = width; i >= 0; i--)
3608 {
3609 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3610 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3611 if (memory_address_addr_space_p (mem_mode, addr, as))
3612 break;
3613 /* For some strict-alignment targets, the offset must be naturally
3614 aligned. Try an aligned offset if mem_mode is not QImode. */
3615 off = mem_mode != QImode
3616 ? ((unsigned HOST_WIDE_INT) 1 << i)
3617 - GET_MODE_SIZE (mem_mode)
3618 : 0;
3619 if (off > 0)
3620 {
3621 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3622 if (memory_address_addr_space_p (mem_mode, addr, as))
3623 break;
3624 }
3625 }
3626 if (i == -1)
3627 off = 0;
3628 data->max_offset = off;
3629
3630 if (dump_file && (dump_flags & TDF_DETAILS))
3631 {
3632 fprintf (dump_file, "get_address_cost:\n");
3633 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3634 GET_MODE_NAME (mem_mode),
3635 data->min_offset);
3636 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3637 GET_MODE_NAME (mem_mode),
3638 data->max_offset);
3639 }
3640
3641 rat = 1;
3642 for (i = 2; i <= MAX_RATIO; i++)
3643 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3644 {
3645 rat = i;
3646 break;
3647 }
3648
3649 /* Compute the cost of various addressing modes. */
3650 acost = 0;
3651 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3652 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3653
3654 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3655 || USE_STORE_PRE_DECREMENT (mem_mode))
3656 {
3657 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3658 has_predec[mem_mode]
3659 = memory_address_addr_space_p (mem_mode, addr, as);
3660
3661 if (has_predec[mem_mode])
3662 data->ainc_costs[AINC_PRE_DEC]
3663 = address_cost (addr, mem_mode, as, speed);
3664 }
3665 if (USE_LOAD_POST_DECREMENT (mem_mode)
3666 || USE_STORE_POST_DECREMENT (mem_mode))
3667 {
3668 addr = gen_rtx_POST_DEC (address_mode, reg0);
3669 has_postdec[mem_mode]
3670 = memory_address_addr_space_p (mem_mode, addr, as);
3671
3672 if (has_postdec[mem_mode])
3673 data->ainc_costs[AINC_POST_DEC]
3674 = address_cost (addr, mem_mode, as, speed);
3675 }
3676 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3677 || USE_STORE_PRE_DECREMENT (mem_mode))
3678 {
3679 addr = gen_rtx_PRE_INC (address_mode, reg0);
3680 has_preinc[mem_mode]
3681 = memory_address_addr_space_p (mem_mode, addr, as);
3682
3683 if (has_preinc[mem_mode])
3684 data->ainc_costs[AINC_PRE_INC]
3685 = address_cost (addr, mem_mode, as, speed);
3686 }
3687 if (USE_LOAD_POST_INCREMENT (mem_mode)
3688 || USE_STORE_POST_INCREMENT (mem_mode))
3689 {
3690 addr = gen_rtx_POST_INC (address_mode, reg0);
3691 has_postinc[mem_mode]
3692 = memory_address_addr_space_p (mem_mode, addr, as);
3693
3694 if (has_postinc[mem_mode])
3695 data->ainc_costs[AINC_POST_INC]
3696 = address_cost (addr, mem_mode, as, speed);
3697 }
3698 for (i = 0; i < 16; i++)
3699 {
3700 sym_p = i & 1;
3701 var_p = (i >> 1) & 1;
3702 off_p = (i >> 2) & 1;
3703 rat_p = (i >> 3) & 1;
3704
3705 addr = reg0;
3706 if (rat_p)
3707 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3708 gen_int_mode (rat, address_mode));
3709
3710 if (var_p)
3711 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3712
3713 if (sym_p)
3714 {
3715 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3716 /* ??? We can run into trouble with some backends by presenting
3717 it with symbols which haven't been properly passed through
3718 targetm.encode_section_info. By setting the local bit, we
3719 enhance the probability of things working. */
3720 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3721
3722 if (off_p)
3723 base = gen_rtx_fmt_e (CONST, address_mode,
3724 gen_rtx_fmt_ee
3725 (PLUS, address_mode, base,
3726 gen_int_mode (off, address_mode)));
3727 }
3728 else if (off_p)
3729 base = gen_int_mode (off, address_mode);
3730 else
3731 base = NULL_RTX;
3732
3733 if (base)
3734 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3735
3736 start_sequence ();
3737 /* To avoid splitting addressing modes, pretend that no cse will
3738 follow. */
3739 old_cse_not_expected = cse_not_expected;
3740 cse_not_expected = true;
3741 addr = memory_address_addr_space (mem_mode, addr, as);
3742 cse_not_expected = old_cse_not_expected;
3743 seq = get_insns ();
3744 end_sequence ();
3745
3746 acost = seq_cost (seq, speed);
3747 acost += address_cost (addr, mem_mode, as, speed);
3748
3749 if (!acost)
3750 acost = 1;
3751 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3752 }
3753
3754 /* On some targets, it is quite expensive to load symbol to a register,
3755 which makes addresses that contain symbols look much more expensive.
3756 However, the symbol will have to be loaded in any case before the
3757 loop (and quite likely we have it in register already), so it does not
3758 make much sense to penalize them too heavily. So make some final
3759 tweaks for the SYMBOL_PRESENT modes:
3760
3761 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3762 var is cheaper, use this mode with small penalty.
3763 If VAR_PRESENT is true, try whether the mode with
3764 SYMBOL_PRESENT = false is cheaper even with cost of addition, and
3765 if this is the case, use it. */
3766 add_c = add_cost (speed, address_mode);
3767 for (i = 0; i < 8; i++)
3768 {
3769 var_p = i & 1;
3770 off_p = (i >> 1) & 1;
3771 rat_p = (i >> 2) & 1;
3772
3773 acost = data->costs[0][1][off_p][rat_p] + 1;
3774 if (var_p)
3775 acost += add_c;
3776
3777 if (acost < data->costs[1][var_p][off_p][rat_p])
3778 data->costs[1][var_p][off_p][rat_p] = acost;
3779 }
3780
3781 if (dump_file && (dump_flags & TDF_DETAILS))
3782 {
3783 fprintf (dump_file, "Address costs:\n");
3784
3785 for (i = 0; i < 16; i++)
3786 {
3787 sym_p = i & 1;
3788 var_p = (i >> 1) & 1;
3789 off_p = (i >> 2) & 1;
3790 rat_p = (i >> 3) & 1;
3791
3792 fprintf (dump_file, " ");
3793 if (sym_p)
3794 fprintf (dump_file, "sym + ");
3795 if (var_p)
3796 fprintf (dump_file, "var + ");
3797 if (off_p)
3798 fprintf (dump_file, "cst + ");
3799 if (rat_p)
3800 fprintf (dump_file, "rat * ");
3801
3802 acost = data->costs[sym_p][var_p][off_p][rat_p];
3803 fprintf (dump_file, "index costs %d\n", acost);
3804 }
3805 if (has_predec[mem_mode] || has_postdec[mem_mode]
3806 || has_preinc[mem_mode] || has_postinc[mem_mode])
3807 fprintf (dump_file, " May include autoinc/dec\n");
3808 fprintf (dump_file, "\n");
3809 }
3810
3811 address_cost_data_list[data_index] = data;
3812 }
3813
3814 bits = GET_MODE_BITSIZE (address_mode);
3815 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3816 offset &= mask;
3817 if ((offset >> (bits - 1) & 1))
3818 offset |= ~mask;
3819 s_offset = offset;
3820
3821 autoinc = false;
3822 autoinc_type = AINC_NONE;
3823 msize = GET_MODE_SIZE (mem_mode);
3824 autoinc_offset = offset;
3825 if (stmt_after_inc)
3826 autoinc_offset += ratio * cstep;
3827 if (symbol_present || var_present || ratio != 1)
3828 autoinc = false;
3829 else
3830 {
3831 if (has_postinc[mem_mode] && autoinc_offset == 0
3832 && msize == cstep)
3833 autoinc_type = AINC_POST_INC;
3834 else if (has_postdec[mem_mode] && autoinc_offset == 0
3835 && msize == -cstep)
3836 autoinc_type = AINC_POST_DEC;
3837 else if (has_preinc[mem_mode] && autoinc_offset == msize
3838 && msize == cstep)
3839 autoinc_type = AINC_PRE_INC;
3840 else if (has_predec[mem_mode] && autoinc_offset == -msize
3841 && msize == -cstep)
3842 autoinc_type = AINC_PRE_DEC;
3843
3844 if (autoinc_type != AINC_NONE)
3845 autoinc = true;
3846 }
3847
3848 cost = 0;
3849 offset_p = (s_offset != 0
3850 && data->min_offset <= s_offset
3851 && s_offset <= data->max_offset);
3852 ratio_p = (ratio != 1
3853 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3854
3855 if (ratio != 1 && !ratio_p)
3856 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3857
3858 if (s_offset && !offset_p && !symbol_present)
3859 cost += add_cost (speed, address_mode);
3860
3861 if (may_autoinc)
3862 *may_autoinc = autoinc;
3863 if (autoinc)
3864 acost = data->ainc_costs[autoinc_type];
3865 else
3866 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3867 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3868 return new_cost (cost + acost, complexity);
3869 }
3870
3871 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is the
3872 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3873 calculating the operands of EXPR. Returns true if successful, and returns
3874 the cost in COST. */
3875
3876 static bool
3877 get_shiftadd_cost (tree expr, machine_mode mode, comp_cost cost0,
3878 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3879 {
3880 comp_cost res;
3881 tree op1 = TREE_OPERAND (expr, 1);
3882 tree cst = TREE_OPERAND (mult, 1);
3883 tree multop = TREE_OPERAND (mult, 0);
3884 int m = exact_log2 (int_cst_value (cst));
3885 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3886 int as_cost, sa_cost;
3887 bool mult_in_op1;
3888
3889 if (!(m >= 0 && m < maxm))
3890 return false;
3891
3892 STRIP_NOPS (op1);
3893 mult_in_op1 = operand_equal_p (op1, mult, 0);
3894
3895 as_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
3896
3897 /* If the target has a cheap shift-and-add or shift-and-sub instruction,
3898 use that in preference to a shift insn followed by an add insn. */
3899 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3900 ? shiftadd_cost (speed, mode, m)
3901 : (mult_in_op1
3902 ? shiftsub1_cost (speed, mode, m)
3903 : shiftsub0_cost (speed, mode, m)));
3904
3905 res = new_cost (MIN (as_cost, sa_cost), 0);
3906 res = add_costs (res, mult_in_op1 ? cost0 : cost1);
3907
3908 STRIP_NOPS (multop);
3909 if (!is_gimple_val (multop))
3910 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3911
3912 *cost = res;
3913 return true;
3914 }
3915
3916 /* Estimates cost of forcing expression EXPR into a variable. */
3917
3918 static comp_cost
3919 force_expr_to_var_cost (tree expr, bool speed)
3920 {
3921 static bool costs_initialized = false;
3922 static unsigned integer_cost [2];
3923 static unsigned symbol_cost [2];
3924 static unsigned address_cost [2];
3925 tree op0, op1;
3926 comp_cost cost0, cost1, cost;
3927 machine_mode mode;
3928
3929 if (!costs_initialized)
3930 {
3931 tree type = build_pointer_type (integer_type_node);
3932 tree var, addr;
3933 rtx x;
3934 int i;
3935
3936 var = create_tmp_var_raw (integer_type_node, "test_var");
3937 TREE_STATIC (var) = 1;
3938 x = produce_memory_decl_rtl (var, NULL);
3939 SET_DECL_RTL (var, x);
3940
3941 addr = build1 (ADDR_EXPR, type, var);
3942
3943
3944 for (i = 0; i < 2; i++)
3945 {
3946 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3947 2000), i);
3948
3949 symbol_cost[i] = computation_cost (addr, i) + 1;
3950
3951 address_cost[i]
3952 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3953 if (dump_file && (dump_flags & TDF_DETAILS))
3954 {
3955 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3956 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3957 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3958 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3959 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3960 fprintf (dump_file, "\n");
3961 }
3962 }
3963
3964 costs_initialized = true;
3965 }
3966
3967 STRIP_NOPS (expr);
3968
3969 if (SSA_VAR_P (expr))
3970 return no_cost;
3971
3972 if (is_gimple_min_invariant (expr))
3973 {
3974 if (TREE_CODE (expr) == INTEGER_CST)
3975 return new_cost (integer_cost [speed], 0);
3976
3977 if (TREE_CODE (expr) == ADDR_EXPR)
3978 {
3979 tree obj = TREE_OPERAND (expr, 0);
3980
3981 if (TREE_CODE (obj) == VAR_DECL
3982 || TREE_CODE (obj) == PARM_DECL
3983 || TREE_CODE (obj) == RESULT_DECL)
3984 return new_cost (symbol_cost [speed], 0);
3985 }
3986
3987 return new_cost (address_cost [speed], 0);
3988 }
3989
3990 switch (TREE_CODE (expr))
3991 {
3992 case POINTER_PLUS_EXPR:
3993 case PLUS_EXPR:
3994 case MINUS_EXPR:
3995 case MULT_EXPR:
3996 op0 = TREE_OPERAND (expr, 0);
3997 op1 = TREE_OPERAND (expr, 1);
3998 STRIP_NOPS (op0);
3999 STRIP_NOPS (op1);
4000 break;
4001
4002 CASE_CONVERT:
4003 case NEGATE_EXPR:
4004 op0 = TREE_OPERAND (expr, 0);
4005 STRIP_NOPS (op0);
4006 op1 = NULL_TREE;
4007 break;
4008
4009 default:
4010 /* Just an arbitrary value, FIXME. */
4011 return new_cost (target_spill_cost[speed], 0);
4012 }
4013
4014 if (op0 == NULL_TREE
4015 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
4016 cost0 = no_cost;
4017 else
4018 cost0 = force_expr_to_var_cost (op0, speed);
4019
4020 if (op1 == NULL_TREE
4021 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
4022 cost1 = no_cost;
4023 else
4024 cost1 = force_expr_to_var_cost (op1, speed);
4025
4026 mode = TYPE_MODE (TREE_TYPE (expr));
4027 switch (TREE_CODE (expr))
4028 {
4029 case POINTER_PLUS_EXPR:
4030 case PLUS_EXPR:
4031 case MINUS_EXPR:
4032 case NEGATE_EXPR:
4033 cost = new_cost (add_cost (speed, mode), 0);
4034 if (TREE_CODE (expr) != NEGATE_EXPR)
4035 {
4036 tree mult = NULL_TREE;
4037 comp_cost sa_cost;
4038 if (TREE_CODE (op1) == MULT_EXPR)
4039 mult = op1;
4040 else if (TREE_CODE (op0) == MULT_EXPR)
4041 mult = op0;
4042
4043 if (mult != NULL_TREE
4044 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
4045 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
4046 speed, &sa_cost))
4047 return sa_cost;
4048 }
4049 break;
4050
4051 CASE_CONVERT:
4052 {
4053 tree inner_mode, outer_mode;
4054 outer_mode = TREE_TYPE (expr);
4055 inner_mode = TREE_TYPE (op0);
4056 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
4057 TYPE_MODE (inner_mode), speed), 0);
4058 }
4059 break;
4060
4061 case MULT_EXPR:
4062 if (cst_and_fits_in_hwi (op0))
4063 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
4064 mode, speed), 0);
4065 else if (cst_and_fits_in_hwi (op1))
4066 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
4067 mode, speed), 0);
4068 else
4069 return new_cost (target_spill_cost [speed], 0);
4070 break;
4071
4072 default:
4073 gcc_unreachable ();
4074 }
4075
4076 cost = add_costs (cost, cost0);
4077 cost = add_costs (cost, cost1);
4078
4079 /* Bound the cost by target_spill_cost. The parts of complicated
4080 computations often are either loop invariant or at least can
4081 be shared between several iv uses, so letting this grow without
4082 limits would not give reasonable results. */
4083 if (cost.cost > (int) target_spill_cost [speed])
4084 cost.cost = target_spill_cost [speed];
4085
4086 return cost;
4087 }
4088
4089 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
4090 invariants the computation depends on. */
4091
4092 static comp_cost
4093 force_var_cost (struct ivopts_data *data,
4094 tree expr, bitmap *depends_on)
4095 {
4096 if (depends_on)
4097 {
4098 fd_ivopts_data = data;
4099 walk_tree (&expr, find_depends, depends_on, NULL);
4100 }
4101
4102 return force_expr_to_var_cost (expr, data->speed);
4103 }
4104
4105 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
4106 value of offset is added to OFFSET, SYMBOL_PRESENT and VAR_PRESENT are set
4107 to false if the corresponding part is missing. DEPENDS_ON is a set of the
4108 invariants the computation depends on. */
4109
4110 static comp_cost
4111 split_address_cost (struct ivopts_data *data,
4112 tree addr, bool *symbol_present, bool *var_present,
4113 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4114 {
4115 tree core;
4116 HOST_WIDE_INT bitsize;
4117 HOST_WIDE_INT bitpos;
4118 tree toffset;
4119 machine_mode mode;
4120 int unsignedp, volatilep;
4121
4122 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
4123 &unsignedp, &volatilep, false);
4124
4125 if (toffset != 0
4126 || bitpos % BITS_PER_UNIT != 0
4127 || TREE_CODE (core) != VAR_DECL)
4128 {
4129 *symbol_present = false;
4130 *var_present = true;
4131 fd_ivopts_data = data;
4132 walk_tree (&addr, find_depends, depends_on, NULL);
4133 return new_cost (target_spill_cost[data->speed], 0);
4134 }
4135
4136 *offset += bitpos / BITS_PER_UNIT;
4137 if (TREE_STATIC (core)
4138 || DECL_EXTERNAL (core))
4139 {
4140 *symbol_present = true;
4141 *var_present = false;
4142 return no_cost;
4143 }
4144
4145 *symbol_present = false;
4146 *var_present = true;
4147 return no_cost;
4148 }
4149
4150 /* Estimates cost of expressing difference of addresses E1 - E2 as
4151 var + symbol + offset. The value of offset is added to OFFSET,
4152 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4153 part is missing. DEPENDS_ON is a set of the invariants the computation
4154 depends on. */
4155
4156 static comp_cost
4157 ptr_difference_cost (struct ivopts_data *data,
4158 tree e1, tree e2, bool *symbol_present, bool *var_present,
4159 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4160 {
4161 HOST_WIDE_INT diff = 0;
4162 aff_tree aff_e1, aff_e2;
4163 tree type;
4164
4165 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
4166
4167 if (ptr_difference_const (e1, e2, &diff))
4168 {
4169 *offset += diff;
4170 *symbol_present = false;
4171 *var_present = false;
4172 return no_cost;
4173 }
4174
4175 if (integer_zerop (e2))
4176 return split_address_cost (data, TREE_OPERAND (e1, 0),
4177 symbol_present, var_present, offset, depends_on);
4178
4179 *symbol_present = false;
4180 *var_present = true;
4181
4182 type = signed_type_for (TREE_TYPE (e1));
4183 tree_to_aff_combination (e1, type, &aff_e1);
4184 tree_to_aff_combination (e2, type, &aff_e2);
4185 aff_combination_scale (&aff_e2, -1);
4186 aff_combination_add (&aff_e1, &aff_e2);
4187
4188 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4189 }
4190
4191 /* Estimates cost of expressing difference E1 - E2 as
4192 var + symbol + offset. The value of offset is added to OFFSET,
4193 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
4194 part is missing. DEPENDS_ON is a set of the invariants the computation
4195 depends on. */
4196
4197 static comp_cost
4198 difference_cost (struct ivopts_data *data,
4199 tree e1, tree e2, bool *symbol_present, bool *var_present,
4200 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
4201 {
4202 machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
4203 unsigned HOST_WIDE_INT off1, off2;
4204 aff_tree aff_e1, aff_e2;
4205 tree type;
4206
4207 e1 = strip_offset (e1, &off1);
4208 e2 = strip_offset (e2, &off2);
4209 *offset += off1 - off2;
4210
4211 STRIP_NOPS (e1);
4212 STRIP_NOPS (e2);
4213
4214 if (TREE_CODE (e1) == ADDR_EXPR)
4215 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
4216 offset, depends_on);
4217 *symbol_present = false;
4218
4219 if (operand_equal_p (e1, e2, 0))
4220 {
4221 *var_present = false;
4222 return no_cost;
4223 }
4224
4225 *var_present = true;
4226
4227 if (integer_zerop (e2))
4228 return force_var_cost (data, e1, depends_on);
4229
4230 if (integer_zerop (e1))
4231 {
4232 comp_cost cost = force_var_cost (data, e2, depends_on);
4233 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
4234 return cost;
4235 }
4236
4237 type = signed_type_for (TREE_TYPE (e1));
4238 tree_to_aff_combination (e1, type, &aff_e1);
4239 tree_to_aff_combination (e2, type, &aff_e2);
4240 aff_combination_scale (&aff_e2, -1);
4241 aff_combination_add (&aff_e1, &aff_e2);
4242
4243 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
4244 }
4245
4246 /* Returns true if AFF1 and AFF2 are identical. */
4247
4248 static bool
4249 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
4250 {
4251 unsigned i;
4252
4253 if (aff1->n != aff2->n)
4254 return false;
4255
4256 for (i = 0; i < aff1->n; i++)
4257 {
4258 if (aff1->elts[i].coef != aff2->elts[i].coef)
4259 return false;
4260
4261 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
4262 return false;
4263 }
4264 return true;
4265 }
4266
4267 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
4268
4269 static int
4270 get_expr_id (struct ivopts_data *data, tree expr)
4271 {
4272 struct iv_inv_expr_ent ent;
4273 struct iv_inv_expr_ent **slot;
4274
4275 ent.expr = expr;
4276 ent.hash = iterative_hash_expr (expr, 0);
4277 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
4278 if (*slot)
4279 return (*slot)->id;
4280
4281 *slot = XNEW (struct iv_inv_expr_ent);
4282 (*slot)->expr = expr;
4283 (*slot)->hash = ent.hash;
4284 (*slot)->id = data->inv_expr_id++;
4285 return (*slot)->id;
4286 }
4287
4288 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
4289 requires a new compiler generated temporary. Returns -1 otherwise.
4290 ADDRESS_P is a flag indicating if the expression is for address
4291 computation. */
4292
4293 static int
4294 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
4295 tree cbase, HOST_WIDE_INT ratio,
4296 bool address_p)
4297 {
4298 aff_tree ubase_aff, cbase_aff;
4299 tree expr, ub, cb;
4300
4301 STRIP_NOPS (ubase);
4302 STRIP_NOPS (cbase);
4303 ub = ubase;
4304 cb = cbase;
4305
4306 if ((TREE_CODE (ubase) == INTEGER_CST)
4307 && (TREE_CODE (cbase) == INTEGER_CST))
4308 return -1;
4309
4310 /* Strips the constant part. */
4311 if (TREE_CODE (ubase) == PLUS_EXPR
4312 || TREE_CODE (ubase) == MINUS_EXPR
4313 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4314 {
4315 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4316 ubase = TREE_OPERAND (ubase, 0);
4317 }
4318
4319 /* Strips the constant part. */
4320 if (TREE_CODE (cbase) == PLUS_EXPR
4321 || TREE_CODE (cbase) == MINUS_EXPR
4322 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4323 {
4324 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4325 cbase = TREE_OPERAND (cbase, 0);
4326 }
4327
4328 if (address_p)
4329 {
4330 if (((TREE_CODE (ubase) == SSA_NAME)
4331 || (TREE_CODE (ubase) == ADDR_EXPR
4332 && is_gimple_min_invariant (ubase)))
4333 && (TREE_CODE (cbase) == INTEGER_CST))
4334 return -1;
4335
4336 if (((TREE_CODE (cbase) == SSA_NAME)
4337 || (TREE_CODE (cbase) == ADDR_EXPR
4338 && is_gimple_min_invariant (cbase)))
4339 && (TREE_CODE (ubase) == INTEGER_CST))
4340 return -1;
4341 }
4342
4343 if (ratio == 1)
4344 {
4345 if (operand_equal_p (ubase, cbase, 0))
4346 return -1;
4347
4348 if (TREE_CODE (ubase) == ADDR_EXPR
4349 && TREE_CODE (cbase) == ADDR_EXPR)
4350 {
4351 tree usym, csym;
4352
4353 usym = TREE_OPERAND (ubase, 0);
4354 csym = TREE_OPERAND (cbase, 0);
4355 if (TREE_CODE (usym) == ARRAY_REF)
4356 {
4357 tree ind = TREE_OPERAND (usym, 1);
4358 if (TREE_CODE (ind) == INTEGER_CST
4359 && tree_fits_shwi_p (ind)
4360 && tree_to_shwi (ind) == 0)
4361 usym = TREE_OPERAND (usym, 0);
4362 }
4363 if (TREE_CODE (csym) == ARRAY_REF)
4364 {
4365 tree ind = TREE_OPERAND (csym, 1);
4366 if (TREE_CODE (ind) == INTEGER_CST
4367 && tree_fits_shwi_p (ind)
4368 && tree_to_shwi (ind) == 0)
4369 csym = TREE_OPERAND (csym, 0);
4370 }
4371 if (operand_equal_p (usym, csym, 0))
4372 return -1;
4373 }
4374 /* Now do more complex comparison */
4375 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4376 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4377 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4378 return -1;
4379 }
4380
4381 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4382 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4383
4384 aff_combination_scale (&cbase_aff, -1 * ratio);
4385 aff_combination_add (&ubase_aff, &cbase_aff);
4386 expr = aff_combination_to_tree (&ubase_aff);
4387 return get_expr_id (data, expr);
4388 }
4389
4390
4391
4392 /* Determines the cost of the computation by that USE is expressed
4393 from induction variable CAND. If ADDRESS_P is true, we just need
4394 to create an address from it, otherwise we want to get it into
4395 register. A set of invariants we depend on is stored in
4396 DEPENDS_ON. AT is the statement at that the value is computed.
4397 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4398 addressing is likely. */
4399
4400 static comp_cost
4401 get_computation_cost_at (struct ivopts_data *data,
4402 struct iv_use *use, struct iv_cand *cand,
4403 bool address_p, bitmap *depends_on, gimple at,
4404 bool *can_autoinc,
4405 int *inv_expr_id)
4406 {
4407 tree ubase = use->iv->base, ustep = use->iv->step;
4408 tree cbase, cstep;
4409 tree utype = TREE_TYPE (ubase), ctype;
4410 unsigned HOST_WIDE_INT cstepi, offset = 0;
4411 HOST_WIDE_INT ratio, aratio;
4412 bool var_present, symbol_present, stmt_is_after_inc;
4413 comp_cost cost;
4414 widest_int rat;
4415 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4416 machine_mode mem_mode = (address_p
4417 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4418 : VOIDmode);
4419
4420 *depends_on = NULL;
4421
4422 /* Only consider real candidates. */
4423 if (!cand->iv)
4424 return infinite_cost;
4425
4426 cbase = cand->iv->base;
4427 cstep = cand->iv->step;
4428 ctype = TREE_TYPE (cbase);
4429
4430 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4431 {
4432 /* We do not have a precision to express the values of use. */
4433 return infinite_cost;
4434 }
4435
4436 if (address_p
4437 || (use->iv->base_object
4438 && cand->iv->base_object
4439 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4440 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4441 {
4442 /* Do not try to express address of an object with computation based
4443 on address of a different object. This may cause problems in rtl
4444 level alias analysis (that does not expect this to be happening,
4445 as this is illegal in C), and would be unlikely to be useful
4446 anyway. */
4447 if (use->iv->base_object
4448 && cand->iv->base_object
4449 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4450 return infinite_cost;
4451 }
4452
4453 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4454 {
4455 /* TODO -- add direct handling of this case. */
4456 goto fallback;
4457 }
4458
4459 /* CSTEPI is removed from the offset in case statement is after the
4460 increment. If the step is not constant, we use zero instead.
4461 This is a bit imprecise (there is the extra addition), but
4462 redundancy elimination is likely to transform the code so that
4463 it uses value of the variable before increment anyway,
4464 so it is not that much unrealistic. */
4465 if (cst_and_fits_in_hwi (cstep))
4466 cstepi = int_cst_value (cstep);
4467 else
4468 cstepi = 0;
4469
4470 if (!constant_multiple_of (ustep, cstep, &rat))
4471 return infinite_cost;
4472
4473 if (wi::fits_shwi_p (rat))
4474 ratio = rat.to_shwi ();
4475 else
4476 return infinite_cost;
4477
4478 STRIP_NOPS (cbase);
4479 ctype = TREE_TYPE (cbase);
4480
4481 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4482
4483 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4484 or ratio == 1, it is better to handle this like
4485
4486 ubase - ratio * cbase + ratio * var
4487
4488 (also holds in the case ratio == -1, TODO. */
4489
4490 if (cst_and_fits_in_hwi (cbase))
4491 {
4492 offset = - ratio * (unsigned HOST_WIDE_INT) int_cst_value (cbase);
4493 cost = difference_cost (data,
4494 ubase, build_int_cst (utype, 0),
4495 &symbol_present, &var_present, &offset,
4496 depends_on);
4497 cost.cost /= avg_loop_niter (data->current_loop);
4498 }
4499 else if (ratio == 1)
4500 {
4501 tree real_cbase = cbase;
4502
4503 /* Check to see if any adjustment is needed. */
4504 if (cstepi == 0 && stmt_is_after_inc)
4505 {
4506 aff_tree real_cbase_aff;
4507 aff_tree cstep_aff;
4508
4509 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4510 &real_cbase_aff);
4511 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4512
4513 aff_combination_add (&real_cbase_aff, &cstep_aff);
4514 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4515 }
4516
4517 cost = difference_cost (data,
4518 ubase, real_cbase,
4519 &symbol_present, &var_present, &offset,
4520 depends_on);
4521 cost.cost /= avg_loop_niter (data->current_loop);
4522 }
4523 else if (address_p
4524 && !POINTER_TYPE_P (ctype)
4525 && multiplier_allowed_in_address_p
4526 (ratio, mem_mode,
4527 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4528 {
4529 cbase
4530 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4531 cost = difference_cost (data,
4532 ubase, cbase,
4533 &symbol_present, &var_present, &offset,
4534 depends_on);
4535 cost.cost /= avg_loop_niter (data->current_loop);
4536 }
4537 else
4538 {
4539 cost = force_var_cost (data, cbase, depends_on);
4540 cost = add_costs (cost,
4541 difference_cost (data,
4542 ubase, build_int_cst (utype, 0),
4543 &symbol_present, &var_present,
4544 &offset, depends_on));
4545 cost.cost /= avg_loop_niter (data->current_loop);
4546 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4547 }
4548
4549 /* Set of invariants depended on by sub use has already been computed
4550 for the first use in the group. */
4551 if (use->sub_id)
4552 {
4553 cost.cost = 0;
4554 if (depends_on && *depends_on)
4555 bitmap_clear (*depends_on);
4556 }
4557 else if (inv_expr_id)
4558 {
4559 *inv_expr_id =
4560 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4561 /* Clear depends on. */
4562 if (*inv_expr_id != -1 && depends_on && *depends_on)
4563 bitmap_clear (*depends_on);
4564 }
4565
4566 /* If we are after the increment, the value of the candidate is higher by
4567 one iteration. */
4568 if (stmt_is_after_inc)
4569 offset -= ratio * cstepi;
4570
4571 /* Now the computation is in shape symbol + var1 + const + ratio * var2.
4572 (symbol/var1/const parts may be omitted). If we are looking for an
4573 address, find the cost of addressing this. */
4574 if (address_p)
4575 return add_costs (cost,
4576 get_address_cost (symbol_present, var_present,
4577 offset, ratio, cstepi,
4578 mem_mode,
4579 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4580 speed, stmt_is_after_inc,
4581 can_autoinc));
4582
4583 /* Otherwise estimate the costs for computing the expression. */
4584 if (!symbol_present && !var_present && !offset)
4585 {
4586 if (ratio != 1)
4587 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4588 return cost;
4589 }
4590
4591 /* Symbol + offset should be compile-time computable so consider that they
4592 are added once to the variable, if present. */
4593 if (var_present && (symbol_present || offset))
4594 cost.cost += adjust_setup_cost (data,
4595 add_cost (speed, TYPE_MODE (ctype)));
4596
4597 /* Having offset does not affect runtime cost in case it is added to
4598 symbol, but it increases complexity. */
4599 if (offset)
4600 cost.complexity++;
4601
4602 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4603
4604 aratio = ratio > 0 ? ratio : -ratio;
4605 if (aratio != 1)
4606 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4607 return cost;
4608
4609 fallback:
4610 if (can_autoinc)
4611 *can_autoinc = false;
4612
4613 {
4614 /* Just get the expression, expand it and measure the cost. */
4615 tree comp = get_computation_at (data->current_loop, use, cand, at);
4616
4617 if (!comp)
4618 return infinite_cost;
4619
4620 if (address_p)
4621 comp = build_simple_mem_ref (comp);
4622
4623 return new_cost (computation_cost (comp, speed), 0);
4624 }
4625 }
4626
4627 /* Determines the cost of the computation by that USE is expressed
4628 from induction variable CAND. If ADDRESS_P is true, we just need
4629 to create an address from it, otherwise we want to get it into
4630 register. A set of invariants we depend on is stored in
4631 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4632 autoinc addressing is likely. */
4633
4634 static comp_cost
4635 get_computation_cost (struct ivopts_data *data,
4636 struct iv_use *use, struct iv_cand *cand,
4637 bool address_p, bitmap *depends_on,
4638 bool *can_autoinc, int *inv_expr_id)
4639 {
4640 return get_computation_cost_at (data,
4641 use, cand, address_p, depends_on, use->stmt,
4642 can_autoinc, inv_expr_id);
4643 }
4644
4645 /* Determines cost of basing replacement of USE on CAND in a generic
4646 expression. */
4647
4648 static bool
4649 determine_use_iv_cost_generic (struct ivopts_data *data,
4650 struct iv_use *use, struct iv_cand *cand)
4651 {
4652 bitmap depends_on;
4653 comp_cost cost;
4654 int inv_expr_id = -1;
4655
4656 /* The simple case first -- if we need to express value of the preserved
4657 original biv, the cost is 0. This also prevents us from counting the
4658 cost of increment twice -- once at this use and once in the cost of
4659 the candidate. */
4660 if (cand->pos == IP_ORIGINAL
4661 && cand->incremented_at == use->stmt)
4662 {
4663 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4664 ERROR_MARK, -1);
4665 return true;
4666 }
4667
4668 cost = get_computation_cost (data, use, cand, false, &depends_on,
4669 NULL, &inv_expr_id);
4670
4671 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4672 inv_expr_id);
4673
4674 return !infinite_cost_p (cost);
4675 }
4676
4677 /* Determines cost of basing replacement of USE on CAND in an address. */
4678
4679 static bool
4680 determine_use_iv_cost_address (struct ivopts_data *data,
4681 struct iv_use *use, struct iv_cand *cand)
4682 {
4683 bitmap depends_on;
4684 bool can_autoinc;
4685 int inv_expr_id = -1;
4686 struct iv_use *sub_use;
4687 comp_cost sub_cost;
4688 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4689 &can_autoinc, &inv_expr_id);
4690
4691 if (cand->ainc_use == use)
4692 {
4693 if (can_autoinc)
4694 cost.cost -= cand->cost_step;
4695 /* If we generated the candidate solely for exploiting autoincrement
4696 opportunities, and it turns out it can't be used, set the cost to
4697 infinity to make sure we ignore it. */
4698 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4699 cost = infinite_cost;
4700 }
4701 for (sub_use = use->next;
4702 sub_use && !infinite_cost_p (cost);
4703 sub_use = sub_use->next)
4704 {
4705 sub_cost = get_computation_cost (data, sub_use, cand, true, &depends_on,
4706 &can_autoinc, &inv_expr_id);
4707 cost = add_costs (cost, sub_cost);
4708 }
4709
4710 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4711 inv_expr_id);
4712
4713 return !infinite_cost_p (cost);
4714 }
4715
4716 /* Computes value of candidate CAND at position AT in iteration NITER, and
4717 stores it to VAL. */
4718
4719 static void
4720 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4721 aff_tree *val)
4722 {
4723 aff_tree step, delta, nit;
4724 struct iv *iv = cand->iv;
4725 tree type = TREE_TYPE (iv->base);
4726 tree steptype = type;
4727 if (POINTER_TYPE_P (type))
4728 steptype = sizetype;
4729 steptype = unsigned_type_for (type);
4730
4731 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4732 aff_combination_convert (&step, steptype);
4733 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4734 aff_combination_convert (&nit, steptype);
4735 aff_combination_mult (&nit, &step, &delta);
4736 if (stmt_after_increment (loop, cand, at))
4737 aff_combination_add (&delta, &step);
4738
4739 tree_to_aff_combination (iv->base, type, val);
4740 if (!POINTER_TYPE_P (type))
4741 aff_combination_convert (val, steptype);
4742 aff_combination_add (val, &delta);
4743 }
4744
4745 /* Returns period of induction variable iv. */
4746
4747 static tree
4748 iv_period (struct iv *iv)
4749 {
4750 tree step = iv->step, period, type;
4751 tree pow2div;
4752
4753 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4754
4755 type = unsigned_type_for (TREE_TYPE (step));
4756 /* Period of the iv is lcm (step, type_range)/step -1,
4757 i.e., N*type_range/step - 1. Since type range is power
4758 of two, N == (step >> num_of_ending_zeros_binary (step),
4759 so the final result is
4760
4761 (type_range >> num_of_ending_zeros_binary (step)) - 1
4762
4763 */
4764 pow2div = num_ending_zeros (step);
4765
4766 period = build_low_bits_mask (type,
4767 (TYPE_PRECISION (type)
4768 - tree_to_uhwi (pow2div)));
4769
4770 return period;
4771 }
4772
4773 /* Returns the comparison operator used when eliminating the iv USE. */
4774
4775 static enum tree_code
4776 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4777 {
4778 struct loop *loop = data->current_loop;
4779 basic_block ex_bb;
4780 edge exit;
4781
4782 ex_bb = gimple_bb (use->stmt);
4783 exit = EDGE_SUCC (ex_bb, 0);
4784 if (flow_bb_inside_loop_p (loop, exit->dest))
4785 exit = EDGE_SUCC (ex_bb, 1);
4786
4787 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4788 }
4789
4790 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4791 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4792 calculation is performed in non-wrapping type.
4793
4794 TODO: More generally, we could test for the situation that
4795 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4796 This would require knowing the sign of OFFSET. */
4797
4798 static bool
4799 difference_cannot_overflow_p (struct ivopts_data *data, tree base, tree offset)
4800 {
4801 enum tree_code code;
4802 tree e1, e2;
4803 aff_tree aff_e1, aff_e2, aff_offset;
4804
4805 if (!nowrap_type_p (TREE_TYPE (base)))
4806 return false;
4807
4808 base = expand_simple_operations (base);
4809
4810 if (TREE_CODE (base) == SSA_NAME)
4811 {
4812 gimple stmt = SSA_NAME_DEF_STMT (base);
4813
4814 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4815 return false;
4816
4817 code = gimple_assign_rhs_code (stmt);
4818 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4819 return false;
4820
4821 e1 = gimple_assign_rhs1 (stmt);
4822 e2 = gimple_assign_rhs2 (stmt);
4823 }
4824 else
4825 {
4826 code = TREE_CODE (base);
4827 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4828 return false;
4829 e1 = TREE_OPERAND (base, 0);
4830 e2 = TREE_OPERAND (base, 1);
4831 }
4832
4833 /* Use affine expansion as deeper inspection to prove the equality. */
4834 tree_to_aff_combination_expand (e2, TREE_TYPE (e2),
4835 &aff_e2, &data->name_expansion_cache);
4836 tree_to_aff_combination_expand (offset, TREE_TYPE (offset),
4837 &aff_offset, &data->name_expansion_cache);
4838 aff_combination_scale (&aff_offset, -1);
4839 switch (code)
4840 {
4841 case PLUS_EXPR:
4842 aff_combination_add (&aff_e2, &aff_offset);
4843 if (aff_combination_zero_p (&aff_e2))
4844 return true;
4845
4846 tree_to_aff_combination_expand (e1, TREE_TYPE (e1),
4847 &aff_e1, &data->name_expansion_cache);
4848 aff_combination_add (&aff_e1, &aff_offset);
4849 return aff_combination_zero_p (&aff_e1);
4850
4851 case POINTER_PLUS_EXPR:
4852 aff_combination_add (&aff_e2, &aff_offset);
4853 return aff_combination_zero_p (&aff_e2);
4854
4855 default:
4856 return false;
4857 }
4858 }
4859
4860 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4861 comparison with CAND. NITER describes the number of iterations of
4862 the loops. If successful, the comparison in COMP_P is altered accordingly.
4863
4864 We aim to handle the following situation:
4865
4866 sometype *base, *p;
4867 int a, b, i;
4868
4869 i = a;
4870 p = p_0 = base + a;
4871
4872 do
4873 {
4874 bla (*p);
4875 p++;
4876 i++;
4877 }
4878 while (i < b);
4879
4880 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4881 We aim to optimize this to
4882
4883 p = p_0 = base + a;
4884 do
4885 {
4886 bla (*p);
4887 p++;
4888 }
4889 while (p < p_0 - a + b);
4890
4891 This preserves the correctness, since the pointer arithmetics does not
4892 overflow. More precisely:
4893
4894 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4895 overflow in computing it or the values of p.
4896 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4897 overflow. To prove this, we use the fact that p_0 = base + a. */
4898
4899 static bool
4900 iv_elimination_compare_lt (struct ivopts_data *data,
4901 struct iv_cand *cand, enum tree_code *comp_p,
4902 struct tree_niter_desc *niter)
4903 {
4904 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4905 struct aff_tree nit, tmpa, tmpb;
4906 enum tree_code comp;
4907 HOST_WIDE_INT step;
4908
4909 /* We need to know that the candidate induction variable does not overflow.
4910 While more complex analysis may be used to prove this, for now just
4911 check that the variable appears in the original program and that it
4912 is computed in a type that guarantees no overflows. */
4913 cand_type = TREE_TYPE (cand->iv->base);
4914 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4915 return false;
4916
4917 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4918 the calculation of the BOUND could overflow, making the comparison
4919 invalid. */
4920 if (!data->loop_single_exit_p)
4921 return false;
4922
4923 /* We need to be able to decide whether candidate is increasing or decreasing
4924 in order to choose the right comparison operator. */
4925 if (!cst_and_fits_in_hwi (cand->iv->step))
4926 return false;
4927 step = int_cst_value (cand->iv->step);
4928
4929 /* Check that the number of iterations matches the expected pattern:
4930 a + 1 > b ? 0 : b - a - 1. */
4931 mbz = niter->may_be_zero;
4932 if (TREE_CODE (mbz) == GT_EXPR)
4933 {
4934 /* Handle a + 1 > b. */
4935 tree op0 = TREE_OPERAND (mbz, 0);
4936 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4937 {
4938 a = TREE_OPERAND (op0, 0);
4939 b = TREE_OPERAND (mbz, 1);
4940 }
4941 else
4942 return false;
4943 }
4944 else if (TREE_CODE (mbz) == LT_EXPR)
4945 {
4946 tree op1 = TREE_OPERAND (mbz, 1);
4947
4948 /* Handle b < a + 1. */
4949 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4950 {
4951 a = TREE_OPERAND (op1, 0);
4952 b = TREE_OPERAND (mbz, 0);
4953 }
4954 else
4955 return false;
4956 }
4957 else
4958 return false;
4959
4960 /* Expected number of iterations is B - A - 1. Check that it matches
4961 the actual number, i.e., that B - A - NITER = 1. */
4962 tree_to_aff_combination (niter->niter, nit_type, &nit);
4963 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4964 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4965 aff_combination_scale (&nit, -1);
4966 aff_combination_scale (&tmpa, -1);
4967 aff_combination_add (&tmpb, &tmpa);
4968 aff_combination_add (&tmpb, &nit);
4969 if (tmpb.n != 0 || tmpb.offset != 1)
4970 return false;
4971
4972 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4973 overflow. */
4974 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4975 cand->iv->step,
4976 fold_convert (TREE_TYPE (cand->iv->step), a));
4977 if (!difference_cannot_overflow_p (data, cand->iv->base, offset))
4978 return false;
4979
4980 /* Determine the new comparison operator. */
4981 comp = step < 0 ? GT_EXPR : LT_EXPR;
4982 if (*comp_p == NE_EXPR)
4983 *comp_p = comp;
4984 else if (*comp_p == EQ_EXPR)
4985 *comp_p = invert_tree_comparison (comp, false);
4986 else
4987 gcc_unreachable ();
4988
4989 return true;
4990 }
4991
4992 /* Check whether it is possible to express the condition in USE by comparison
4993 of candidate CAND. If so, store the value compared with to BOUND, and the
4994 comparison operator to COMP. */
4995
4996 static bool
4997 may_eliminate_iv (struct ivopts_data *data,
4998 struct iv_use *use, struct iv_cand *cand, tree *bound,
4999 enum tree_code *comp)
5000 {
5001 basic_block ex_bb;
5002 edge exit;
5003 tree period;
5004 struct loop *loop = data->current_loop;
5005 aff_tree bnd;
5006 struct tree_niter_desc *desc = NULL;
5007
5008 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
5009 return false;
5010
5011 /* For now works only for exits that dominate the loop latch.
5012 TODO: extend to other conditions inside loop body. */
5013 ex_bb = gimple_bb (use->stmt);
5014 if (use->stmt != last_stmt (ex_bb)
5015 || gimple_code (use->stmt) != GIMPLE_COND
5016 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
5017 return false;
5018
5019 exit = EDGE_SUCC (ex_bb, 0);
5020 if (flow_bb_inside_loop_p (loop, exit->dest))
5021 exit = EDGE_SUCC (ex_bb, 1);
5022 if (flow_bb_inside_loop_p (loop, exit->dest))
5023 return false;
5024
5025 desc = niter_for_exit (data, exit);
5026 if (!desc)
5027 return false;
5028
5029 /* Determine whether we can use the variable to test the exit condition.
5030 This is the case iff the period of the induction variable is greater
5031 than the number of iterations for which the exit condition is true. */
5032 period = iv_period (cand->iv);
5033
5034 /* If the number of iterations is constant, compare against it directly. */
5035 if (TREE_CODE (desc->niter) == INTEGER_CST)
5036 {
5037 /* See cand_value_at. */
5038 if (stmt_after_increment (loop, cand, use->stmt))
5039 {
5040 if (!tree_int_cst_lt (desc->niter, period))
5041 return false;
5042 }
5043 else
5044 {
5045 if (tree_int_cst_lt (period, desc->niter))
5046 return false;
5047 }
5048 }
5049
5050 /* If not, and if this is the only possible exit of the loop, see whether
5051 we can get a conservative estimate on the number of iterations of the
5052 entire loop and compare against that instead. */
5053 else
5054 {
5055 widest_int period_value, max_niter;
5056
5057 max_niter = desc->max;
5058 if (stmt_after_increment (loop, cand, use->stmt))
5059 max_niter += 1;
5060 period_value = wi::to_widest (period);
5061 if (wi::gtu_p (max_niter, period_value))
5062 {
5063 /* See if we can take advantage of inferred loop bound information. */
5064 if (data->loop_single_exit_p)
5065 {
5066 if (!max_loop_iterations (loop, &max_niter))
5067 return false;
5068 /* The loop bound is already adjusted by adding 1. */
5069 if (wi::gtu_p (max_niter, period_value))
5070 return false;
5071 }
5072 else
5073 return false;
5074 }
5075 }
5076
5077 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
5078
5079 *bound = fold_convert (TREE_TYPE (cand->iv->base),
5080 aff_combination_to_tree (&bnd));
5081 *comp = iv_elimination_compare (data, use);
5082
5083 /* It is unlikely that computing the number of iterations using division
5084 would be more profitable than keeping the original induction variable. */
5085 if (expression_expensive_p (*bound))
5086 return false;
5087
5088 /* Sometimes, it is possible to handle the situation that the number of
5089 iterations may be zero unless additional assumtions by using <
5090 instead of != in the exit condition.
5091
5092 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
5093 base the exit condition on it. However, that is often too
5094 expensive. */
5095 if (!integer_zerop (desc->may_be_zero))
5096 return iv_elimination_compare_lt (data, cand, comp, desc);
5097
5098 return true;
5099 }
5100
5101 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
5102 be copied, if it is used in the loop body and DATA->body_includes_call. */
5103
5104 static int
5105 parm_decl_cost (struct ivopts_data *data, tree bound)
5106 {
5107 tree sbound = bound;
5108 STRIP_NOPS (sbound);
5109
5110 if (TREE_CODE (sbound) == SSA_NAME
5111 && SSA_NAME_IS_DEFAULT_DEF (sbound)
5112 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
5113 && data->body_includes_call)
5114 return COSTS_N_INSNS (1);
5115
5116 return 0;
5117 }
5118
5119 /* Determines cost of basing replacement of USE on CAND in a condition. */
5120
5121 static bool
5122 determine_use_iv_cost_condition (struct ivopts_data *data,
5123 struct iv_use *use, struct iv_cand *cand)
5124 {
5125 tree bound = NULL_TREE;
5126 struct iv *cmp_iv;
5127 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
5128 comp_cost elim_cost, express_cost, cost, bound_cost;
5129 bool ok;
5130 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
5131 tree *control_var, *bound_cst;
5132 enum tree_code comp = ERROR_MARK;
5133
5134 /* Only consider real candidates. */
5135 if (!cand->iv)
5136 {
5137 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
5138 ERROR_MARK, -1);
5139 return false;
5140 }
5141
5142 /* Try iv elimination. */
5143 if (may_eliminate_iv (data, use, cand, &bound, &comp))
5144 {
5145 elim_cost = force_var_cost (data, bound, &depends_on_elim);
5146 if (elim_cost.cost == 0)
5147 elim_cost.cost = parm_decl_cost (data, bound);
5148 else if (TREE_CODE (bound) == INTEGER_CST)
5149 elim_cost.cost = 0;
5150 /* If we replace a loop condition 'i < n' with 'p < base + n',
5151 depends_on_elim will have 'base' and 'n' set, which implies
5152 that both 'base' and 'n' will be live during the loop. More likely,
5153 'base + n' will be loop invariant, resulting in only one live value
5154 during the loop. So in that case we clear depends_on_elim and set
5155 elim_inv_expr_id instead. */
5156 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
5157 {
5158 elim_inv_expr_id = get_expr_id (data, bound);
5159 bitmap_clear (depends_on_elim);
5160 }
5161 /* The bound is a loop invariant, so it will be only computed
5162 once. */
5163 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
5164 }
5165 else
5166 elim_cost = infinite_cost;
5167
5168 /* Try expressing the original giv. If it is compared with an invariant,
5169 note that we cannot get rid of it. */
5170 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
5171 NULL, &cmp_iv);
5172 gcc_assert (ok);
5173
5174 /* When the condition is a comparison of the candidate IV against
5175 zero, prefer this IV.
5176
5177 TODO: The constant that we're subtracting from the cost should
5178 be target-dependent. This information should be added to the
5179 target costs for each backend. */
5180 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
5181 && integer_zerop (*bound_cst)
5182 && (operand_equal_p (*control_var, cand->var_after, 0)
5183 || operand_equal_p (*control_var, cand->var_before, 0)))
5184 elim_cost.cost -= 1;
5185
5186 express_cost = get_computation_cost (data, use, cand, false,
5187 &depends_on_express, NULL,
5188 &express_inv_expr_id);
5189 fd_ivopts_data = data;
5190 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
5191
5192 /* Count the cost of the original bound as well. */
5193 bound_cost = force_var_cost (data, *bound_cst, NULL);
5194 if (bound_cost.cost == 0)
5195 bound_cost.cost = parm_decl_cost (data, *bound_cst);
5196 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
5197 bound_cost.cost = 0;
5198 express_cost.cost += bound_cost.cost;
5199
5200 /* Choose the better approach, preferring the eliminated IV. */
5201 if (compare_costs (elim_cost, express_cost) <= 0)
5202 {
5203 cost = elim_cost;
5204 depends_on = depends_on_elim;
5205 depends_on_elim = NULL;
5206 inv_expr_id = elim_inv_expr_id;
5207 }
5208 else
5209 {
5210 cost = express_cost;
5211 depends_on = depends_on_express;
5212 depends_on_express = NULL;
5213 bound = NULL_TREE;
5214 comp = ERROR_MARK;
5215 inv_expr_id = express_inv_expr_id;
5216 }
5217
5218 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
5219
5220 if (depends_on_elim)
5221 BITMAP_FREE (depends_on_elim);
5222 if (depends_on_express)
5223 BITMAP_FREE (depends_on_express);
5224
5225 return !infinite_cost_p (cost);
5226 }
5227
5228 /* Determines cost of basing replacement of USE on CAND. Returns false
5229 if USE cannot be based on CAND. */
5230
5231 static bool
5232 determine_use_iv_cost (struct ivopts_data *data,
5233 struct iv_use *use, struct iv_cand *cand)
5234 {
5235 switch (use->type)
5236 {
5237 case USE_NONLINEAR_EXPR:
5238 return determine_use_iv_cost_generic (data, use, cand);
5239
5240 case USE_ADDRESS:
5241 return determine_use_iv_cost_address (data, use, cand);
5242
5243 case USE_COMPARE:
5244 return determine_use_iv_cost_condition (data, use, cand);
5245
5246 default:
5247 gcc_unreachable ();
5248 }
5249 }
5250
5251 /* Return true if get_computation_cost indicates that autoincrement is
5252 a possibility for the pair of USE and CAND, false otherwise. */
5253
5254 static bool
5255 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
5256 struct iv_cand *cand)
5257 {
5258 bitmap depends_on;
5259 bool can_autoinc;
5260 comp_cost cost;
5261
5262 if (use->type != USE_ADDRESS)
5263 return false;
5264
5265 cost = get_computation_cost (data, use, cand, true, &depends_on,
5266 &can_autoinc, NULL);
5267
5268 BITMAP_FREE (depends_on);
5269
5270 return !infinite_cost_p (cost) && can_autoinc;
5271 }
5272
5273 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
5274 use that allows autoincrement, and set their AINC_USE if possible. */
5275
5276 static void
5277 set_autoinc_for_original_candidates (struct ivopts_data *data)
5278 {
5279 unsigned i, j;
5280
5281 for (i = 0; i < n_iv_cands (data); i++)
5282 {
5283 struct iv_cand *cand = iv_cand (data, i);
5284 struct iv_use *closest_before = NULL;
5285 struct iv_use *closest_after = NULL;
5286 if (cand->pos != IP_ORIGINAL)
5287 continue;
5288
5289 for (j = 0; j < n_iv_uses (data); j++)
5290 {
5291 struct iv_use *use = iv_use (data, j);
5292 unsigned uid = gimple_uid (use->stmt);
5293
5294 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5295 continue;
5296
5297 if (uid < gimple_uid (cand->incremented_at)
5298 && (closest_before == NULL
5299 || uid > gimple_uid (closest_before->stmt)))
5300 closest_before = use;
5301
5302 if (uid > gimple_uid (cand->incremented_at)
5303 && (closest_after == NULL
5304 || uid < gimple_uid (closest_after->stmt)))
5305 closest_after = use;
5306 }
5307
5308 if (closest_before != NULL
5309 && autoinc_possible_for_pair (data, closest_before, cand))
5310 cand->ainc_use = closest_before;
5311 else if (closest_after != NULL
5312 && autoinc_possible_for_pair (data, closest_after, cand))
5313 cand->ainc_use = closest_after;
5314 }
5315 }
5316
5317 /* Finds the candidates for the induction variables. */
5318
5319 static void
5320 find_iv_candidates (struct ivopts_data *data)
5321 {
5322 /* Add commonly used ivs. */
5323 add_standard_iv_candidates (data);
5324
5325 /* Add old induction variables. */
5326 add_iv_candidate_for_bivs (data);
5327
5328 /* Add induction variables derived from uses. */
5329 add_iv_candidate_for_uses (data);
5330
5331 set_autoinc_for_original_candidates (data);
5332
5333 /* Record the important candidates. */
5334 record_important_candidates (data);
5335 }
5336
5337 /* Determines costs of basing the use of the iv on an iv candidate. */
5338
5339 static void
5340 determine_use_iv_costs (struct ivopts_data *data)
5341 {
5342 unsigned i, j;
5343 struct iv_use *use;
5344 struct iv_cand *cand;
5345 bitmap to_clear = BITMAP_ALLOC (NULL);
5346
5347 alloc_use_cost_map (data);
5348
5349 for (i = 0; i < n_iv_uses (data); i++)
5350 {
5351 use = iv_use (data, i);
5352
5353 if (data->consider_all_candidates)
5354 {
5355 for (j = 0; j < n_iv_cands (data); j++)
5356 {
5357 cand = iv_cand (data, j);
5358 determine_use_iv_cost (data, use, cand);
5359 }
5360 }
5361 else
5362 {
5363 bitmap_iterator bi;
5364
5365 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5366 {
5367 cand = iv_cand (data, j);
5368 if (!determine_use_iv_cost (data, use, cand))
5369 bitmap_set_bit (to_clear, j);
5370 }
5371
5372 /* Remove the candidates for that the cost is infinite from
5373 the list of related candidates. */
5374 bitmap_and_compl_into (use->related_cands, to_clear);
5375 bitmap_clear (to_clear);
5376 }
5377 }
5378
5379 BITMAP_FREE (to_clear);
5380
5381 if (dump_file && (dump_flags & TDF_DETAILS))
5382 {
5383 fprintf (dump_file, "Use-candidate costs:\n");
5384
5385 for (i = 0; i < n_iv_uses (data); i++)
5386 {
5387 use = iv_use (data, i);
5388
5389 fprintf (dump_file, "Use %d:\n", i);
5390 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5391 for (j = 0; j < use->n_map_members; j++)
5392 {
5393 if (!use->cost_map[j].cand
5394 || infinite_cost_p (use->cost_map[j].cost))
5395 continue;
5396
5397 fprintf (dump_file, " %d\t%d\t%d\t",
5398 use->cost_map[j].cand->id,
5399 use->cost_map[j].cost.cost,
5400 use->cost_map[j].cost.complexity);
5401 if (use->cost_map[j].depends_on)
5402 bitmap_print (dump_file,
5403 use->cost_map[j].depends_on, "","");
5404 if (use->cost_map[j].inv_expr_id != -1)
5405 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5406 fprintf (dump_file, "\n");
5407 }
5408
5409 fprintf (dump_file, "\n");
5410 }
5411 fprintf (dump_file, "\n");
5412 }
5413 }
5414
5415 /* Determines cost of the candidate CAND. */
5416
5417 static void
5418 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5419 {
5420 comp_cost cost_base;
5421 unsigned cost, cost_step;
5422 tree base;
5423
5424 if (!cand->iv)
5425 {
5426 cand->cost = 0;
5427 return;
5428 }
5429
5430 /* There are two costs associated with the candidate -- its increment
5431 and its initialization. The second is almost negligible for any loop
5432 that rolls enough, so we take it just very little into account. */
5433
5434 base = cand->iv->base;
5435 cost_base = force_var_cost (data, base, NULL);
5436 /* It will be exceptional that the iv register happens to be initialized with
5437 the proper value at no cost. In general, there will at least be a regcopy
5438 or a const set. */
5439 if (cost_base.cost == 0)
5440 cost_base.cost = COSTS_N_INSNS (1);
5441 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5442
5443 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5444
5445 /* Prefer the original ivs unless we may gain something by replacing it.
5446 The reason is to make debugging simpler; so this is not relevant for
5447 artificial ivs created by other optimization passes. */
5448 if (cand->pos != IP_ORIGINAL
5449 || !SSA_NAME_VAR (cand->var_before)
5450 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5451 cost++;
5452
5453 /* Prefer not to insert statements into latch unless there are some
5454 already (so that we do not create unnecessary jumps). */
5455 if (cand->pos == IP_END
5456 && empty_block_p (ip_end_pos (data->current_loop)))
5457 cost++;
5458
5459 cand->cost = cost;
5460 cand->cost_step = cost_step;
5461 }
5462
5463 /* Determines costs of computation of the candidates. */
5464
5465 static void
5466 determine_iv_costs (struct ivopts_data *data)
5467 {
5468 unsigned i;
5469
5470 if (dump_file && (dump_flags & TDF_DETAILS))
5471 {
5472 fprintf (dump_file, "Candidate costs:\n");
5473 fprintf (dump_file, " cand\tcost\n");
5474 }
5475
5476 for (i = 0; i < n_iv_cands (data); i++)
5477 {
5478 struct iv_cand *cand = iv_cand (data, i);
5479
5480 determine_iv_cost (data, cand);
5481
5482 if (dump_file && (dump_flags & TDF_DETAILS))
5483 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5484 }
5485
5486 if (dump_file && (dump_flags & TDF_DETAILS))
5487 fprintf (dump_file, "\n");
5488 }
5489
5490 /* Calculates cost for having SIZE induction variables. */
5491
5492 static unsigned
5493 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5494 {
5495 /* We add size to the cost, so that we prefer eliminating ivs
5496 if possible. */
5497 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5498 data->body_includes_call);
5499 }
5500
5501 /* For each size of the induction variable set determine the penalty. */
5502
5503 static void
5504 determine_set_costs (struct ivopts_data *data)
5505 {
5506 unsigned j, n;
5507 gphi *phi;
5508 gphi_iterator psi;
5509 tree op;
5510 struct loop *loop = data->current_loop;
5511 bitmap_iterator bi;
5512
5513 if (dump_file && (dump_flags & TDF_DETAILS))
5514 {
5515 fprintf (dump_file, "Global costs:\n");
5516 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5517 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5518 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5519 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5520 }
5521
5522 n = 0;
5523 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5524 {
5525 phi = psi.phi ();
5526 op = PHI_RESULT (phi);
5527
5528 if (virtual_operand_p (op))
5529 continue;
5530
5531 if (get_iv (data, op))
5532 continue;
5533
5534 n++;
5535 }
5536
5537 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5538 {
5539 struct version_info *info = ver_info (data, j);
5540
5541 if (info->inv_id && info->has_nonlin_use)
5542 n++;
5543 }
5544
5545 data->regs_used = n;
5546 if (dump_file && (dump_flags & TDF_DETAILS))
5547 fprintf (dump_file, " regs_used %d\n", n);
5548
5549 if (dump_file && (dump_flags & TDF_DETAILS))
5550 {
5551 fprintf (dump_file, " cost for size:\n");
5552 fprintf (dump_file, " ivs\tcost\n");
5553 for (j = 0; j <= 2 * target_avail_regs; j++)
5554 fprintf (dump_file, " %d\t%d\n", j,
5555 ivopts_global_cost_for_size (data, j));
5556 fprintf (dump_file, "\n");
5557 }
5558 }
5559
5560 /* Returns true if A is a cheaper cost pair than B. */
5561
5562 static bool
5563 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5564 {
5565 int cmp;
5566
5567 if (!a)
5568 return false;
5569
5570 if (!b)
5571 return true;
5572
5573 cmp = compare_costs (a->cost, b->cost);
5574 if (cmp < 0)
5575 return true;
5576
5577 if (cmp > 0)
5578 return false;
5579
5580 /* In case the costs are the same, prefer the cheaper candidate. */
5581 if (a->cand->cost < b->cand->cost)
5582 return true;
5583
5584 return false;
5585 }
5586
5587
5588 /* Returns candidate by that USE is expressed in IVS. */
5589
5590 static struct cost_pair *
5591 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5592 {
5593 return ivs->cand_for_use[use->id];
5594 }
5595
5596 /* Computes the cost field of IVS structure. */
5597
5598 static void
5599 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5600 {
5601 comp_cost cost = ivs->cand_use_cost;
5602
5603 cost.cost += ivs->cand_cost;
5604
5605 cost.cost += ivopts_global_cost_for_size (data,
5606 ivs->n_regs + ivs->num_used_inv_expr);
5607
5608 ivs->cost = cost;
5609 }
5610
5611 /* Remove invariants in set INVS to set IVS. */
5612
5613 static void
5614 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5615 {
5616 bitmap_iterator bi;
5617 unsigned iid;
5618
5619 if (!invs)
5620 return;
5621
5622 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5623 {
5624 ivs->n_invariant_uses[iid]--;
5625 if (ivs->n_invariant_uses[iid] == 0)
5626 ivs->n_regs--;
5627 }
5628 }
5629
5630 /* Set USE not to be expressed by any candidate in IVS. */
5631
5632 static void
5633 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5634 struct iv_use *use)
5635 {
5636 unsigned uid = use->id, cid;
5637 struct cost_pair *cp;
5638
5639 cp = ivs->cand_for_use[uid];
5640 if (!cp)
5641 return;
5642 cid = cp->cand->id;
5643
5644 ivs->bad_uses++;
5645 ivs->cand_for_use[uid] = NULL;
5646 ivs->n_cand_uses[cid]--;
5647
5648 if (ivs->n_cand_uses[cid] == 0)
5649 {
5650 bitmap_clear_bit (ivs->cands, cid);
5651 /* Do not count the pseudocandidates. */
5652 if (cp->cand->iv)
5653 ivs->n_regs--;
5654 ivs->n_cands--;
5655 ivs->cand_cost -= cp->cand->cost;
5656
5657 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5658 }
5659
5660 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5661
5662 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5663
5664 if (cp->inv_expr_id != -1)
5665 {
5666 ivs->used_inv_expr[cp->inv_expr_id]--;
5667 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5668 ivs->num_used_inv_expr--;
5669 }
5670 iv_ca_recount_cost (data, ivs);
5671 }
5672
5673 /* Add invariants in set INVS to set IVS. */
5674
5675 static void
5676 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5677 {
5678 bitmap_iterator bi;
5679 unsigned iid;
5680
5681 if (!invs)
5682 return;
5683
5684 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5685 {
5686 ivs->n_invariant_uses[iid]++;
5687 if (ivs->n_invariant_uses[iid] == 1)
5688 ivs->n_regs++;
5689 }
5690 }
5691
5692 /* Set cost pair for USE in set IVS to CP. */
5693
5694 static void
5695 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5696 struct iv_use *use, struct cost_pair *cp)
5697 {
5698 unsigned uid = use->id, cid;
5699
5700 if (ivs->cand_for_use[uid] == cp)
5701 return;
5702
5703 if (ivs->cand_for_use[uid])
5704 iv_ca_set_no_cp (data, ivs, use);
5705
5706 if (cp)
5707 {
5708 cid = cp->cand->id;
5709
5710 ivs->bad_uses--;
5711 ivs->cand_for_use[uid] = cp;
5712 ivs->n_cand_uses[cid]++;
5713 if (ivs->n_cand_uses[cid] == 1)
5714 {
5715 bitmap_set_bit (ivs->cands, cid);
5716 /* Do not count the pseudocandidates. */
5717 if (cp->cand->iv)
5718 ivs->n_regs++;
5719 ivs->n_cands++;
5720 ivs->cand_cost += cp->cand->cost;
5721
5722 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5723 }
5724
5725 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5726 iv_ca_set_add_invariants (ivs, cp->depends_on);
5727
5728 if (cp->inv_expr_id != -1)
5729 {
5730 ivs->used_inv_expr[cp->inv_expr_id]++;
5731 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5732 ivs->num_used_inv_expr++;
5733 }
5734 iv_ca_recount_cost (data, ivs);
5735 }
5736 }
5737
5738 /* Extend set IVS by expressing USE by some of the candidates in it
5739 if possible. Consider all important candidates if candidates in
5740 set IVS don't give any result. */
5741
5742 static void
5743 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5744 struct iv_use *use)
5745 {
5746 struct cost_pair *best_cp = NULL, *cp;
5747 bitmap_iterator bi;
5748 unsigned i;
5749 struct iv_cand *cand;
5750
5751 gcc_assert (ivs->upto >= use->id);
5752 ivs->upto++;
5753 ivs->bad_uses++;
5754
5755 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5756 {
5757 cand = iv_cand (data, i);
5758 cp = get_use_iv_cost (data, use, cand);
5759 if (cheaper_cost_pair (cp, best_cp))
5760 best_cp = cp;
5761 }
5762
5763 if (best_cp == NULL)
5764 {
5765 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5766 {
5767 cand = iv_cand (data, i);
5768 cp = get_use_iv_cost (data, use, cand);
5769 if (cheaper_cost_pair (cp, best_cp))
5770 best_cp = cp;
5771 }
5772 }
5773
5774 iv_ca_set_cp (data, ivs, use, best_cp);
5775 }
5776
5777 /* Get cost for assignment IVS. */
5778
5779 static comp_cost
5780 iv_ca_cost (struct iv_ca *ivs)
5781 {
5782 /* This was a conditional expression but it triggered a bug in
5783 Sun C 5.5. */
5784 if (ivs->bad_uses)
5785 return infinite_cost;
5786 else
5787 return ivs->cost;
5788 }
5789
5790 /* Returns true if all dependences of CP are among invariants in IVS. */
5791
5792 static bool
5793 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5794 {
5795 unsigned i;
5796 bitmap_iterator bi;
5797
5798 if (!cp->depends_on)
5799 return true;
5800
5801 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5802 {
5803 if (ivs->n_invariant_uses[i] == 0)
5804 return false;
5805 }
5806
5807 return true;
5808 }
5809
5810 /* Creates change of expressing USE by NEW_CP instead of OLD_CP and chains
5811 it before NEXT_CHANGE. */
5812
5813 static struct iv_ca_delta *
5814 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5815 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5816 {
5817 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5818
5819 change->use = use;
5820 change->old_cp = old_cp;
5821 change->new_cp = new_cp;
5822 change->next_change = next_change;
5823
5824 return change;
5825 }
5826
5827 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5828 are rewritten. */
5829
5830 static struct iv_ca_delta *
5831 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5832 {
5833 struct iv_ca_delta *last;
5834
5835 if (!l2)
5836 return l1;
5837
5838 if (!l1)
5839 return l2;
5840
5841 for (last = l1; last->next_change; last = last->next_change)
5842 continue;
5843 last->next_change = l2;
5844
5845 return l1;
5846 }
5847
5848 /* Reverse the list of changes DELTA, forming the inverse to it. */
5849
5850 static struct iv_ca_delta *
5851 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5852 {
5853 struct iv_ca_delta *act, *next, *prev = NULL;
5854
5855 for (act = delta; act; act = next)
5856 {
5857 next = act->next_change;
5858 act->next_change = prev;
5859 prev = act;
5860
5861 std::swap (act->old_cp, act->new_cp);
5862 }
5863
5864 return prev;
5865 }
5866
5867 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5868 reverted instead. */
5869
5870 static void
5871 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5872 struct iv_ca_delta *delta, bool forward)
5873 {
5874 struct cost_pair *from, *to;
5875 struct iv_ca_delta *act;
5876
5877 if (!forward)
5878 delta = iv_ca_delta_reverse (delta);
5879
5880 for (act = delta; act; act = act->next_change)
5881 {
5882 from = act->old_cp;
5883 to = act->new_cp;
5884 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5885 iv_ca_set_cp (data, ivs, act->use, to);
5886 }
5887
5888 if (!forward)
5889 iv_ca_delta_reverse (delta);
5890 }
5891
5892 /* Returns true if CAND is used in IVS. */
5893
5894 static bool
5895 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5896 {
5897 return ivs->n_cand_uses[cand->id] > 0;
5898 }
5899
5900 /* Returns number of induction variable candidates in the set IVS. */
5901
5902 static unsigned
5903 iv_ca_n_cands (struct iv_ca *ivs)
5904 {
5905 return ivs->n_cands;
5906 }
5907
5908 /* Free the list of changes DELTA. */
5909
5910 static void
5911 iv_ca_delta_free (struct iv_ca_delta **delta)
5912 {
5913 struct iv_ca_delta *act, *next;
5914
5915 for (act = *delta; act; act = next)
5916 {
5917 next = act->next_change;
5918 free (act);
5919 }
5920
5921 *delta = NULL;
5922 }
5923
5924 /* Allocates new iv candidates assignment. */
5925
5926 static struct iv_ca *
5927 iv_ca_new (struct ivopts_data *data)
5928 {
5929 struct iv_ca *nw = XNEW (struct iv_ca);
5930
5931 nw->upto = 0;
5932 nw->bad_uses = 0;
5933 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5934 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5935 nw->cands = BITMAP_ALLOC (NULL);
5936 nw->n_cands = 0;
5937 nw->n_regs = 0;
5938 nw->cand_use_cost = no_cost;
5939 nw->cand_cost = 0;
5940 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5941 nw->cost = no_cost;
5942 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5943 nw->num_used_inv_expr = 0;
5944
5945 return nw;
5946 }
5947
5948 /* Free memory occupied by the set IVS. */
5949
5950 static void
5951 iv_ca_free (struct iv_ca **ivs)
5952 {
5953 free ((*ivs)->cand_for_use);
5954 free ((*ivs)->n_cand_uses);
5955 BITMAP_FREE ((*ivs)->cands);
5956 free ((*ivs)->n_invariant_uses);
5957 free ((*ivs)->used_inv_expr);
5958 free (*ivs);
5959 *ivs = NULL;
5960 }
5961
5962 /* Dumps IVS to FILE. */
5963
5964 static void
5965 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5966 {
5967 const char *pref = " invariants ";
5968 unsigned i;
5969 comp_cost cost = iv_ca_cost (ivs);
5970
5971 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5972 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5973 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5974 bitmap_print (file, ivs->cands, " candidates: ","\n");
5975
5976 for (i = 0; i < ivs->upto; i++)
5977 {
5978 struct iv_use *use = iv_use (data, i);
5979 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5980 if (cp)
5981 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5982 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5983 else
5984 fprintf (file, " use:%d --> ??\n", use->id);
5985 }
5986
5987 for (i = 1; i <= data->max_inv_id; i++)
5988 if (ivs->n_invariant_uses[i])
5989 {
5990 fprintf (file, "%s%d", pref, i);
5991 pref = ", ";
5992 }
5993 fprintf (file, "\n\n");
5994 }
5995
5996 /* Try changing candidate in IVS to CAND for each use. Return cost of the
5997 new set, and store differences in DELTA. Number of induction variables
5998 in the new set is stored to N_IVS. MIN_NCAND is a flag. When it is true
5999 the function will try to find a solution with mimimal iv candidates. */
6000
6001 static comp_cost
6002 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
6003 struct iv_cand *cand, struct iv_ca_delta **delta,
6004 unsigned *n_ivs, bool min_ncand)
6005 {
6006 unsigned i;
6007 comp_cost cost;
6008 struct iv_use *use;
6009 struct cost_pair *old_cp, *new_cp;
6010
6011 *delta = NULL;
6012 for (i = 0; i < ivs->upto; i++)
6013 {
6014 use = iv_use (data, i);
6015 old_cp = iv_ca_cand_for_use (ivs, use);
6016
6017 if (old_cp
6018 && old_cp->cand == cand)
6019 continue;
6020
6021 new_cp = get_use_iv_cost (data, use, cand);
6022 if (!new_cp)
6023 continue;
6024
6025 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
6026 continue;
6027
6028 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
6029 continue;
6030
6031 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6032 }
6033
6034 iv_ca_delta_commit (data, ivs, *delta, true);
6035 cost = iv_ca_cost (ivs);
6036 if (n_ivs)
6037 *n_ivs = iv_ca_n_cands (ivs);
6038 iv_ca_delta_commit (data, ivs, *delta, false);
6039
6040 return cost;
6041 }
6042
6043 /* Try narrowing set IVS by removing CAND. Return the cost of
6044 the new set and store the differences in DELTA. START is
6045 the candidate with which we start narrowing. */
6046
6047 static comp_cost
6048 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
6049 struct iv_cand *cand, struct iv_cand *start,
6050 struct iv_ca_delta **delta)
6051 {
6052 unsigned i, ci;
6053 struct iv_use *use;
6054 struct cost_pair *old_cp, *new_cp, *cp;
6055 bitmap_iterator bi;
6056 struct iv_cand *cnd;
6057 comp_cost cost, best_cost, acost;
6058
6059 *delta = NULL;
6060 for (i = 0; i < n_iv_uses (data); i++)
6061 {
6062 use = iv_use (data, i);
6063
6064 old_cp = iv_ca_cand_for_use (ivs, use);
6065 if (old_cp->cand != cand)
6066 continue;
6067
6068 best_cost = iv_ca_cost (ivs);
6069 /* Start narrowing with START. */
6070 new_cp = get_use_iv_cost (data, use, start);
6071
6072 if (data->consider_all_candidates)
6073 {
6074 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
6075 {
6076 if (ci == cand->id || (start && ci == start->id))
6077 continue;
6078
6079 cnd = iv_cand (data, ci);
6080
6081 cp = get_use_iv_cost (data, use, cnd);
6082 if (!cp)
6083 continue;
6084
6085 iv_ca_set_cp (data, ivs, use, cp);
6086 acost = iv_ca_cost (ivs);
6087
6088 if (compare_costs (acost, best_cost) < 0)
6089 {
6090 best_cost = acost;
6091 new_cp = cp;
6092 }
6093 }
6094 }
6095 else
6096 {
6097 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
6098 {
6099 if (ci == cand->id || (start && ci == start->id))
6100 continue;
6101
6102 cnd = iv_cand (data, ci);
6103
6104 cp = get_use_iv_cost (data, use, cnd);
6105 if (!cp)
6106 continue;
6107
6108 iv_ca_set_cp (data, ivs, use, cp);
6109 acost = iv_ca_cost (ivs);
6110
6111 if (compare_costs (acost, best_cost) < 0)
6112 {
6113 best_cost = acost;
6114 new_cp = cp;
6115 }
6116 }
6117 }
6118 /* Restore to old cp for use. */
6119 iv_ca_set_cp (data, ivs, use, old_cp);
6120
6121 if (!new_cp)
6122 {
6123 iv_ca_delta_free (delta);
6124 return infinite_cost;
6125 }
6126
6127 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
6128 }
6129
6130 iv_ca_delta_commit (data, ivs, *delta, true);
6131 cost = iv_ca_cost (ivs);
6132 iv_ca_delta_commit (data, ivs, *delta, false);
6133
6134 return cost;
6135 }
6136
6137 /* Try optimizing the set of candidates IVS by removing candidates different
6138 from to EXCEPT_CAND from it. Return cost of the new set, and store
6139 differences in DELTA. */
6140
6141 static comp_cost
6142 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
6143 struct iv_cand *except_cand, struct iv_ca_delta **delta)
6144 {
6145 bitmap_iterator bi;
6146 struct iv_ca_delta *act_delta, *best_delta;
6147 unsigned i;
6148 comp_cost best_cost, acost;
6149 struct iv_cand *cand;
6150
6151 best_delta = NULL;
6152 best_cost = iv_ca_cost (ivs);
6153
6154 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6155 {
6156 cand = iv_cand (data, i);
6157
6158 if (cand == except_cand)
6159 continue;
6160
6161 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
6162
6163 if (compare_costs (acost, best_cost) < 0)
6164 {
6165 best_cost = acost;
6166 iv_ca_delta_free (&best_delta);
6167 best_delta = act_delta;
6168 }
6169 else
6170 iv_ca_delta_free (&act_delta);
6171 }
6172
6173 if (!best_delta)
6174 {
6175 *delta = NULL;
6176 return best_cost;
6177 }
6178
6179 /* Recurse to possibly remove other unnecessary ivs. */
6180 iv_ca_delta_commit (data, ivs, best_delta, true);
6181 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
6182 iv_ca_delta_commit (data, ivs, best_delta, false);
6183 *delta = iv_ca_delta_join (best_delta, *delta);
6184 return best_cost;
6185 }
6186
6187 /* Check if CAND_IDX is a candidate other than OLD_CAND and has
6188 cheaper local cost for USE than BEST_CP. Return pointer to
6189 the corresponding cost_pair, otherwise just return BEST_CP. */
6190
6191 static struct cost_pair*
6192 cheaper_cost_with_cand (struct ivopts_data *data, struct iv_use *use,
6193 unsigned int cand_idx, struct iv_cand *old_cand,
6194 struct cost_pair *best_cp)
6195 {
6196 struct iv_cand *cand;
6197 struct cost_pair *cp;
6198
6199 gcc_assert (old_cand != NULL && best_cp != NULL);
6200 if (cand_idx == old_cand->id)
6201 return best_cp;
6202
6203 cand = iv_cand (data, cand_idx);
6204 cp = get_use_iv_cost (data, use, cand);
6205 if (cp != NULL && cheaper_cost_pair (cp, best_cp))
6206 return cp;
6207
6208 return best_cp;
6209 }
6210
6211 /* Try breaking local optimal fixed-point for IVS by replacing candidates
6212 which are used by more than one iv uses. For each of those candidates,
6213 this function tries to represent iv uses under that candidate using
6214 other ones with lower local cost, then tries to prune the new set.
6215 If the new set has lower cost, It returns the new cost after recording
6216 candidate replacement in list DELTA. */
6217
6218 static comp_cost
6219 iv_ca_replace (struct ivopts_data *data, struct iv_ca *ivs,
6220 struct iv_ca_delta **delta)
6221 {
6222 bitmap_iterator bi, bj;
6223 unsigned int i, j, k;
6224 struct iv_use *use;
6225 struct iv_cand *cand;
6226 comp_cost orig_cost, acost;
6227 struct iv_ca_delta *act_delta, *tmp_delta;
6228 struct cost_pair *old_cp, *best_cp = NULL;
6229
6230 *delta = NULL;
6231 orig_cost = iv_ca_cost (ivs);
6232
6233 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
6234 {
6235 if (ivs->n_cand_uses[i] == 1
6236 || ivs->n_cand_uses[i] > ALWAYS_PRUNE_CAND_SET_BOUND)
6237 continue;
6238
6239 cand = iv_cand (data, i);
6240
6241 act_delta = NULL;
6242 /* Represent uses under current candidate using other ones with
6243 lower local cost. */
6244 for (j = 0; j < ivs->upto; j++)
6245 {
6246 use = iv_use (data, j);
6247 old_cp = iv_ca_cand_for_use (ivs, use);
6248
6249 if (old_cp->cand != cand)
6250 continue;
6251
6252 best_cp = old_cp;
6253 if (data->consider_all_candidates)
6254 for (k = 0; k < n_iv_cands (data); k++)
6255 best_cp = cheaper_cost_with_cand (data, use, k,
6256 old_cp->cand, best_cp);
6257 else
6258 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, k, bj)
6259 best_cp = cheaper_cost_with_cand (data, use, k,
6260 old_cp->cand, best_cp);
6261
6262 if (best_cp == old_cp)
6263 continue;
6264
6265 act_delta = iv_ca_delta_add (use, old_cp, best_cp, act_delta);
6266 }
6267 /* No need for further prune. */
6268 if (!act_delta)
6269 continue;
6270
6271 /* Prune the new candidate set. */
6272 iv_ca_delta_commit (data, ivs, act_delta, true);
6273 acost = iv_ca_prune (data, ivs, NULL, &tmp_delta);
6274 iv_ca_delta_commit (data, ivs, act_delta, false);
6275 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6276
6277 if (compare_costs (acost, orig_cost) < 0)
6278 {
6279 *delta = act_delta;
6280 return acost;
6281 }
6282 else
6283 iv_ca_delta_free (&act_delta);
6284 }
6285
6286 return orig_cost;
6287 }
6288
6289 /* Tries to extend the sets IVS in the best possible way in order
6290 to express the USE. If ORIGINALP is true, prefer candidates from
6291 the original set of IVs, otherwise favor important candidates not
6292 based on any memory object. */
6293
6294 static bool
6295 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
6296 struct iv_use *use, bool originalp)
6297 {
6298 comp_cost best_cost, act_cost;
6299 unsigned i;
6300 bitmap_iterator bi;
6301 struct iv_cand *cand;
6302 struct iv_ca_delta *best_delta = NULL, *act_delta;
6303 struct cost_pair *cp;
6304
6305 iv_ca_add_use (data, ivs, use);
6306 best_cost = iv_ca_cost (ivs);
6307 cp = iv_ca_cand_for_use (ivs, use);
6308 if (cp)
6309 {
6310 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
6311 iv_ca_set_no_cp (data, ivs, use);
6312 }
6313
6314 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
6315 first try important candidates not based on any memory object. Only if
6316 this fails, try the specific ones. Rationale -- in loops with many
6317 variables the best choice often is to use just one generic biv. If we
6318 added here many ivs specific to the uses, the optimization algorithm later
6319 would be likely to get stuck in a local minimum, thus causing us to create
6320 too many ivs. The approach from few ivs to more seems more likely to be
6321 successful -- starting from few ivs, replacing an expensive use by a
6322 specific iv should always be a win. */
6323 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
6324 {
6325 cand = iv_cand (data, i);
6326
6327 if (originalp && cand->pos !=IP_ORIGINAL)
6328 continue;
6329
6330 if (!originalp && cand->iv->base_object != NULL_TREE)
6331 continue;
6332
6333 if (iv_ca_cand_used_p (ivs, cand))
6334 continue;
6335
6336 cp = get_use_iv_cost (data, use, cand);
6337 if (!cp)
6338 continue;
6339
6340 iv_ca_set_cp (data, ivs, use, cp);
6341 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
6342 true);
6343 iv_ca_set_no_cp (data, ivs, use);
6344 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
6345
6346 if (compare_costs (act_cost, best_cost) < 0)
6347 {
6348 best_cost = act_cost;
6349
6350 iv_ca_delta_free (&best_delta);
6351 best_delta = act_delta;
6352 }
6353 else
6354 iv_ca_delta_free (&act_delta);
6355 }
6356
6357 if (infinite_cost_p (best_cost))
6358 {
6359 for (i = 0; i < use->n_map_members; i++)
6360 {
6361 cp = use->cost_map + i;
6362 cand = cp->cand;
6363 if (!cand)
6364 continue;
6365
6366 /* Already tried this. */
6367 if (cand->important)
6368 {
6369 if (originalp && cand->pos == IP_ORIGINAL)
6370 continue;
6371 if (!originalp && cand->iv->base_object == NULL_TREE)
6372 continue;
6373 }
6374
6375 if (iv_ca_cand_used_p (ivs, cand))
6376 continue;
6377
6378 act_delta = NULL;
6379 iv_ca_set_cp (data, ivs, use, cp);
6380 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6381 iv_ca_set_no_cp (data, ivs, use);
6382 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6383 cp, act_delta);
6384
6385 if (compare_costs (act_cost, best_cost) < 0)
6386 {
6387 best_cost = act_cost;
6388
6389 if (best_delta)
6390 iv_ca_delta_free (&best_delta);
6391 best_delta = act_delta;
6392 }
6393 else
6394 iv_ca_delta_free (&act_delta);
6395 }
6396 }
6397
6398 iv_ca_delta_commit (data, ivs, best_delta, true);
6399 iv_ca_delta_free (&best_delta);
6400
6401 return !infinite_cost_p (best_cost);
6402 }
6403
6404 /* Finds an initial assignment of candidates to uses. */
6405
6406 static struct iv_ca *
6407 get_initial_solution (struct ivopts_data *data, bool originalp)
6408 {
6409 struct iv_ca *ivs = iv_ca_new (data);
6410 unsigned i;
6411
6412 for (i = 0; i < n_iv_uses (data); i++)
6413 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6414 {
6415 iv_ca_free (&ivs);
6416 return NULL;
6417 }
6418
6419 return ivs;
6420 }
6421
6422 /* Tries to improve set of induction variables IVS. TRY_REPLACE_P
6423 points to a bool variable, this function tries to break local
6424 optimal fixed-point by replacing candidates in IVS if it's true. */
6425
6426 static bool
6427 try_improve_iv_set (struct ivopts_data *data,
6428 struct iv_ca *ivs, bool *try_replace_p)
6429 {
6430 unsigned i, n_ivs;
6431 comp_cost acost, best_cost = iv_ca_cost (ivs);
6432 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6433 struct iv_cand *cand;
6434
6435 /* Try extending the set of induction variables by one. */
6436 for (i = 0; i < n_iv_cands (data); i++)
6437 {
6438 cand = iv_cand (data, i);
6439
6440 if (iv_ca_cand_used_p (ivs, cand))
6441 continue;
6442
6443 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6444 if (!act_delta)
6445 continue;
6446
6447 /* If we successfully added the candidate and the set is small enough,
6448 try optimizing it by removing other candidates. */
6449 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6450 {
6451 iv_ca_delta_commit (data, ivs, act_delta, true);
6452 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6453 iv_ca_delta_commit (data, ivs, act_delta, false);
6454 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6455 }
6456
6457 if (compare_costs (acost, best_cost) < 0)
6458 {
6459 best_cost = acost;
6460 iv_ca_delta_free (&best_delta);
6461 best_delta = act_delta;
6462 }
6463 else
6464 iv_ca_delta_free (&act_delta);
6465 }
6466
6467 if (!best_delta)
6468 {
6469 /* Try removing the candidates from the set instead. */
6470 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6471
6472 if (!best_delta && *try_replace_p)
6473 {
6474 *try_replace_p = false;
6475 /* So far candidate selecting algorithm tends to choose fewer IVs
6476 so that it can handle cases in which loops have many variables
6477 but the best choice is often to use only one general biv. One
6478 weakness is it can't handle opposite cases, in which different
6479 candidates should be chosen with respect to each use. To solve
6480 the problem, we replace candidates in a manner described by the
6481 comments of iv_ca_replace, thus give general algorithm a chance
6482 to break local optimal fixed-point in these cases. */
6483 best_cost = iv_ca_replace (data, ivs, &best_delta);
6484 }
6485
6486 if (!best_delta)
6487 return false;
6488 }
6489
6490 iv_ca_delta_commit (data, ivs, best_delta, true);
6491 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6492 iv_ca_delta_free (&best_delta);
6493 return true;
6494 }
6495
6496 /* Attempts to find the optimal set of induction variables. We do simple
6497 greedy heuristic -- we try to replace at most one candidate in the selected
6498 solution and remove the unused ivs while this improves the cost. */
6499
6500 static struct iv_ca *
6501 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6502 {
6503 struct iv_ca *set;
6504 bool try_replace_p = true;
6505
6506 /* Get the initial solution. */
6507 set = get_initial_solution (data, originalp);
6508 if (!set)
6509 {
6510 if (dump_file && (dump_flags & TDF_DETAILS))
6511 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6512 return NULL;
6513 }
6514
6515 if (dump_file && (dump_flags & TDF_DETAILS))
6516 {
6517 fprintf (dump_file, "Initial set of candidates:\n");
6518 iv_ca_dump (data, dump_file, set);
6519 }
6520
6521 while (try_improve_iv_set (data, set, &try_replace_p))
6522 {
6523 if (dump_file && (dump_flags & TDF_DETAILS))
6524 {
6525 fprintf (dump_file, "Improved to:\n");
6526 iv_ca_dump (data, dump_file, set);
6527 }
6528 }
6529
6530 return set;
6531 }
6532
6533 static struct iv_ca *
6534 find_optimal_iv_set (struct ivopts_data *data)
6535 {
6536 unsigned i;
6537 struct iv_ca *set, *origset;
6538 struct iv_use *use;
6539 comp_cost cost, origcost;
6540
6541 /* Determine the cost based on a strategy that starts with original IVs,
6542 and try again using a strategy that prefers candidates not based
6543 on any IVs. */
6544 origset = find_optimal_iv_set_1 (data, true);
6545 set = find_optimal_iv_set_1 (data, false);
6546
6547 if (!origset && !set)
6548 return NULL;
6549
6550 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6551 cost = set ? iv_ca_cost (set) : infinite_cost;
6552
6553 if (dump_file && (dump_flags & TDF_DETAILS))
6554 {
6555 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6556 origcost.cost, origcost.complexity);
6557 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6558 cost.cost, cost.complexity);
6559 }
6560
6561 /* Choose the one with the best cost. */
6562 if (compare_costs (origcost, cost) <= 0)
6563 {
6564 if (set)
6565 iv_ca_free (&set);
6566 set = origset;
6567 }
6568 else if (origset)
6569 iv_ca_free (&origset);
6570
6571 for (i = 0; i < n_iv_uses (data); i++)
6572 {
6573 use = iv_use (data, i);
6574 use->selected = iv_ca_cand_for_use (set, use)->cand;
6575 }
6576
6577 return set;
6578 }
6579
6580 /* Creates a new induction variable corresponding to CAND. */
6581
6582 static void
6583 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6584 {
6585 gimple_stmt_iterator incr_pos;
6586 tree base;
6587 bool after = false;
6588
6589 if (!cand->iv)
6590 return;
6591
6592 switch (cand->pos)
6593 {
6594 case IP_NORMAL:
6595 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6596 break;
6597
6598 case IP_END:
6599 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6600 after = true;
6601 break;
6602
6603 case IP_AFTER_USE:
6604 after = true;
6605 /* fall through */
6606 case IP_BEFORE_USE:
6607 incr_pos = gsi_for_stmt (cand->incremented_at);
6608 break;
6609
6610 case IP_ORIGINAL:
6611 /* Mark that the iv is preserved. */
6612 name_info (data, cand->var_before)->preserve_biv = true;
6613 name_info (data, cand->var_after)->preserve_biv = true;
6614
6615 /* Rewrite the increment so that it uses var_before directly. */
6616 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6617 return;
6618 }
6619
6620 gimple_add_tmp_var (cand->var_before);
6621
6622 base = unshare_expr (cand->iv->base);
6623
6624 create_iv (base, unshare_expr (cand->iv->step),
6625 cand->var_before, data->current_loop,
6626 &incr_pos, after, &cand->var_before, &cand->var_after);
6627 }
6628
6629 /* Creates new induction variables described in SET. */
6630
6631 static void
6632 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6633 {
6634 unsigned i;
6635 struct iv_cand *cand;
6636 bitmap_iterator bi;
6637
6638 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6639 {
6640 cand = iv_cand (data, i);
6641 create_new_iv (data, cand);
6642 }
6643
6644 if (dump_file && (dump_flags & TDF_DETAILS))
6645 {
6646 fprintf (dump_file, "Selected IV set for loop %d",
6647 data->current_loop->num);
6648 if (data->loop_loc != UNKNOWN_LOCATION)
6649 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
6650 LOCATION_LINE (data->loop_loc));
6651 fprintf (dump_file, ", %lu IVs:\n", bitmap_count_bits (set->cands));
6652 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6653 {
6654 cand = iv_cand (data, i);
6655 dump_cand (dump_file, cand);
6656 }
6657 fprintf (dump_file, "\n");
6658 }
6659 }
6660
6661 /* Rewrites USE (definition of iv used in a nonlinear expression)
6662 using candidate CAND. */
6663
6664 static void
6665 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6666 struct iv_use *use, struct iv_cand *cand)
6667 {
6668 tree comp;
6669 tree op, tgt;
6670 gassign *ass;
6671 gimple_stmt_iterator bsi;
6672
6673 /* An important special case -- if we are asked to express value of
6674 the original iv by itself, just exit; there is no need to
6675 introduce a new computation (that might also need casting the
6676 variable to unsigned and back). */
6677 if (cand->pos == IP_ORIGINAL
6678 && cand->incremented_at == use->stmt)
6679 {
6680 enum tree_code stmt_code;
6681
6682 gcc_assert (is_gimple_assign (use->stmt));
6683 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6684
6685 /* Check whether we may leave the computation unchanged.
6686 This is the case only if it does not rely on other
6687 computations in the loop -- otherwise, the computation
6688 we rely upon may be removed in remove_unused_ivs,
6689 thus leading to ICE. */
6690 stmt_code = gimple_assign_rhs_code (use->stmt);
6691 if (stmt_code == PLUS_EXPR
6692 || stmt_code == MINUS_EXPR
6693 || stmt_code == POINTER_PLUS_EXPR)
6694 {
6695 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6696 op = gimple_assign_rhs2 (use->stmt);
6697 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6698 op = gimple_assign_rhs1 (use->stmt);
6699 else
6700 op = NULL_TREE;
6701 }
6702 else
6703 op = NULL_TREE;
6704
6705 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6706 return;
6707 }
6708
6709 comp = get_computation (data->current_loop, use, cand);
6710 gcc_assert (comp != NULL_TREE);
6711
6712 switch (gimple_code (use->stmt))
6713 {
6714 case GIMPLE_PHI:
6715 tgt = PHI_RESULT (use->stmt);
6716
6717 /* If we should keep the biv, do not replace it. */
6718 if (name_info (data, tgt)->preserve_biv)
6719 return;
6720
6721 bsi = gsi_after_labels (gimple_bb (use->stmt));
6722 break;
6723
6724 case GIMPLE_ASSIGN:
6725 tgt = gimple_assign_lhs (use->stmt);
6726 bsi = gsi_for_stmt (use->stmt);
6727 break;
6728
6729 default:
6730 gcc_unreachable ();
6731 }
6732
6733 if (!valid_gimple_rhs_p (comp)
6734 || (gimple_code (use->stmt) != GIMPLE_PHI
6735 /* We can't allow re-allocating the stmt as it might be pointed
6736 to still. */
6737 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6738 >= gimple_num_ops (gsi_stmt (bsi)))))
6739 {
6740 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6741 true, GSI_SAME_STMT);
6742 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6743 {
6744 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6745 /* As this isn't a plain copy we have to reset alignment
6746 information. */
6747 if (SSA_NAME_PTR_INFO (comp))
6748 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6749 }
6750 }
6751
6752 if (gimple_code (use->stmt) == GIMPLE_PHI)
6753 {
6754 ass = gimple_build_assign (tgt, comp);
6755 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6756
6757 bsi = gsi_for_stmt (use->stmt);
6758 remove_phi_node (&bsi, false);
6759 }
6760 else
6761 {
6762 gimple_assign_set_rhs_from_tree (&bsi, comp);
6763 use->stmt = gsi_stmt (bsi);
6764 }
6765 }
6766
6767 /* Performs a peephole optimization to reorder the iv update statement with
6768 a mem ref to enable instruction combining in later phases. The mem ref uses
6769 the iv value before the update, so the reordering transformation requires
6770 adjustment of the offset. CAND is the selected IV_CAND.
6771
6772 Example:
6773
6774 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6775 iv2 = iv1 + 1;
6776
6777 if (t < val) (1)
6778 goto L;
6779 goto Head;
6780
6781
6782 directly propagating t over to (1) will introduce overlapping live range
6783 thus increase register pressure. This peephole transform it into:
6784
6785
6786 iv2 = iv1 + 1;
6787 t = MEM_REF (base, iv2, 8, 8);
6788 if (t < val)
6789 goto L;
6790 goto Head;
6791 */
6792
6793 static void
6794 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6795 {
6796 tree var_after;
6797 gimple iv_update, stmt;
6798 basic_block bb;
6799 gimple_stmt_iterator gsi, gsi_iv;
6800
6801 if (cand->pos != IP_NORMAL)
6802 return;
6803
6804 var_after = cand->var_after;
6805 iv_update = SSA_NAME_DEF_STMT (var_after);
6806
6807 bb = gimple_bb (iv_update);
6808 gsi = gsi_last_nondebug_bb (bb);
6809 stmt = gsi_stmt (gsi);
6810
6811 /* Only handle conditional statement for now. */
6812 if (gimple_code (stmt) != GIMPLE_COND)
6813 return;
6814
6815 gsi_prev_nondebug (&gsi);
6816 stmt = gsi_stmt (gsi);
6817 if (stmt != iv_update)
6818 return;
6819
6820 gsi_prev_nondebug (&gsi);
6821 if (gsi_end_p (gsi))
6822 return;
6823
6824 stmt = gsi_stmt (gsi);
6825 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6826 return;
6827
6828 if (stmt != use->stmt)
6829 return;
6830
6831 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6832 return;
6833
6834 if (dump_file && (dump_flags & TDF_DETAILS))
6835 {
6836 fprintf (dump_file, "Reordering \n");
6837 print_gimple_stmt (dump_file, iv_update, 0, 0);
6838 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6839 fprintf (dump_file, "\n");
6840 }
6841
6842 gsi = gsi_for_stmt (use->stmt);
6843 gsi_iv = gsi_for_stmt (iv_update);
6844 gsi_move_before (&gsi_iv, &gsi);
6845
6846 cand->pos = IP_BEFORE_USE;
6847 cand->incremented_at = use->stmt;
6848 }
6849
6850 /* Rewrites USE (address that is an iv) using candidate CAND. */
6851
6852 static void
6853 rewrite_use_address_1 (struct ivopts_data *data,
6854 struct iv_use *use, struct iv_cand *cand)
6855 {
6856 aff_tree aff;
6857 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6858 tree base_hint = NULL_TREE;
6859 tree ref, iv;
6860 bool ok;
6861
6862 adjust_iv_update_pos (cand, use);
6863 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6864 gcc_assert (ok);
6865 unshare_aff_combination (&aff);
6866
6867 /* To avoid undefined overflow problems, all IV candidates use unsigned
6868 integer types. The drawback is that this makes it impossible for
6869 create_mem_ref to distinguish an IV that is based on a memory object
6870 from one that represents simply an offset.
6871
6872 To work around this problem, we pass a hint to create_mem_ref that
6873 indicates which variable (if any) in aff is an IV based on a memory
6874 object. Note that we only consider the candidate. If this is not
6875 based on an object, the base of the reference is in some subexpression
6876 of the use -- but these will use pointer types, so they are recognized
6877 by the create_mem_ref heuristics anyway. */
6878 if (cand->iv->base_object)
6879 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6880
6881 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6882 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6883 reference_alias_ptr_type (*use->op_p),
6884 iv, base_hint, data->speed);
6885 copy_ref_info (ref, *use->op_p);
6886 *use->op_p = ref;
6887 }
6888
6889 /* Rewrites USE (address that is an iv) using candidate CAND. If it's the
6890 first use of a group, rewrites sub uses in the group too. */
6891
6892 static void
6893 rewrite_use_address (struct ivopts_data *data,
6894 struct iv_use *use, struct iv_cand *cand)
6895 {
6896 struct iv_use *next;
6897
6898 gcc_assert (use->sub_id == 0);
6899 rewrite_use_address_1 (data, use, cand);
6900 update_stmt (use->stmt);
6901
6902 for (next = use->next; next != NULL; next = next->next)
6903 {
6904 rewrite_use_address_1 (data, next, cand);
6905 update_stmt (next->stmt);
6906 }
6907
6908 return;
6909 }
6910
6911 /* Rewrites USE (the condition such that one of the arguments is an iv) using
6912 candidate CAND. */
6913
6914 static void
6915 rewrite_use_compare (struct ivopts_data *data,
6916 struct iv_use *use, struct iv_cand *cand)
6917 {
6918 tree comp, *var_p, op, bound;
6919 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6920 enum tree_code compare;
6921 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6922 bool ok;
6923
6924 bound = cp->value;
6925 if (bound)
6926 {
6927 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6928 tree var_type = TREE_TYPE (var);
6929 gimple_seq stmts;
6930
6931 if (dump_file && (dump_flags & TDF_DETAILS))
6932 {
6933 fprintf (dump_file, "Replacing exit test: ");
6934 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6935 }
6936 compare = cp->comp;
6937 bound = unshare_expr (fold_convert (var_type, bound));
6938 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6939 if (stmts)
6940 gsi_insert_seq_on_edge_immediate (
6941 loop_preheader_edge (data->current_loop),
6942 stmts);
6943
6944 gcond *cond_stmt = as_a <gcond *> (use->stmt);
6945 gimple_cond_set_lhs (cond_stmt, var);
6946 gimple_cond_set_code (cond_stmt, compare);
6947 gimple_cond_set_rhs (cond_stmt, op);
6948 return;
6949 }
6950
6951 /* The induction variable elimination failed; just express the original
6952 giv. */
6953 comp = get_computation (data->current_loop, use, cand);
6954 gcc_assert (comp != NULL_TREE);
6955
6956 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6957 gcc_assert (ok);
6958
6959 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6960 true, GSI_SAME_STMT);
6961 }
6962
6963 /* Rewrites USE using candidate CAND. */
6964
6965 static void
6966 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6967 {
6968 switch (use->type)
6969 {
6970 case USE_NONLINEAR_EXPR:
6971 rewrite_use_nonlinear_expr (data, use, cand);
6972 break;
6973
6974 case USE_ADDRESS:
6975 rewrite_use_address (data, use, cand);
6976 break;
6977
6978 case USE_COMPARE:
6979 rewrite_use_compare (data, use, cand);
6980 break;
6981
6982 default:
6983 gcc_unreachable ();
6984 }
6985
6986 update_stmt (use->stmt);
6987 }
6988
6989 /* Rewrite the uses using the selected induction variables. */
6990
6991 static void
6992 rewrite_uses (struct ivopts_data *data)
6993 {
6994 unsigned i;
6995 struct iv_cand *cand;
6996 struct iv_use *use;
6997
6998 for (i = 0; i < n_iv_uses (data); i++)
6999 {
7000 use = iv_use (data, i);
7001 cand = use->selected;
7002 gcc_assert (cand);
7003
7004 rewrite_use (data, use, cand);
7005 }
7006 }
7007
7008 /* Removes the ivs that are not used after rewriting. */
7009
7010 static void
7011 remove_unused_ivs (struct ivopts_data *data)
7012 {
7013 unsigned j;
7014 bitmap_iterator bi;
7015 bitmap toremove = BITMAP_ALLOC (NULL);
7016
7017 /* Figure out an order in which to release SSA DEFs so that we don't
7018 release something that we'd have to propagate into a debug stmt
7019 afterwards. */
7020 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
7021 {
7022 struct version_info *info;
7023
7024 info = ver_info (data, j);
7025 if (info->iv
7026 && !integer_zerop (info->iv->step)
7027 && !info->inv_id
7028 && !info->iv->have_use_for
7029 && !info->preserve_biv)
7030 {
7031 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
7032
7033 tree def = info->iv->ssa_name;
7034
7035 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
7036 {
7037 imm_use_iterator imm_iter;
7038 use_operand_p use_p;
7039 gimple stmt;
7040 int count = 0;
7041
7042 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7043 {
7044 if (!gimple_debug_bind_p (stmt))
7045 continue;
7046
7047 /* We just want to determine whether to do nothing
7048 (count == 0), to substitute the computed
7049 expression into a single use of the SSA DEF by
7050 itself (count == 1), or to use a debug temp
7051 because the SSA DEF is used multiple times or as
7052 part of a larger expression (count > 1). */
7053 count++;
7054 if (gimple_debug_bind_get_value (stmt) != def)
7055 count++;
7056
7057 if (count > 1)
7058 BREAK_FROM_IMM_USE_STMT (imm_iter);
7059 }
7060
7061 if (!count)
7062 continue;
7063
7064 struct iv_use dummy_use;
7065 struct iv_cand *best_cand = NULL, *cand;
7066 unsigned i, best_pref = 0, cand_pref;
7067
7068 memset (&dummy_use, 0, sizeof (dummy_use));
7069 dummy_use.iv = info->iv;
7070 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
7071 {
7072 cand = iv_use (data, i)->selected;
7073 if (cand == best_cand)
7074 continue;
7075 cand_pref = operand_equal_p (cand->iv->step,
7076 info->iv->step, 0)
7077 ? 4 : 0;
7078 cand_pref
7079 += TYPE_MODE (TREE_TYPE (cand->iv->base))
7080 == TYPE_MODE (TREE_TYPE (info->iv->base))
7081 ? 2 : 0;
7082 cand_pref
7083 += TREE_CODE (cand->iv->base) == INTEGER_CST
7084 ? 1 : 0;
7085 if (best_cand == NULL || best_pref < cand_pref)
7086 {
7087 best_cand = cand;
7088 best_pref = cand_pref;
7089 }
7090 }
7091
7092 if (!best_cand)
7093 continue;
7094
7095 tree comp = get_computation_at (data->current_loop,
7096 &dummy_use, best_cand,
7097 SSA_NAME_DEF_STMT (def));
7098 if (!comp)
7099 continue;
7100
7101 if (count > 1)
7102 {
7103 tree vexpr = make_node (DEBUG_EXPR_DECL);
7104 DECL_ARTIFICIAL (vexpr) = 1;
7105 TREE_TYPE (vexpr) = TREE_TYPE (comp);
7106 if (SSA_NAME_VAR (def))
7107 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
7108 else
7109 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
7110 gdebug *def_temp
7111 = gimple_build_debug_bind (vexpr, comp, NULL);
7112 gimple_stmt_iterator gsi;
7113
7114 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
7115 gsi = gsi_after_labels (gimple_bb
7116 (SSA_NAME_DEF_STMT (def)));
7117 else
7118 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
7119
7120 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
7121 comp = vexpr;
7122 }
7123
7124 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
7125 {
7126 if (!gimple_debug_bind_p (stmt))
7127 continue;
7128
7129 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
7130 SET_USE (use_p, comp);
7131
7132 update_stmt (stmt);
7133 }
7134 }
7135 }
7136 }
7137
7138 release_defs_bitset (toremove);
7139
7140 BITMAP_FREE (toremove);
7141 }
7142
7143 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
7144 for hash_map::traverse. */
7145
7146 bool
7147 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
7148 {
7149 free (value);
7150 return true;
7151 }
7152
7153 /* Frees data allocated by the optimization of a single loop. */
7154
7155 static void
7156 free_loop_data (struct ivopts_data *data)
7157 {
7158 unsigned i, j;
7159 bitmap_iterator bi;
7160 tree obj;
7161
7162 if (data->niters)
7163 {
7164 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
7165 delete data->niters;
7166 data->niters = NULL;
7167 }
7168
7169 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
7170 {
7171 struct version_info *info;
7172
7173 info = ver_info (data, i);
7174 info->iv = NULL;
7175 info->has_nonlin_use = false;
7176 info->preserve_biv = false;
7177 info->inv_id = 0;
7178 }
7179 bitmap_clear (data->relevant);
7180 bitmap_clear (data->important_candidates);
7181
7182 for (i = 0; i < n_iv_uses (data); i++)
7183 {
7184 struct iv_use *use = iv_use (data, i);
7185 struct iv_use *pre = use, *sub = use->next;
7186
7187 while (sub)
7188 {
7189 gcc_assert (sub->related_cands == NULL);
7190 gcc_assert (sub->n_map_members == 0 && sub->cost_map == NULL);
7191
7192 pre = sub;
7193 sub = sub->next;
7194 free (pre);
7195 }
7196
7197 BITMAP_FREE (use->related_cands);
7198 for (j = 0; j < use->n_map_members; j++)
7199 if (use->cost_map[j].depends_on)
7200 BITMAP_FREE (use->cost_map[j].depends_on);
7201 free (use->cost_map);
7202 free (use);
7203 }
7204 data->iv_uses.truncate (0);
7205
7206 for (i = 0; i < n_iv_cands (data); i++)
7207 {
7208 struct iv_cand *cand = iv_cand (data, i);
7209
7210 if (cand->depends_on)
7211 BITMAP_FREE (cand->depends_on);
7212 free (cand);
7213 }
7214 data->iv_candidates.truncate (0);
7215
7216 if (data->version_info_size < num_ssa_names)
7217 {
7218 data->version_info_size = 2 * num_ssa_names;
7219 free (data->version_info);
7220 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
7221 }
7222
7223 data->max_inv_id = 0;
7224
7225 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
7226 SET_DECL_RTL (obj, NULL_RTX);
7227
7228 decl_rtl_to_reset.truncate (0);
7229
7230 data->inv_expr_tab->empty ();
7231 data->inv_expr_id = 0;
7232 }
7233
7234 /* Finalizes data structures used by the iv optimization pass. LOOPS is the
7235 loop tree. */
7236
7237 static void
7238 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
7239 {
7240 free_loop_data (data);
7241 free (data->version_info);
7242 BITMAP_FREE (data->relevant);
7243 BITMAP_FREE (data->important_candidates);
7244
7245 decl_rtl_to_reset.release ();
7246 data->iv_uses.release ();
7247 data->iv_candidates.release ();
7248 delete data->inv_expr_tab;
7249 data->inv_expr_tab = NULL;
7250 free_affine_expand_cache (&data->name_expansion_cache);
7251 obstack_free (&data->iv_obstack, NULL);
7252 }
7253
7254 /* Returns true if the loop body BODY includes any function calls. */
7255
7256 static bool
7257 loop_body_includes_call (basic_block *body, unsigned num_nodes)
7258 {
7259 gimple_stmt_iterator gsi;
7260 unsigned i;
7261
7262 for (i = 0; i < num_nodes; i++)
7263 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
7264 {
7265 gimple stmt = gsi_stmt (gsi);
7266 if (is_gimple_call (stmt)
7267 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
7268 return true;
7269 }
7270 return false;
7271 }
7272
7273 /* Optimizes the LOOP. Returns true if anything changed. */
7274
7275 static bool
7276 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
7277 {
7278 bool changed = false;
7279 struct iv_ca *iv_ca;
7280 edge exit = single_dom_exit (loop);
7281 basic_block *body;
7282
7283 gcc_assert (!data->niters);
7284 data->current_loop = loop;
7285 data->loop_loc = find_loop_location (loop);
7286 data->speed = optimize_loop_for_speed_p (loop);
7287
7288 if (dump_file && (dump_flags & TDF_DETAILS))
7289 {
7290 fprintf (dump_file, "Processing loop %d", loop->num);
7291 if (data->loop_loc != UNKNOWN_LOCATION)
7292 fprintf (dump_file, " at %s:%d", LOCATION_FILE (data->loop_loc),
7293 LOCATION_LINE (data->loop_loc));
7294 fprintf (dump_file, "\n");
7295
7296 if (exit)
7297 {
7298 fprintf (dump_file, " single exit %d -> %d, exit condition ",
7299 exit->src->index, exit->dest->index);
7300 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
7301 fprintf (dump_file, "\n");
7302 }
7303
7304 fprintf (dump_file, "\n");
7305 }
7306
7307 body = get_loop_body (loop);
7308 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
7309 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
7310 free (body);
7311
7312 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
7313
7314 /* For each ssa name determines whether it behaves as an induction variable
7315 in some loop. */
7316 if (!find_induction_variables (data))
7317 goto finish;
7318
7319 /* Finds interesting uses (item 1). */
7320 find_interesting_uses (data);
7321 group_address_uses (data);
7322 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
7323 goto finish;
7324
7325 /* Finds candidates for the induction variables (item 2). */
7326 find_iv_candidates (data);
7327
7328 /* Calculates the costs (item 3, part 1). */
7329 determine_iv_costs (data);
7330 determine_use_iv_costs (data);
7331 determine_set_costs (data);
7332
7333 /* Find the optimal set of induction variables (item 3, part 2). */
7334 iv_ca = find_optimal_iv_set (data);
7335 if (!iv_ca)
7336 goto finish;
7337 changed = true;
7338
7339 /* Create the new induction variables (item 4, part 1). */
7340 create_new_ivs (data, iv_ca);
7341 iv_ca_free (&iv_ca);
7342
7343 /* Rewrite the uses (item 4, part 2). */
7344 rewrite_uses (data);
7345
7346 /* Remove the ivs that are unused after rewriting. */
7347 remove_unused_ivs (data);
7348
7349 /* We have changed the structure of induction variables; it might happen
7350 that definitions in the scev database refer to some of them that were
7351 eliminated. */
7352 scev_reset ();
7353
7354 finish:
7355 free_loop_data (data);
7356
7357 return changed;
7358 }
7359
7360 /* Main entry point. Optimizes induction variables in loops. */
7361
7362 void
7363 tree_ssa_iv_optimize (void)
7364 {
7365 struct loop *loop;
7366 struct ivopts_data data;
7367
7368 tree_ssa_iv_optimize_init (&data);
7369
7370 /* Optimize the loops starting with the innermost ones. */
7371 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
7372 {
7373 if (dump_file && (dump_flags & TDF_DETAILS))
7374 flow_loop_dump (loop, dump_file, NULL, 1);
7375
7376 tree_ssa_iv_optimize_loop (&data, loop);
7377 }
7378
7379 tree_ssa_iv_optimize_finalize (&data);
7380 }