/* Induction variable optimizations.
   Copyright (C) 2003-2014 Free Software Foundation, Inc.

This file is part of GCC.

GCC is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 3, or (at your option) any
later version.

GCC is distributed in the hope that it will be useful, but WITHOUT
ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3.  If not see
<http://www.gnu.org/licenses/>.  */

/* This pass tries to find the optimal set of induction variables for the loop.
   It optimizes just the basic linear induction variables (although adding
   support for other types should not be too hard).  It includes the
   optimizations commonly known as strength reduction, induction variable
   coalescing and induction variable elimination.  It does it in the
   following steps:

   1) The interesting uses of induction variables are found.  This includes

      -- uses of induction variables in non-linear expressions
      -- addresses of arrays
      -- comparisons of induction variables

   2) Candidates for the induction variables are found.  This includes

      -- old induction variables
      -- the variables defined by expressions derived from the "interesting
         uses" above

   3) The optimal (w.r.t. a cost function) set of variables is chosen.  The
      cost function assigns a cost to sets of induction variables and consists
      of three parts:

      -- The use costs.  Each of the interesting uses chooses the best induction
         variable in the set and adds its cost to the sum.  The cost reflects
         the time spent on modifying the induction variable's value to be usable
         for the given purpose (adding base and offset for arrays, etc.).
      -- The variable costs.  Each of the variables has a cost assigned that
         reflects the costs associated with incrementing the value of the
         variable.  The original variables are somewhat preferred.
      -- The set cost.  Depending on the size of the set, extra cost may be
         added to reflect register pressure.

      All the costs are defined in a machine-specific way, using the target
      hooks and machine descriptions to determine them.

   4) The trees are transformed to use the new variables, the dead code is
      removed.

   All of this is done loop by loop.  Doing it globally is theoretically
   possible, it might give better performance and it might enable us
   to decide costs more precisely, but getting all the interactions right
   would be complicated.  */
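
/* For instance (a rough sketch of the net effect, not the exact code
   the pass emits), a loop like

     for (i = 0; i < n; i++)
       a[i] = 0;

   has an address use a[i] based on the biv i.  Choosing a pointer
   candidate for that use strength-reduces the scaled access, giving
   code equivalent to

     for (p = a; p != a + n; p++)
       *p = 0;

   after which i may be eliminated entirely if the exit test is
   rewritten to compare p against the bound.  */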

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "tree.h"
#include "stor-layout.h"
#include "tm_p.h"
#include "basic-block.h"
#include "gimple-pretty-print.h"
#include "hash-map.h"
#include "hash-table.h"
#include "tree-ssa-alias.h"
#include "internal-fn.h"
#include "tree-eh.h"
#include "gimple-expr.h"
#include "is-a.h"
#include "gimple.h"
#include "gimplify.h"
#include "gimple-iterator.h"
#include "gimplify-me.h"
#include "gimple-ssa.h"
#include "cgraph.h"
#include "tree-cfg.h"
#include "tree-phinodes.h"
#include "ssa-iterators.h"
#include "stringpool.h"
#include "tree-ssanames.h"
#include "tree-ssa-loop-ivopts.h"
#include "tree-ssa-loop-manip.h"
#include "tree-ssa-loop-niter.h"
#include "tree-ssa-loop.h"
#include "expr.h"
#include "tree-dfa.h"
#include "tree-ssa.h"
#include "cfgloop.h"
#include "tree-pass.h"
#include "insn-config.h"
#include "tree-chrec.h"
#include "tree-scalar-evolution.h"
#include "params.h"
#include "langhooks.h"
#include "tree-affine.h"
#include "target.h"
#include "tree-inline.h"
#include "tree-ssa-propagate.h"
#include "expmed.h"
#include "tree-ssa-address.h"
#include "builtins.h"

/* FIXME: Expressions are expanded to RTL in this pass to determine the
   cost of different addressing modes.  This should be moved to a TBD
   interface between the GIMPLE and RTL worlds.  */
#include "expr.h"
#include "recog.h"

/* The infinite cost.  */
#define INFTY 10000000

#define AVG_LOOP_NITER(LOOP) 5

/* Returns the expected number of loop iterations for LOOP.
   The average trip count is computed from profile data if it
   exists.  */

static inline HOST_WIDE_INT
avg_loop_niter (struct loop *loop)
{
  HOST_WIDE_INT niter = estimated_stmt_executions_int (loop);
  if (niter == -1)
    return AVG_LOOP_NITER (loop);

  return niter;
}

/* Representation of the induction variable.  */
struct iv
{
  tree base;            /* Initial value of the iv.  */
  tree base_object;     /* A memory object to which the induction variable points.  */
  tree step;            /* Step of the iv (constant only).  */
  tree ssa_name;        /* The ssa name with the value.  */
  bool biv_p;           /* Is it a biv?  */
  bool have_use_for;    /* Do we already have a use for it?  */
  unsigned use_id;      /* If so, the identifier of that use.  */
};

/* Per-ssa version information (induction variable descriptions, etc.).  */
struct version_info
{
  tree name;            /* The ssa name.  */
  struct iv *iv;        /* Induction variable description.  */
  bool has_nonlin_use;  /* For a loop-level invariant, whether it is used in
                           an expression that is not an induction variable.  */
  bool preserve_biv;    /* For the original biv, whether to preserve it.  */
  unsigned inv_id;      /* Id of an invariant.  */
};

/* Types of uses.  */
enum use_type
{
  USE_NONLINEAR_EXPR,   /* Use in a nonlinear expression.  */
  USE_ADDRESS,          /* Use in an address.  */
  USE_COMPARE           /* Use is a compare.  */
};

/* Cost of a computation.  */
typedef struct
{
  int cost;             /* The runtime cost.  */
  unsigned complexity;  /* The estimate of the complexity of the code for
                           the computation (in no concrete units --
                           complexity field should be larger for more
                           complex expressions and addressing modes).  */
} comp_cost;

static const comp_cost no_cost = {0, 0};
static const comp_cost infinite_cost = {INFTY, INFTY};
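
/* As a sketch of how these values are meant to be used (the comparison
   helpers themselves appear further down in this file): the runtime
   cost is the primary key and complexity only breaks ties, so a cost
   of {4, 1} is preferred both to {5, 0} and to {4, 2}, and anything
   is preferred to infinite_cost.  */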

/* The candidate - cost pair.  */
struct cost_pair
{
  struct iv_cand *cand; /* The candidate.  */
  comp_cost cost;       /* The cost.  */
  bitmap depends_on;    /* The list of invariants that have to be
                           preserved.  */
  tree value;           /* For final value elimination, the expression for
                           the final value of the iv.  For iv elimination,
                           the new bound to compare with.  */
  enum tree_code comp;  /* For iv elimination, the comparison.  */
  int inv_expr_id;      /* Loop invariant expression id.  */
};

/* Use.  */
struct iv_use
{
  unsigned id;          /* The id of the use.  */
  enum use_type type;   /* Type of the use.  */
  struct iv *iv;        /* The induction variable it is based on.  */
  gimple stmt;          /* Statement in which it occurs.  */
  tree *op_p;           /* The place where it occurs.  */
  bitmap related_cands; /* The set of "related" iv candidates, plus the common
                           important ones.  */

  unsigned n_map_members; /* Number of candidates in the cost_map list.  */
  struct cost_pair *cost_map;
                        /* The costs with respect to the iv candidates.  */

  struct iv_cand *selected;
                        /* The selected candidate.  */
};

/* The position where the iv is computed.  */
enum iv_position
{
  IP_NORMAL,            /* At the end, just before the exit condition.  */
  IP_END,               /* At the end of the latch block.  */
  IP_BEFORE_USE,        /* Immediately before a specific use.  */
  IP_AFTER_USE,         /* Immediately after a specific use.  */
  IP_ORIGINAL           /* The original biv.  */
};

/* The induction variable candidate.  */
struct iv_cand
{
  unsigned id;          /* The number of the candidate.  */
  bool important;       /* Whether this is an "important" candidate, i.e. such
                           that it should be considered by all uses.  */
  ENUM_BITFIELD(iv_position) pos : 8;   /* Where it is computed.  */
  gimple incremented_at;/* For the original biv, the statement where it is
                           incremented.  */
  tree var_before;      /* The variable used for it before increment.  */
  tree var_after;       /* The variable used for it after increment.  */
  struct iv *iv;        /* The value of the candidate.  NULL for
                           "pseudocandidate" used to indicate the possibility
                           to replace the final value of an iv by direct
                           computation of the value.  */
  unsigned cost;        /* Cost of the candidate.  */
  unsigned cost_step;   /* Cost of the candidate's increment operation.  */
  struct iv_use *ainc_use; /* For IP_{BEFORE,AFTER}_USE candidates, the place
                              where it is incremented.  */
  bitmap depends_on;    /* The list of invariants that are used in step of the
                           biv.  */
};

/* Loop invariant expression hashtable entry.  */
struct iv_inv_expr_ent
{
  tree expr;
  int id;
  hashval_t hash;
};

/* The data used by the induction variable optimizations.  */

typedef struct iv_use *iv_use_p;

typedef struct iv_cand *iv_cand_p;

/* Hashtable helpers.  */

struct iv_inv_expr_hasher : typed_free_remove <iv_inv_expr_ent>
{
  typedef iv_inv_expr_ent value_type;
  typedef iv_inv_expr_ent compare_type;
  static inline hashval_t hash (const value_type *);
  static inline bool equal (const value_type *, const compare_type *);
};

/* Hash function for loop invariant expressions.  */

inline hashval_t
iv_inv_expr_hasher::hash (const value_type *expr)
{
  return expr->hash;
}

/* Hash table equality function for expressions.  */

inline bool
iv_inv_expr_hasher::equal (const value_type *expr1, const compare_type *expr2)
{
  return expr1->hash == expr2->hash
         && operand_equal_p (expr1->expr, expr2->expr, 0);
}

struct ivopts_data
{
  /* The currently optimized loop.  */
  struct loop *current_loop;

  /* Numbers of iterations for all exits of the current loop.  */
  hash_map<edge, tree_niter_desc *> *niters;

  /* Number of registers used in it.  */
  unsigned regs_used;

  /* The size of version_info array allocated.  */
  unsigned version_info_size;

  /* The array of information for the ssa names.  */
  struct version_info *version_info;

  /* The hashtable of loop invariant expressions created
     by ivopt.  */
  hash_table<iv_inv_expr_hasher> *inv_expr_tab;

  /* Loop invariant expression id.  */
  int inv_expr_id;

  /* The bitmap of indices in version_info whose value was changed.  */
  bitmap relevant;

  /* The uses of induction variables.  */
  vec<iv_use_p> iv_uses;

  /* The candidates.  */
  vec<iv_cand_p> iv_candidates;

  /* A bitmap of important candidates.  */
  bitmap important_candidates;

  /* The maximum invariant id.  */
  unsigned max_inv_id;

  /* Whether to consider just related and important candidates when replacing a
     use.  */
  bool consider_all_candidates;

  /* Are we optimizing for speed?  */
  bool speed;

  /* Whether the loop body includes any function calls.  */
  bool body_includes_call;

  /* Whether the loop body can only be exited via single exit.  */
  bool loop_single_exit_p;
};

/* An assignment of iv candidates to uses.  */

struct iv_ca
{
  /* The number of uses covered by the assignment.  */
  unsigned upto;

  /* Number of uses that cannot be expressed by the candidates in the set.  */
  unsigned bad_uses;

  /* Candidate assigned to a use, together with the related costs.  */
  struct cost_pair **cand_for_use;

  /* Number of times each candidate is used.  */
  unsigned *n_cand_uses;

  /* The candidates used.  */
  bitmap cands;

  /* The number of candidates in the set.  */
  unsigned n_cands;

  /* Total number of registers needed.  */
  unsigned n_regs;

  /* Total cost of expressing uses.  */
  comp_cost cand_use_cost;

  /* Total cost of candidates.  */
  unsigned cand_cost;

  /* Number of times each invariant is used.  */
  unsigned *n_invariant_uses;

  /* The array holding the number of uses of each loop
     invariant expression created by ivopt.  */
  unsigned *used_inv_expr;

  /* The number of created loop invariants.  */
  unsigned num_used_inv_expr;

  /* Total cost of the assignment.  */
  comp_cost cost;
};

/* Difference of two iv candidate assignments.  */

struct iv_ca_delta
{
  /* Changed use.  */
  struct iv_use *use;

  /* An old assignment (for rollback purposes).  */
  struct cost_pair *old_cp;

  /* A new assignment.  */
  struct cost_pair *new_cp;

  /* Next change in the list.  */
  struct iv_ca_delta *next_change;
};

/* Bound on number of candidates below which all candidates are considered.  */

#define CONSIDER_ALL_CANDIDATES_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_CONSIDER_ALL_CANDIDATES_BOUND))

/* If there are more iv occurrences, we just give up (it is quite unlikely that
   optimizing such a loop would help, and it would take ages).  */

#define MAX_CONSIDERED_USES \
  ((unsigned) PARAM_VALUE (PARAM_IV_MAX_CONSIDERED_USES))

/* If there are at most this number of ivs in the set, always try removing
   unnecessary ivs from the set.  */

#define ALWAYS_PRUNE_CAND_SET_BOUND \
  ((unsigned) PARAM_VALUE (PARAM_IV_ALWAYS_PRUNE_CAND_SET_BOUND))

/* The list of trees for which the decl_rtl field must be reset is stored
   here.  */

static vec<tree> decl_rtl_to_reset;

static comp_cost force_expr_to_var_cost (tree, bool);

/* Number of uses recorded in DATA.  */

static inline unsigned
n_iv_uses (struct ivopts_data *data)
{
  return data->iv_uses.length ();
}

/* Ith use recorded in DATA.  */

static inline struct iv_use *
iv_use (struct ivopts_data *data, unsigned i)
{
  return data->iv_uses[i];
}

/* Number of candidates recorded in DATA.  */

static inline unsigned
n_iv_cands (struct ivopts_data *data)
{
  return data->iv_candidates.length ();
}

/* Ith candidate recorded in DATA.  */

static inline struct iv_cand *
iv_cand (struct ivopts_data *data, unsigned i)
{
  return data->iv_candidates[i];
}

/* The single loop exit if it dominates the latch, NULL otherwise.  */

edge
single_dom_exit (struct loop *loop)
{
  edge exit = single_exit (loop);

  if (!exit)
    return NULL;

  if (!just_once_each_iteration_p (loop, exit->src))
    return NULL;

  return exit;
}
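
/* E.g. for a do-while style loop whose only exit test is in the latch,
   or a while style loop whose single exit test is in the header and is
   executed on every iteration, the exit dominates the latch and is
   returned.  If the single exit sits in a conditionally executed block
   of the body, just_once_each_iteration_p fails and NULL is returned
   instead.  */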

/* Dumps information about the induction variable IV to FILE.  */

void
dump_iv (FILE *file, struct iv *iv)
{
  if (iv->ssa_name)
    {
      fprintf (file, "ssa name ");
      print_generic_expr (file, iv->ssa_name, TDF_SLIM);
      fprintf (file, "\n");
    }

  fprintf (file, "  type ");
  print_generic_expr (file, TREE_TYPE (iv->base), TDF_SLIM);
  fprintf (file, "\n");

  if (iv->step)
    {
      fprintf (file, "  base ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");

      fprintf (file, "  step ");
      print_generic_expr (file, iv->step, TDF_SLIM);
      fprintf (file, "\n");
    }
  else
    {
      fprintf (file, "  invariant ");
      print_generic_expr (file, iv->base, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->base_object)
    {
      fprintf (file, "  base object ");
      print_generic_expr (file, iv->base_object, TDF_SLIM);
      fprintf (file, "\n");
    }

  if (iv->biv_p)
    fprintf (file, "  is a biv\n");
}

/* Dumps information about the USE to FILE.  */

void
dump_use (FILE *file, struct iv_use *use)
{
  fprintf (file, "use %d\n", use->id);

  switch (use->type)
    {
    case USE_NONLINEAR_EXPR:
      fprintf (file, "  generic\n");
      break;

    case USE_ADDRESS:
      fprintf (file, "  address\n");
      break;

    case USE_COMPARE:
      fprintf (file, "  compare\n");
      break;

    default:
      gcc_unreachable ();
    }

  fprintf (file, "  in statement ");
  print_gimple_stmt (file, use->stmt, 0, 0);
  fprintf (file, "\n");

  fprintf (file, "  at position ");
  if (use->op_p)
    print_generic_expr (file, *use->op_p, TDF_SLIM);
  fprintf (file, "\n");

  dump_iv (file, use->iv);

  if (use->related_cands)
    {
      fprintf (file, "  related candidates ");
      dump_bitmap (file, use->related_cands);
    }
}

/* Dumps information about the uses to FILE.  */

void
dump_uses (FILE *file, struct ivopts_data *data)
{
  unsigned i;
  struct iv_use *use;

  for (i = 0; i < n_iv_uses (data); i++)
    {
      use = iv_use (data, i);

      dump_use (file, use);
      fprintf (file, "\n");
    }
}

/* Dumps information about induction variable candidate CAND to FILE.  */

void
dump_cand (FILE *file, struct iv_cand *cand)
{
  struct iv *iv = cand->iv;

  fprintf (file, "candidate %d%s\n",
           cand->id, cand->important ? " (important)" : "");

  if (cand->depends_on)
    {
      fprintf (file, "  depends on ");
      dump_bitmap (file, cand->depends_on);
    }

  if (!iv)
    {
      fprintf (file, "  final value replacement\n");
      return;
    }

  if (cand->var_before)
    {
      fprintf (file, "  var_before ");
      print_generic_expr (file, cand->var_before, TDF_SLIM);
      fprintf (file, "\n");
    }
  if (cand->var_after)
    {
      fprintf (file, "  var_after ");
      print_generic_expr (file, cand->var_after, TDF_SLIM);
      fprintf (file, "\n");
    }

  switch (cand->pos)
    {
    case IP_NORMAL:
      fprintf (file, "  incremented before exit test\n");
      break;

    case IP_BEFORE_USE:
      fprintf (file, "  incremented before use %d\n", cand->ainc_use->id);
      break;

    case IP_AFTER_USE:
      fprintf (file, "  incremented after use %d\n", cand->ainc_use->id);
      break;

    case IP_END:
      fprintf (file, "  incremented at end\n");
      break;

    case IP_ORIGINAL:
      fprintf (file, "  original biv\n");
      break;
    }

  dump_iv (file, iv);
}

/* Returns the info for ssa version VER.  */

static inline struct version_info *
ver_info (struct ivopts_data *data, unsigned ver)
{
  return data->version_info + ver;
}

/* Returns the info for ssa name NAME.  */

static inline struct version_info *
name_info (struct ivopts_data *data, tree name)
{
  return ver_info (data, SSA_NAME_VERSION (name));
}

/* Returns true if STMT is after the place where the IP_NORMAL ivs will be
   emitted in LOOP.  */

static bool
stmt_after_ip_normal_pos (struct loop *loop, gimple stmt)
{
  basic_block bb = ip_normal_pos (loop), sbb = gimple_bb (stmt);

  gcc_assert (bb);

  if (sbb == loop->latch)
    return true;

  if (sbb != bb)
    return false;

  return stmt == last_stmt (bb);
}

/* Returns true if STMT is after the place where the original induction
   variable CAND is incremented.  If TRUE_IF_EQUAL is set, we return true
   if the positions are identical.  */

static bool
stmt_after_inc_pos (struct iv_cand *cand, gimple stmt, bool true_if_equal)
{
  basic_block cand_bb = gimple_bb (cand->incremented_at);
  basic_block stmt_bb = gimple_bb (stmt);

  if (!dominated_by_p (CDI_DOMINATORS, stmt_bb, cand_bb))
    return false;

  if (stmt_bb != cand_bb)
    return true;

  if (true_if_equal
      && gimple_uid (stmt) == gimple_uid (cand->incremented_at))
    return true;
  return gimple_uid (stmt) > gimple_uid (cand->incremented_at);
}

/* Returns true if STMT is after the place where the induction variable
   CAND is incremented in LOOP.  */

static bool
stmt_after_increment (struct loop *loop, struct iv_cand *cand, gimple stmt)
{
  switch (cand->pos)
    {
    case IP_END:
      return false;

    case IP_NORMAL:
      return stmt_after_ip_normal_pos (loop, stmt);

    case IP_ORIGINAL:
    case IP_AFTER_USE:
      return stmt_after_inc_pos (cand, stmt, false);

    case IP_BEFORE_USE:
      return stmt_after_inc_pos (cand, stmt, true);

    default:
      gcc_unreachable ();
    }
}
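
/* Illustration of the cases above: for an IP_NORMAL candidate the
   increment is emitted just before the exit test in the same block, so
   statements in the latch are considered "after" it, while earlier
   statements of that block are not.  For IP_END the increment is
   emitted at the very end of the latch, so no statement in the loop
   body can be after it, which is why IP_END simply returns false.  */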

/* Returns true if EXP is an ssa name that occurs in an abnormal phi node.  */

static bool
abnormal_ssa_name_p (tree exp)
{
  if (!exp)
    return false;

  if (TREE_CODE (exp) != SSA_NAME)
    return false;

  return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (exp) != 0;
}

/* Returns false if BASE or INDEX contains an ssa name that occurs in an
   abnormal phi node.  Callback for for_each_index.  */

static bool
idx_contains_abnormal_ssa_name_p (tree base, tree *index,
                                  void *data ATTRIBUTE_UNUSED)
{
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 2)))
        return false;
      if (abnormal_ssa_name_p (TREE_OPERAND (base, 3)))
        return false;
    }

  return !abnormal_ssa_name_p (*index);
}

/* Returns true if EXPR contains an ssa name that occurs in an
   abnormal phi node.  */

bool
contains_abnormal_ssa_name_p (tree expr)
{
  enum tree_code code;
  enum tree_code_class codeclass;

  if (!expr)
    return false;

  code = TREE_CODE (expr);
  codeclass = TREE_CODE_CLASS (code);

  if (code == SSA_NAME)
    return SSA_NAME_OCCURS_IN_ABNORMAL_PHI (expr) != 0;

  if (code == INTEGER_CST
      || is_gimple_min_invariant (expr))
    return false;

  if (code == ADDR_EXPR)
    return !for_each_index (&TREE_OPERAND (expr, 0),
                            idx_contains_abnormal_ssa_name_p,
                            NULL);

  if (code == COND_EXPR)
    return contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1))
           || contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 2));

  switch (codeclass)
    {
    case tcc_binary:
    case tcc_comparison:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 1)))
        return true;

      /* Fallthru.  */
    case tcc_unary:
      if (contains_abnormal_ssa_name_p (TREE_OPERAND (expr, 0)))
        return true;

      break;

    default:
      gcc_unreachable ();
    }

  return false;
}

/* Returns the structure describing number of iterations determined from
   EXIT of DATA->current_loop, or NULL if something goes wrong.  */

static struct tree_niter_desc *
niter_for_exit (struct ivopts_data *data, edge exit)
{
  struct tree_niter_desc *desc;
  tree_niter_desc **slot;

  if (!data->niters)
    {
      data->niters = new hash_map<edge, tree_niter_desc *>;
      slot = NULL;
    }
  else
    slot = data->niters->get (exit);

  if (!slot)
    {
      /* Try to determine number of iterations.  We cannot safely work with ssa
         names that appear in phi nodes on abnormal edges, so that we do not
         create overlapping life ranges for them (PR 27283).  */
      desc = XNEW (struct tree_niter_desc);
      if (!number_of_iterations_exit (data->current_loop,
                                      exit, desc, true)
          || contains_abnormal_ssa_name_p (desc->niter))
        {
          XDELETE (desc);
          desc = NULL;
        }
      data->niters->put (exit, desc);
    }
  else
    desc = *slot;

  return desc;
}

/* Returns the structure describing number of iterations determined from
   single dominating exit of DATA->current_loop, or NULL if something
   goes wrong.  */

static struct tree_niter_desc *
niter_for_single_dom_exit (struct ivopts_data *data)
{
  edge exit = single_dom_exit (data->current_loop);

  if (!exit)
    return NULL;

  return niter_for_exit (data, exit);
}

/* Initializes data structures used by the iv optimization pass, stored
   in DATA.  */

static void
tree_ssa_iv_optimize_init (struct ivopts_data *data)
{
  data->version_info_size = 2 * num_ssa_names;
  data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
  data->relevant = BITMAP_ALLOC (NULL);
  data->important_candidates = BITMAP_ALLOC (NULL);
  data->max_inv_id = 0;
  data->niters = NULL;
  data->iv_uses.create (20);
  data->iv_candidates.create (20);
  data->inv_expr_tab = new hash_table<iv_inv_expr_hasher> (10);
  data->inv_expr_id = 0;
  decl_rtl_to_reset.create (20);
}

/* Returns a memory object to which EXPR points.  In case we are able to
   determine that it does not point to any such object, NULL is returned.  */

static tree
determine_base_object (tree expr)
{
  enum tree_code code = TREE_CODE (expr);
  tree base, obj;

  /* If this is a pointer casted to any type, we need to determine
     the base object for the pointer; so handle conversions before
     throwing away non-pointer expressions.  */
  if (CONVERT_EXPR_P (expr))
    return determine_base_object (TREE_OPERAND (expr, 0));

  if (!POINTER_TYPE_P (TREE_TYPE (expr)))
    return NULL_TREE;

  switch (code)
    {
    case INTEGER_CST:
      return NULL_TREE;

    case ADDR_EXPR:
      obj = TREE_OPERAND (expr, 0);
      base = get_base_address (obj);

      if (!base)
        return expr;

      if (TREE_CODE (base) == MEM_REF)
        return determine_base_object (TREE_OPERAND (base, 0));

      return fold_convert (ptr_type_node,
                           build_fold_addr_expr (base));

    case POINTER_PLUS_EXPR:
      return determine_base_object (TREE_OPERAND (expr, 0));

    case PLUS_EXPR:
    case MINUS_EXPR:
      /* Pointer addition is done solely using POINTER_PLUS_EXPR.  */
      gcc_unreachable ();

    default:
      return fold_convert (ptr_type_node, expr);
    }
}
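
/* For example: for EXPR == &a[i] the base address a is recovered and
   &a (converted to ptr_type_node) is returned; for EXPR == p_1 + 4,
   a POINTER_PLUS_EXPR, the function recurses into p_1; and for an
   integer constant the result is NULL_TREE, i.e. no object.  */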

/* Return true if an address expression with a non-DECL_P operand appears
   in EXPR.  */

static bool
contain_complex_addr_expr (tree expr)
{
  bool res = false;

  STRIP_NOPS (expr);
  switch (TREE_CODE (expr))
    {
    case POINTER_PLUS_EXPR:
    case PLUS_EXPR:
    case MINUS_EXPR:
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 0));
      res |= contain_complex_addr_expr (TREE_OPERAND (expr, 1));
      break;

    case ADDR_EXPR:
      return (!DECL_P (TREE_OPERAND (expr, 0)));

    default:
      return false;
    }

  return res;
}
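
/* E.g. &a[0] contains a complex address expression (the operand of the
   ADDR_EXPR is an ARRAY_REF, not a DECL), whereas plain &a does not,
   although both denote the same address; this is exactly the situation
   the lowering in alloc_iv below is meant to canonicalize.  */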

/* Allocates an induction variable with given initial value BASE and step STEP
   for loop LOOP.  */

static struct iv *
alloc_iv (tree base, tree step)
{
  tree expr = base;
  struct iv *iv = XCNEW (struct iv);
  gcc_assert (step != NULL_TREE);

  /* Lower address expressions in BASE, except those whose operand
     is a DECL_P.  By doing this:
     1) More accurate cost can be computed for address expressions;
     2) Duplicate candidates won't be created for bases in different
        forms, like &a[0] and &a.  */
  STRIP_NOPS (expr);
  if ((TREE_CODE (expr) == ADDR_EXPR && !DECL_P (TREE_OPERAND (expr, 0)))
      || contain_complex_addr_expr (expr))
    {
      aff_tree comb;
      tree_to_aff_combination (expr, TREE_TYPE (base), &comb);
      base = fold_convert (TREE_TYPE (base), aff_combination_to_tree (&comb));
    }

  iv->base = base;
  iv->base_object = determine_base_object (base);
  iv->step = step;
  iv->biv_p = false;
  iv->have_use_for = false;
  iv->use_id = 0;
  iv->ssa_name = NULL_TREE;

  return iv;
}

/* Sets STEP and BASE for induction variable IV.  */

static void
set_iv (struct ivopts_data *data, tree iv, tree base, tree step)
{
  struct version_info *info = name_info (data, iv);

  gcc_assert (!info->iv);

  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (iv));
  info->iv = alloc_iv (base, step);
  info->iv->ssa_name = iv;
}

/* Finds induction variable declaration for VAR.  */

static struct iv *
get_iv (struct ivopts_data *data, tree var)
{
  basic_block bb;
  tree type = TREE_TYPE (var);

  if (!POINTER_TYPE_P (type)
      && !INTEGRAL_TYPE_P (type))
    return NULL;

  if (!name_info (data, var)->iv)
    {
      bb = gimple_bb (SSA_NAME_DEF_STMT (var));

      if (!bb
          || !flow_bb_inside_loop_p (data->current_loop, bb))
        set_iv (data, var, var, build_int_cst (type, 0));
    }

  return name_info (data, var)->iv;
}

/* Determines the step of a biv defined in PHI.  Returns NULL if PHI does
   not define a simple affine biv with nonzero step.  */

static tree
determine_biv_step (gimple phi)
{
  struct loop *loop = gimple_bb (phi)->loop_father;
  tree name = PHI_RESULT (phi);
  affine_iv iv;

  if (virtual_operand_p (name))
    return NULL_TREE;

  if (!simple_iv (loop, loop, name, &iv, true))
    return NULL_TREE;

  return integer_zerop (iv.step) ? NULL_TREE : iv.step;
}
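
/* A sketch of a phi that qualifies:

     # i_1 = PHI <0 (preheader), i_2 (latch)>
     ...
     i_2 = i_1 + 4;

   Here simple_iv computes base 0 and step 4, so 4 is returned.  A phi
   whose value does not change within the loop has a zero step and
   NULL_TREE is returned for it.  */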

/* Finds basic ivs.  */

static bool
find_bivs (struct ivopts_data *data)
{
  gimple phi;
  tree step, type, base;
  bool found = false;
  struct loop *loop = data->current_loop;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      if (SSA_NAME_OCCURS_IN_ABNORMAL_PHI (PHI_RESULT (phi)))
        continue;

      step = determine_biv_step (phi);
      if (!step)
        continue;

      base = PHI_ARG_DEF_FROM_EDGE (phi, loop_preheader_edge (loop));
      base = expand_simple_operations (base);
      if (contains_abnormal_ssa_name_p (base)
          || contains_abnormal_ssa_name_p (step))
        continue;

      type = TREE_TYPE (PHI_RESULT (phi));
      base = fold_convert (type, base);
      if (step)
        {
          if (POINTER_TYPE_P (type))
            step = convert_to_ptrofftype (step);
          else
            step = fold_convert (type, step);
        }

      set_iv (data, PHI_RESULT (phi), base, step);
      found = true;
    }

  return found;
}

/* Marks basic ivs.  */

static void
mark_bivs (struct ivopts_data *data)
{
  gimple phi, def;
  tree var;
  struct iv *iv, *incr_iv;
  struct loop *loop = data->current_loop;
  basic_block incr_bb;
  gimple_stmt_iterator psi;

  for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);

      iv = get_iv (data, PHI_RESULT (phi));
      if (!iv)
        continue;

      var = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (loop));
      def = SSA_NAME_DEF_STMT (var);
      /* Don't mark an iv that was peeled from another one as a biv.  */
      if (def
          && gimple_code (def) == GIMPLE_PHI
          && gimple_bb (def) == loop->header)
        continue;

      incr_iv = get_iv (data, var);
      if (!incr_iv)
        continue;

      /* If the increment is in the subloop, ignore it.  */
      incr_bb = gimple_bb (SSA_NAME_DEF_STMT (var));
      if (incr_bb->loop_father != data->current_loop
          || (incr_bb->flags & BB_IRREDUCIBLE_LOOP))
        continue;

      iv->biv_p = true;
      incr_iv->biv_p = true;
    }
}

/* Checks whether STMT defines a linear induction variable and stores its
   parameters to IV.  */

static bool
find_givs_in_stmt_scev (struct ivopts_data *data, gimple stmt, affine_iv *iv)
{
  tree lhs;
  struct loop *loop = data->current_loop;

  iv->base = NULL_TREE;
  iv->step = NULL_TREE;

  if (gimple_code (stmt) != GIMPLE_ASSIGN)
    return false;

  lhs = gimple_assign_lhs (stmt);
  if (TREE_CODE (lhs) != SSA_NAME)
    return false;

  if (!simple_iv (loop, loop_containing_stmt (stmt), lhs, iv, true))
    return false;
  iv->base = expand_simple_operations (iv->base);

  if (contains_abnormal_ssa_name_p (iv->base)
      || contains_abnormal_ssa_name_p (iv->step))
    return false;

  /* If STMT could throw, then do not consider STMT as defining a GIV.
     While this will suppress optimizations, we cannot safely delete this
     GIV and associated statements, even if it appears it is not used.  */
  if (stmt_could_throw_p (stmt))
    return false;

  return true;
}

/* Finds general ivs in statement STMT.  */

static void
find_givs_in_stmt (struct ivopts_data *data, gimple stmt)
{
  affine_iv iv;

  if (!find_givs_in_stmt_scev (data, stmt, &iv))
    return;

  set_iv (data, gimple_assign_lhs (stmt), iv.base, iv.step);
}

/* Finds general ivs in basic block BB.  */

static void
find_givs_in_bb (struct ivopts_data *data, basic_block bb)
{
  gimple_stmt_iterator bsi;

  for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
    find_givs_in_stmt (data, gsi_stmt (bsi));
}

/* Finds general ivs.  */

static void
find_givs (struct ivopts_data *data)
{
  struct loop *loop = data->current_loop;
  basic_block *body = get_loop_body_in_dom_order (loop);
  unsigned i;

  for (i = 0; i < loop->num_nodes; i++)
    find_givs_in_bb (data, body[i]);
  free (body);
}

/* For each ssa name defined in LOOP, determines whether it is an induction
   variable and if so, its initial value and step.  */

static bool
find_induction_variables (struct ivopts_data *data)
{
  unsigned i;
  bitmap_iterator bi;

  if (!find_bivs (data))
    return false;

  find_givs (data);
  mark_bivs (data);

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      struct tree_niter_desc *niter = niter_for_single_dom_exit (data);

      if (niter)
        {
          fprintf (dump_file, "  number of iterations ");
          print_generic_expr (dump_file, niter->niter, TDF_SLIM);
          if (!integer_zerop (niter->may_be_zero))
            {
              fprintf (dump_file, "; zero if ");
              print_generic_expr (dump_file, niter->may_be_zero, TDF_SLIM);
            }
          fprintf (dump_file, "\n\n");
        }

      fprintf (dump_file, "Induction variables:\n\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          if (ver_info (data, i)->iv)
            dump_iv (dump_file, ver_info (data, i)->iv);
        }
    }

  return true;
}

/* Records a use of type USE_TYPE at *USE_P in STMT whose value is IV.  */

static struct iv_use *
record_use (struct ivopts_data *data, tree *use_p, struct iv *iv,
            gimple stmt, enum use_type use_type)
{
  struct iv_use *use = XCNEW (struct iv_use);

  use->id = n_iv_uses (data);
  use->type = use_type;
  use->iv = iv;
  use->stmt = stmt;
  use->op_p = use_p;
  use->related_cands = BITMAP_ALLOC (NULL);

  /* Clear the ssa name, if the caller did not reset it already, so that
     it is not shown in the dumps.  */
  iv->ssa_name = NULL_TREE;

  if (dump_file && (dump_flags & TDF_DETAILS))
    dump_use (dump_file, use);

  data->iv_uses.safe_push (use);

  return use;
}

/* Checks whether OP is a loop-level invariant and if so, records it.
   NONLINEAR_USE is true if the invariant is used in a way we do not
   handle specially.  */

static void
record_invariant (struct ivopts_data *data, tree op, bool nonlinear_use)
{
  basic_block bb;
  struct version_info *info;

  if (TREE_CODE (op) != SSA_NAME
      || virtual_operand_p (op))
    return;

  bb = gimple_bb (SSA_NAME_DEF_STMT (op));
  if (bb
      && flow_bb_inside_loop_p (data->current_loop, bb))
    return;

  info = name_info (data, op);
  info->name = op;
  info->has_nonlin_use |= nonlinear_use;
  if (!info->inv_id)
    info->inv_id = ++data->max_inv_id;
  bitmap_set_bit (data->relevant, SSA_NAME_VERSION (op));
}

/* Checks whether the use OP is interesting and if so, records it.  */

static struct iv_use *
find_interesting_uses_op (struct ivopts_data *data, tree op)
{
  struct iv *iv;
  struct iv *civ;
  gimple stmt;
  struct iv_use *use;

  if (TREE_CODE (op) != SSA_NAME)
    return NULL;

  iv = get_iv (data, op);
  if (!iv)
    return NULL;

  if (iv->have_use_for)
    {
      use = iv_use (data, iv->use_id);

      gcc_assert (use->type == USE_NONLINEAR_EXPR);
      return use;
    }

  if (integer_zerop (iv->step))
    {
      record_invariant (data, op, true);
      return NULL;
    }
  iv->have_use_for = true;

  civ = XNEW (struct iv);
  *civ = *iv;

  stmt = SSA_NAME_DEF_STMT (op);
  gcc_assert (gimple_code (stmt) == GIMPLE_PHI
              || is_gimple_assign (stmt));

  use = record_use (data, NULL, civ, stmt, USE_NONLINEAR_EXPR);
  iv->use_id = use->id;

  return use;
}

/* Given a condition in statement STMT, checks whether it is a compare
   of an induction variable and an invariant.  If this is the case,
   CONTROL_VAR is set to location of the iv, BOUND to the location of
   the invariant, IV_VAR and IV_BOUND are set to the corresponding
   induction variable descriptions, and true is returned.  If this is not
   the case, CONTROL_VAR and BOUND are set to the arguments of the
   condition and false is returned.  */

static bool
extract_cond_operands (struct ivopts_data *data, gimple stmt,
                       tree **control_var, tree **bound,
                       struct iv **iv_var, struct iv **iv_bound)
{
  /* The objects returned when COND has constant operands.  */
  static struct iv const_iv;
  static tree zero;
  tree *op0 = &zero, *op1 = &zero, *tmp_op;
  struct iv *iv0 = &const_iv, *iv1 = &const_iv, *tmp_iv;
  bool ret = false;

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      op0 = gimple_cond_lhs_ptr (stmt);
      op1 = gimple_cond_rhs_ptr (stmt);
    }
  else
    {
      op0 = gimple_assign_rhs1_ptr (stmt);
      op1 = gimple_assign_rhs2_ptr (stmt);
    }

  zero = integer_zero_node;
  const_iv.step = integer_zero_node;

  if (TREE_CODE (*op0) == SSA_NAME)
    iv0 = get_iv (data, *op0);
  if (TREE_CODE (*op1) == SSA_NAME)
    iv1 = get_iv (data, *op1);

  /* Exactly one of the compared values must be an iv, and the other one must
     be an invariant.  */
  if (!iv0 || !iv1)
    goto end;

  if (integer_zerop (iv0->step))
    {
      /* Control variable may be on the other side.  */
      tmp_op = op0; op0 = op1; op1 = tmp_op;
      tmp_iv = iv0; iv0 = iv1; iv1 = tmp_iv;
    }
  ret = !integer_zerop (iv0->step) && integer_zerop (iv1->step);

end:
  if (control_var)
    *control_var = op0;
  if (iv_var)
    *iv_var = iv0;
  if (bound)
    *bound = op1;
  if (iv_bound)
    *iv_bound = iv1;

  return ret;
}
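
/* For a condition i_1 < n_2 where i_1 is an iv with nonzero step and
   n_2 is loop invariant, *CONTROL_VAR ends up pointing to i_1 and
   *BOUND to n_2 (the operands are swapped first if the iv appears on
   the right-hand side), and true is returned.  If both operands are
   invariant, or either has no iv description, false is returned and
   the output locations still point at the raw operands.  */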

/* Checks whether the condition in STMT is interesting and if so,
   records it.  */

static void
find_interesting_uses_cond (struct ivopts_data *data, gimple stmt)
{
  tree *var_p, *bound_p;
  struct iv *var_iv, *civ;

  if (!extract_cond_operands (data, stmt, &var_p, &bound_p, &var_iv, NULL))
    {
      find_interesting_uses_op (data, *var_p);
      find_interesting_uses_op (data, *bound_p);
      return;
    }

  civ = XNEW (struct iv);
  *civ = *var_iv;
  record_use (data, NULL, civ, stmt, USE_COMPARE);
}

/* Returns the outermost loop EXPR is obviously invariant in
   relative to the loop LOOP, i.e. if all its operands are defined
   outside of the returned loop.  Returns NULL if EXPR is not
   even obviously invariant in LOOP.  */

struct loop *
outermost_invariant_loop_for_expr (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  if (is_gimple_min_invariant (expr))
    return current_loops->tree_root;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb)
        {
          if (flow_bb_inside_loop_p (loop, def_bb))
            return NULL;
          return superloop_at_depth (loop,
                                     loop_depth (def_bb->loop_father) + 1);
        }

      return current_loops->tree_root;
    }

  if (!EXPR_P (expr))
    return NULL;

  unsigned maxdepth = 0;
  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    {
      struct loop *ivloop;
      if (!TREE_OPERAND (expr, i))
        continue;

      ivloop = outermost_invariant_loop_for_expr (loop, TREE_OPERAND (expr, i));
      if (!ivloop)
        return NULL;
      maxdepth = MAX (maxdepth, loop_depth (ivloop));
    }

  return superloop_at_depth (loop, maxdepth);
}

/* Returns true if expression EXPR is obviously invariant in LOOP,
   i.e. if all its operands are defined outside of the LOOP.  LOOP
   should not be the function body.  */

bool
expr_invariant_in_loop_p (struct loop *loop, tree expr)
{
  basic_block def_bb;
  unsigned i, len;

  gcc_assert (loop_depth (loop) > 0);

  if (is_gimple_min_invariant (expr))
    return true;

  if (TREE_CODE (expr) == SSA_NAME)
    {
      def_bb = gimple_bb (SSA_NAME_DEF_STMT (expr));
      if (def_bb
          && flow_bb_inside_loop_p (loop, def_bb))
        return false;

      return true;
    }

  if (!EXPR_P (expr))
    return false;

  len = TREE_OPERAND_LENGTH (expr);
  for (i = 0; i < len; i++)
    if (TREE_OPERAND (expr, i)
        && !expr_invariant_in_loop_p (loop, TREE_OPERAND (expr, i)))
      return false;

  return true;
}

/* Accumulates the steps of indices into DATA and replaces their values with
   the initial ones.  Returns false when the value of the index cannot be
   determined.  Callback for for_each_index.  */

struct ifs_ivopts_data
{
  struct ivopts_data *ivopts_data;
  gimple stmt;
  tree step;
};

static bool
idx_find_step (tree base, tree *idx, void *data)
{
  struct ifs_ivopts_data *dta = (struct ifs_ivopts_data *) data;
  struct iv *iv;
  tree step, iv_base, iv_step, lbound, off;
  struct loop *loop = dta->ivopts_data->current_loop;

  /* If base is a component ref, require that the offset of the reference
     be invariant.  */
  if (TREE_CODE (base) == COMPONENT_REF)
    {
      off = component_ref_field_offset (base);
      return expr_invariant_in_loop_p (loop, off);
    }

  /* If base is an array, first check whether we will be able to move the
     reference out of the loop (in order to take its address in strength
     reduction).  In order for this to work we need both lower bound
     and step to be loop invariants.  */
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      /* Moreover, for a range, the size needs to be invariant as well.  */
      if (TREE_CODE (base) == ARRAY_RANGE_REF
          && !expr_invariant_in_loop_p (loop, TYPE_SIZE (TREE_TYPE (base))))
        return false;

      step = array_ref_element_size (base);
      lbound = array_ref_low_bound (base);

      if (!expr_invariant_in_loop_p (loop, step)
          || !expr_invariant_in_loop_p (loop, lbound))
        return false;
    }

  if (TREE_CODE (*idx) != SSA_NAME)
    return true;

  iv = get_iv (dta->ivopts_data, *idx);
  if (!iv)
    return false;

  /* XXX  We produce for a base of *D42 with iv->base being &x[0]
     *&x[0], which is not folded and does not trigger the
     ARRAY_REF path below.  */
  *idx = iv->base;

  if (integer_zerop (iv->step))
    return true;

  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      step = array_ref_element_size (base);

      /* We only handle addresses whose step is an integer constant.  */
      if (TREE_CODE (step) != INTEGER_CST)
        return false;
    }
  else
    /* The step for pointer arithmetic is already one byte.  */
    step = size_one_node;

  iv_base = iv->base;
  iv_step = iv->step;
  if (!convert_affine_scev (dta->ivopts_data->current_loop,
                            sizetype, &iv_base, &iv_step, dta->stmt,
                            false))
    {
      /* The index might wrap.  */
      return false;
    }

  step = fold_build2 (MULT_EXPR, sizetype, step, iv_step);
  dta->step = fold_build2 (PLUS_EXPR, sizetype, dta->step, step);

  return true;
}
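
/* A worked example (sketch): for a use a[i_1] where the elements of a
   are 4 bytes wide and i_1 is an iv with step 1, *IDX is replaced by
   the base of i_1 and DTA->step grows by 4 * 1 = 4; for a plain
   pointer dereference the element step is taken to be one byte, so
   DTA->step grows by the step of the pointer iv itself.  */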

/* Records use in index IDX.  Callback for for_each_index.  Ivopts data
   object is passed to it in DATA.  */

static bool
idx_record_use (tree base, tree *idx,
                void *vdata)
{
  struct ivopts_data *data = (struct ivopts_data *) vdata;
  find_interesting_uses_op (data, *idx);
  if (TREE_CODE (base) == ARRAY_REF || TREE_CODE (base) == ARRAY_RANGE_REF)
    {
      find_interesting_uses_op (data, array_ref_element_size (base));
      find_interesting_uses_op (data, array_ref_low_bound (base));
    }
  return true;
}

/* If we can prove that TOP = cst * BOT for some constant cst,
   store cst to MUL and return true.  Otherwise return false.
   The returned value is always sign-extended, regardless of the
   signedness of TOP and BOT.  */

static bool
constant_multiple_of (tree top, tree bot, widest_int *mul)
{
  tree mby;
  enum tree_code code;
  unsigned precision = TYPE_PRECISION (TREE_TYPE (top));
  widest_int res, p0, p1;

  STRIP_NOPS (top);
  STRIP_NOPS (bot);

  if (operand_equal_p (top, bot, 0))
    {
      *mul = 1;
      return true;
    }

  code = TREE_CODE (top);
  switch (code)
    {
    case MULT_EXPR:
      mby = TREE_OPERAND (top, 1);
      if (TREE_CODE (mby) != INTEGER_CST)
        return false;

      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &res))
        return false;

      *mul = wi::sext (res * wi::to_widest (mby), precision);
      return true;

    case PLUS_EXPR:
    case MINUS_EXPR:
      if (!constant_multiple_of (TREE_OPERAND (top, 0), bot, &p0)
          || !constant_multiple_of (TREE_OPERAND (top, 1), bot, &p1))
        return false;

      if (code == MINUS_EXPR)
        p1 = -p1;
      *mul = wi::sext (p0 + p1, precision);
      return true;

    case INTEGER_CST:
      if (TREE_CODE (bot) != INTEGER_CST)
        return false;

      p0 = widest_int::from (top, SIGNED);
      p1 = widest_int::from (bot, SIGNED);
      if (p1 == 0)
        return false;
      *mul = wi::sext (wi::divmod_trunc (p0, p1, SIGNED, &res), precision);
      return res == 0;

    default:
      return false;
    }
}
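
/* Examples (illustrative): TOP = x * 12, BOT = x yields 12;
   TOP = x * 8 + x * 4, BOT = x yields 8 + 4 = 12; TOP = 24, BOT = 8
   yields 3; and TOP = 25, BOT = 8 fails because the truncating
   division leaves a remainder.  */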

/* Return true if memory reference REF with step STEP may be unaligned.  */

static bool
may_be_unaligned_p (tree ref, tree step)
{
  /* TARGET_MEM_REFs are translated directly to valid MEMs on the target,
     thus they are not misaligned.  */
  if (TREE_CODE (ref) == TARGET_MEM_REF)
    return false;

  unsigned int align = TYPE_ALIGN (TREE_TYPE (ref));
  if (GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref))) > align)
    align = GET_MODE_ALIGNMENT (TYPE_MODE (TREE_TYPE (ref)));

  unsigned HOST_WIDE_INT bitpos;
  unsigned int ref_align;
  get_object_alignment_1 (ref, &ref_align, &bitpos);
  if (ref_align < align
      || (bitpos % align) != 0
      || (bitpos % BITS_PER_UNIT) != 0)
    return true;

  unsigned int trailing_zeros = tree_ctz (step);
  if (trailing_zeros < HOST_BITS_PER_INT
      && (1U << trailing_zeros) * BITS_PER_UNIT < align)
    return true;

  return false;
}
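
/* For instance, assuming 32-bit required alignment and 8-bit units:
   a reference whose step is 2 has tree_ctz (step) == 1, and since
   (1 << 1) * BITS_PER_UNIT == 16 < 32 the access may drift to a
   misaligned address, so true is returned; with a step of 4 the
   alignment is preserved across iterations and that check passes.  */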

/* Return true if EXPR may be non-addressable.  */

bool
may_be_nonaddressable_p (tree expr)
{
  switch (TREE_CODE (expr))
    {
    case TARGET_MEM_REF:
      /* TARGET_MEM_REFs are translated directly to valid MEMs on the
         target, thus they are always addressable.  */
      return false;

    case COMPONENT_REF:
      return DECL_NONADDRESSABLE_P (TREE_OPERAND (expr, 1))
             || may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    case VIEW_CONVERT_EXPR:
      /* This kind of view-conversions may wrap non-addressable objects
         and make them look addressable.  After some processing the
         non-addressability may be uncovered again, causing ADDR_EXPRs
         of inappropriate objects to be built.  */
      if (is_gimple_reg (TREE_OPERAND (expr, 0))
          || !is_gimple_addressable (TREE_OPERAND (expr, 0)))
        return true;

      /* ... fall through ... */

    case ARRAY_REF:
    case ARRAY_RANGE_REF:
      return may_be_nonaddressable_p (TREE_OPERAND (expr, 0));

    CASE_CONVERT:
      return true;

    default:
      break;
    }

  return false;
}

/* Finds addresses in *OP_P inside STMT.  */

static void
find_interesting_uses_address (struct ivopts_data *data, gimple stmt, tree *op_p)
{
  tree base = *op_p, step = size_zero_node;
  struct iv *civ;
  struct ifs_ivopts_data ifs_ivopts_data;

  /* Do not play with volatile memory references.  A bit too conservative,
     perhaps, but safe.  */
  if (gimple_has_volatile_ops (stmt))
    goto fail;

  /* Ignore bitfields for now.  Not really something terribly complicated
     to handle.  TODO.  */
  if (TREE_CODE (base) == BIT_FIELD_REF)
    goto fail;

  base = unshare_expr (base);

  if (TREE_CODE (base) == TARGET_MEM_REF)
    {
      tree type = build_pointer_type (TREE_TYPE (base));
      tree astep;

      if (TMR_BASE (base)
          && TREE_CODE (TMR_BASE (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_BASE (base));
          if (!civ)
            goto fail;

          TMR_BASE (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX2 (base)
          && TREE_CODE (TMR_INDEX2 (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX2 (base));
          if (!civ)
            goto fail;

          TMR_INDEX2 (base) = civ->base;
          step = civ->step;
        }
      if (TMR_INDEX (base)
          && TREE_CODE (TMR_INDEX (base)) == SSA_NAME)
        {
          civ = get_iv (data, TMR_INDEX (base));
          if (!civ)
            goto fail;

          TMR_INDEX (base) = civ->base;
          astep = civ->step;

          if (astep)
            {
              if (TMR_STEP (base))
                astep = fold_build2 (MULT_EXPR, type, TMR_STEP (base), astep);

              step = fold_build2 (PLUS_EXPR, type, step, astep);
            }
        }

      if (integer_zerop (step))
        goto fail;
      base = tree_mem_ref_addr (type, base);
    }
  else
    {
      ifs_ivopts_data.ivopts_data = data;
      ifs_ivopts_data.stmt = stmt;
      ifs_ivopts_data.step = size_zero_node;
      if (!for_each_index (&base, idx_find_step, &ifs_ivopts_data)
          || integer_zerop (ifs_ivopts_data.step))
        goto fail;
      step = ifs_ivopts_data.step;

      /* Check that the base expression is addressable.  This needs
         to be done after substituting bases of IVs into it.  */
      if (may_be_nonaddressable_p (base))
        goto fail;

      /* Moreover, on strict alignment platforms, check that it is
         sufficiently aligned.  */
      if (STRICT_ALIGNMENT && may_be_unaligned_p (base, step))
        goto fail;

      base = build_fold_addr_expr (base);

      /* Substituting bases of IVs into the base expression might
         have caused folding opportunities.  */
      if (TREE_CODE (base) == ADDR_EXPR)
        {
          tree *ref = &TREE_OPERAND (base, 0);
          while (handled_component_p (*ref))
            ref = &TREE_OPERAND (*ref, 0);
          if (TREE_CODE (*ref) == MEM_REF)
            {
              tree tem = fold_binary (MEM_REF, TREE_TYPE (*ref),
                                      TREE_OPERAND (*ref, 0),
                                      TREE_OPERAND (*ref, 1));
              if (tem)
                *ref = tem;
            }
        }
    }

  civ = alloc_iv (base, step);
  record_use (data, op_p, civ, stmt, USE_ADDRESS);
  return;

fail:
  for_each_index (op_p, idx_record_use, data);
}

/* Finds and records invariants used in STMT.  */

static void
find_invariants_stmt (struct ivopts_data *data, gimple stmt)
{
  ssa_op_iter iter;
  use_operand_p use_p;
  tree op;

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);
      record_invariant (data, op, false);
    }
}

/* Finds interesting uses of induction variables in the statement STMT.  */

static void
find_interesting_uses_stmt (struct ivopts_data *data, gimple stmt)
{
  struct iv *iv;
  tree op, *lhs, *rhs;
  ssa_op_iter iter;
  use_operand_p use_p;
  enum tree_code code;

  find_invariants_stmt (data, stmt);

  if (gimple_code (stmt) == GIMPLE_COND)
    {
      find_interesting_uses_cond (data, stmt);
      return;
    }

  if (is_gimple_assign (stmt))
    {
      lhs = gimple_assign_lhs_ptr (stmt);
      rhs = gimple_assign_rhs1_ptr (stmt);

      if (TREE_CODE (*lhs) == SSA_NAME)
        {
          /* If the statement defines an induction variable, the uses are not
             interesting by themselves.  */

          iv = get_iv (data, *lhs);

          if (iv && !integer_zerop (iv->step))
            return;
        }

      code = gimple_assign_rhs_code (stmt);
      if (get_gimple_rhs_class (code) == GIMPLE_SINGLE_RHS
          && (REFERENCE_CLASS_P (*rhs)
              || is_gimple_val (*rhs)))
        {
          if (REFERENCE_CLASS_P (*rhs))
            find_interesting_uses_address (data, stmt, rhs);
          else
            find_interesting_uses_op (data, *rhs);

          if (REFERENCE_CLASS_P (*lhs))
            find_interesting_uses_address (data, stmt, lhs);
          return;
        }
      else if (TREE_CODE_CLASS (code) == tcc_comparison)
        {
          find_interesting_uses_cond (data, stmt);
          return;
        }

      /* TODO -- we should also handle address uses of type

         memory = call (whatever);

         and

         call (memory).  */
    }

  if (gimple_code (stmt) == GIMPLE_PHI
      && gimple_bb (stmt) == data->current_loop->header)
    {
      iv = get_iv (data, PHI_RESULT (stmt));

      if (iv && !integer_zerop (iv->step))
        return;
    }

  FOR_EACH_PHI_OR_STMT_USE (use_p, stmt, iter, SSA_OP_USE)
    {
      op = USE_FROM_PTR (use_p);

      if (TREE_CODE (op) != SSA_NAME)
        continue;

      iv = get_iv (data, op);
      if (!iv)
        continue;

      find_interesting_uses_op (data, op);
    }
}

/* Finds interesting uses of induction variables outside of loops
   on loop exit edge EXIT.  */

static void
find_interesting_uses_outside (struct ivopts_data *data, edge exit)
{
  gimple phi;
  gimple_stmt_iterator psi;
  tree def;

  for (psi = gsi_start_phis (exit->dest); !gsi_end_p (psi); gsi_next (&psi))
    {
      phi = gsi_stmt (psi);
      def = PHI_ARG_DEF_FROM_EDGE (phi, exit);
      if (!virtual_operand_p (def))
        find_interesting_uses_op (data, def);
    }
}

/* Finds uses of the induction variables that are interesting.  */

static void
find_interesting_uses (struct ivopts_data *data)
{
  basic_block bb;
  gimple_stmt_iterator bsi;
  basic_block *body = get_loop_body (data->current_loop);
  unsigned i;
  struct version_info *info;
  edge e;

  if (dump_file && (dump_flags & TDF_DETAILS))
    fprintf (dump_file, "Uses:\n\n");

  for (i = 0; i < data->current_loop->num_nodes; i++)
    {
      edge_iterator ei;
      bb = body[i];

      FOR_EACH_EDGE (e, ei, bb->succs)
        if (e->dest != EXIT_BLOCK_PTR_FOR_FN (cfun)
            && !flow_bb_inside_loop_p (data->current_loop, e->dest))
          find_interesting_uses_outside (data, e);

      for (bsi = gsi_start_phis (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        find_interesting_uses_stmt (data, gsi_stmt (bsi));
      for (bsi = gsi_start_bb (bb); !gsi_end_p (bsi); gsi_next (&bsi))
        if (!is_gimple_debug (gsi_stmt (bsi)))
          find_interesting_uses_stmt (data, gsi_stmt (bsi));
    }

  if (dump_file && (dump_flags & TDF_DETAILS))
    {
      bitmap_iterator bi;

      fprintf (dump_file, "\n");

      EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
        {
          info = ver_info (data, i);
          if (info->inv_id)
            {
              fprintf (dump_file, "  ");
              print_generic_expr (dump_file, info->name, TDF_SLIM);
              fprintf (dump_file, " is invariant (%d)%s\n",
                       info->inv_id, info->has_nonlin_use ? "" : ", eliminable");
            }
        }

      fprintf (dump_file, "\n");
    }

  free (body);
}
2061
2062 /* Strips constant offsets from EXPR and stores them in OFFSET. If INSIDE_ADDR
2063 is true, assume we are inside an address. If TOP_COMPREF is true, assume
2064 we are at the top level of the processed address. */
2065
2066 static tree
2067 strip_offset_1 (tree expr, bool inside_addr, bool top_compref,
2068 HOST_WIDE_INT *offset)
2069 {
2070 tree op0 = NULL_TREE, op1 = NULL_TREE, tmp, step;
2071 enum tree_code code;
2072 tree type, orig_type = TREE_TYPE (expr);
2073 HOST_WIDE_INT off0, off1, st;
2074 tree orig_expr = expr;
2075
2076 STRIP_NOPS (expr);
2077
2078 type = TREE_TYPE (expr);
2079 code = TREE_CODE (expr);
2080 *offset = 0;
2081
2082 switch (code)
2083 {
2084 case INTEGER_CST:
2085 if (!cst_and_fits_in_hwi (expr)
2086 || integer_zerop (expr))
2087 return orig_expr;
2088
2089 *offset = int_cst_value (expr);
2090 return build_int_cst (orig_type, 0);
2091
2092 case POINTER_PLUS_EXPR:
2093 case PLUS_EXPR:
2094 case MINUS_EXPR:
2095 op0 = TREE_OPERAND (expr, 0);
2096 op1 = TREE_OPERAND (expr, 1);
2097
2098 op0 = strip_offset_1 (op0, false, false, &off0);
2099 op1 = strip_offset_1 (op1, false, false, &off1);
2100
2101 *offset = (code == MINUS_EXPR ? off0 - off1 : off0 + off1);
2102 if (op0 == TREE_OPERAND (expr, 0)
2103 && op1 == TREE_OPERAND (expr, 1))
2104 return orig_expr;
2105
2106 if (integer_zerop (op1))
2107 expr = op0;
2108 else if (integer_zerop (op0))
2109 {
2110 if (code == MINUS_EXPR)
2111 expr = fold_build1 (NEGATE_EXPR, type, op1);
2112 else
2113 expr = op1;
2114 }
2115 else
2116 expr = fold_build2 (code, type, op0, op1);
2117
2118 return fold_convert (orig_type, expr);
2119
2120 case MULT_EXPR:
2121 op1 = TREE_OPERAND (expr, 1);
2122 if (!cst_and_fits_in_hwi (op1))
2123 return orig_expr;
2124
2125 op0 = TREE_OPERAND (expr, 0);
2126 op0 = strip_offset_1 (op0, false, false, &off0);
2127 if (op0 == TREE_OPERAND (expr, 0))
2128 return orig_expr;
2129
2130 *offset = off0 * int_cst_value (op1);
2131 if (integer_zerop (op0))
2132 expr = op0;
2133 else
2134 expr = fold_build2 (MULT_EXPR, type, op0, op1);
2135
2136 return fold_convert (orig_type, expr);
2137
2138 case ARRAY_REF:
2139 case ARRAY_RANGE_REF:
2140 if (!inside_addr)
2141 return orig_expr;
2142
2143 step = array_ref_element_size (expr);
2144 if (!cst_and_fits_in_hwi (step))
2145 break;
2146
2147 st = int_cst_value (step);
2148 op1 = TREE_OPERAND (expr, 1);
2149 op1 = strip_offset_1 (op1, false, false, &off1);
2150 *offset = off1 * st;
2151
2152 if (top_compref
2153 && integer_zerop (op1))
2154 {
2155 /* Strip the component reference completely. */
2156 op0 = TREE_OPERAND (expr, 0);
2157 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2158 *offset += off0;
2159 return op0;
2160 }
2161 break;
2162
2163 case COMPONENT_REF:
2164 {
2165 tree field;
2166
2167 if (!inside_addr)
2168 return orig_expr;
2169
2170 tmp = component_ref_field_offset (expr);
2171 field = TREE_OPERAND (expr, 1);
2172 if (top_compref
2173 && cst_and_fits_in_hwi (tmp)
2174 && cst_and_fits_in_hwi (DECL_FIELD_BIT_OFFSET (field)))
2175 {
2176 HOST_WIDE_INT boffset, abs_off;
2177
2178 /* Strip the component reference completely. */
2179 op0 = TREE_OPERAND (expr, 0);
2180 op0 = strip_offset_1 (op0, inside_addr, top_compref, &off0);
2181 boffset = int_cst_value (DECL_FIELD_BIT_OFFSET (field));
2182 abs_off = abs_hwi (boffset) / BITS_PER_UNIT;
2183 if (boffset < 0)
2184 abs_off = -abs_off;
2185
2186 *offset = off0 + int_cst_value (tmp) + abs_off;
2187 return op0;
2188 }
2189 }
2190 break;
2191
2192 case ADDR_EXPR:
2193 op0 = TREE_OPERAND (expr, 0);
2194 op0 = strip_offset_1 (op0, true, true, &off0);
2195 *offset += off0;
2196
2197 if (op0 == TREE_OPERAND (expr, 0))
2198 return orig_expr;
2199
2200 expr = build_fold_addr_expr (op0);
2201 return fold_convert (orig_type, expr);
2202
2203 case MEM_REF:
2204 /* ??? Offset operand? */
2205 inside_addr = false;
2206 break;
2207
2208 default:
2209 return orig_expr;
2210 }
2211
2212 /* Default handling of expressions for which we want to recurse into
2213 the first operand. */
2214 op0 = TREE_OPERAND (expr, 0);
2215 op0 = strip_offset_1 (op0, inside_addr, false, &off0);
2216 *offset += off0;
2217
2218 if (op0 == TREE_OPERAND (expr, 0)
2219 && (!op1 || op1 == TREE_OPERAND (expr, 1)))
2220 return orig_expr;
2221
2222 expr = copy_node (expr);
2223 TREE_OPERAND (expr, 0) = op0;
2224 if (op1)
2225 TREE_OPERAND (expr, 1) = op1;
2226
2227 /* Inside an address, we might strip the top-level component references,
2228 thus changing the type of the expression. The handling of ADDR_EXPR
2229 will fix that. */
2230 expr = fold_convert (orig_type, expr);
2231
2232 return expr;
2233 }
2234
2235 /* Strips constant offsets from EXPR and stores them in OFFSET. */
2236
2237 static tree
2238 strip_offset (tree expr, unsigned HOST_WIDE_INT *offset)
2239 {
2240 HOST_WIDE_INT off;
2241 tree core = strip_offset_1 (expr, false, false, &off);
2242 *offset = off;
2243 return core;
2244 }
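
/* Illustration (not part of the pass; assumes 4-byte int): for the
   address &a[i + 3] of an int array a, strip_offset returns the core
   &a[i] and stores 12 in *OFFSET, since the constant part of the
   ARRAY_REF index contributes 3 * sizeof (int) bytes. */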
2245
2246 /* Returns a variant of TYPE that can be used as a base for different uses.
2247 We return an unsigned type with the same precision, which avoids problems
2248 with overflow. */
2249
2250 static tree
2251 generic_type_for (tree type)
2252 {
2253 if (POINTER_TYPE_P (type))
2254 return unsigned_type_for (type);
2255
2256 if (TYPE_UNSIGNED (type))
2257 return type;
2258
2259 return unsigned_type_for (type);
2260 }
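
/* For example, generic_type_for (int) is unsigned int; for a pointer
   type, unsigned_type_for yields an unsigned integer type of the same
   width (e.g. a 64-bit unsigned type on typical LP64 targets), so
   candidate arithmetic never invokes signed-overflow undefined
   behavior. */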
2261
2262 /* Records invariants in *EXPR_P. Callback for walk_tree. DATA contains
2263 the bitmap in which we should store them. */
2264
2265 static struct ivopts_data *fd_ivopts_data;
2266 static tree
2267 find_depends (tree *expr_p, int *ws ATTRIBUTE_UNUSED, void *data)
2268 {
2269 bitmap *depends_on = (bitmap *) data;
2270 struct version_info *info;
2271
2272 if (TREE_CODE (*expr_p) != SSA_NAME)
2273 return NULL_TREE;
2274 info = name_info (fd_ivopts_data, *expr_p);
2275
2276 if (!info->inv_id || info->has_nonlin_use)
2277 return NULL_TREE;
2278
2279 if (!*depends_on)
2280 *depends_on = BITMAP_ALLOC (NULL);
2281 bitmap_set_bit (*depends_on, info->inv_id);
2282
2283 return NULL_TREE;
2284 }
2285
2286 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2287 position to POS. If USE is not NULL, the candidate is set as related to
2288 it. If both BASE and STEP are NULL, we add a pseudocandidate for the
2289 replacement of the final value of the iv by a direct computation. */
2290
2291 static struct iv_cand *
2292 add_candidate_1 (struct ivopts_data *data,
2293 tree base, tree step, bool important, enum iv_position pos,
2294 struct iv_use *use, gimple incremented_at)
2295 {
2296 unsigned i;
2297 struct iv_cand *cand = NULL;
2298 tree type, orig_type;
2299
2300 /* For non-original variables, make sure their values are computed in a type
2301 that does not invoke undefined behavior on overflows (since in general,
2302 we cannot prove that these induction variables are non-wrapping). */
2303 if (pos != IP_ORIGINAL)
2304 {
2305 orig_type = TREE_TYPE (base);
2306 type = generic_type_for (orig_type);
2307 if (type != orig_type)
2308 {
2309 base = fold_convert (type, base);
2310 step = fold_convert (type, step);
2311 }
2312 }
2313
2314 for (i = 0; i < n_iv_cands (data); i++)
2315 {
2316 cand = iv_cand (data, i);
2317
2318 if (cand->pos != pos)
2319 continue;
2320
2321 if (cand->incremented_at != incremented_at
2322 || ((pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2323 && cand->ainc_use != use))
2324 continue;
2325
2326 if (!cand->iv)
2327 {
2328 if (!base && !step)
2329 break;
2330
2331 continue;
2332 }
2333
2334 if (!base && !step)
2335 continue;
2336
2337 if (operand_equal_p (base, cand->iv->base, 0)
2338 && operand_equal_p (step, cand->iv->step, 0)
2339 && (TYPE_PRECISION (TREE_TYPE (base))
2340 == TYPE_PRECISION (TREE_TYPE (cand->iv->base))))
2341 break;
2342 }
2343
2344 if (i == n_iv_cands (data))
2345 {
2346 cand = XCNEW (struct iv_cand);
2347 cand->id = i;
2348
2349 if (!base && !step)
2350 cand->iv = NULL;
2351 else
2352 cand->iv = alloc_iv (base, step);
2353
2354 cand->pos = pos;
2355 if (pos != IP_ORIGINAL && cand->iv)
2356 {
2357 cand->var_before = create_tmp_var_raw (TREE_TYPE (base), "ivtmp");
2358 cand->var_after = cand->var_before;
2359 }
2360 cand->important = important;
2361 cand->incremented_at = incremented_at;
2362 data->iv_candidates.safe_push (cand);
2363
2364 if (step
2365 && TREE_CODE (step) != INTEGER_CST)
2366 {
2367 fd_ivopts_data = data;
2368 walk_tree (&step, find_depends, &cand->depends_on, NULL);
2369 }
2370
2371 if (pos == IP_AFTER_USE || pos == IP_BEFORE_USE)
2372 cand->ainc_use = use;
2373 else
2374 cand->ainc_use = NULL;
2375
2376 if (dump_file && (dump_flags & TDF_DETAILS))
2377 dump_cand (dump_file, cand);
2378 }
2379
2380 if (important && !cand->important)
2381 {
2382 cand->important = true;
2383 if (dump_file && (dump_flags & TDF_DETAILS))
2384 fprintf (dump_file, "Candidate %d is important\n", cand->id);
2385 }
2386
2387 if (use)
2388 {
2389 bitmap_set_bit (use->related_cands, i);
2390 if (dump_file && (dump_flags & TDF_DETAILS))
2391 fprintf (dump_file, "Candidate %d is related to use %d\n",
2392 cand->id, use->id);
2393 }
2394
2395 return cand;
2396 }
2397
2398 /* Returns true if incrementing the induction variable at the end of the LOOP
2399 is allowed.
2400
2401 The purpose is to avoid splitting the latch edge with a biv increment,
2402 thus creating a jump, possibly confusing other optimization passes and
2403 leaving less freedom to the scheduler. So we allow IP_END_POS only if
2404 IP_NORMAL_POS is not available (so we do not have a better alternative),
2405 or if the latch edge is already nonempty. */
2406
2407 static bool
2408 allow_ip_end_pos_p (struct loop *loop)
2409 {
2410 if (!ip_normal_pos (loop))
2411 return true;
2412
2413 if (!empty_block_p (ip_end_pos (loop)))
2414 return true;
2415
2416 return false;
2417 }
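
/* Illustration (hypothetical loop shape):

     body:   ...
             ivtmp_2 = ivtmp_1 + 1;   <-- IP_NORMAL, before exit test
             if (cond) goto exit;
     latch:  goto body;               <-- IP_END, on the latch edge

   If the latch is a plain fallthru edge, placing the increment there
   would split the edge and create a new jump, so IP_END is allowed
   only when there is no IP_NORMAL position or the latch block already
   contains code. */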
2418
2419 /* If possible, adds autoincrement candidates BASE + STEP * i based on use USE.
2420 Important field is set to IMPORTANT. */
2421
2422 static void
2423 add_autoinc_candidates (struct ivopts_data *data, tree base, tree step,
2424 bool important, struct iv_use *use)
2425 {
2426 basic_block use_bb = gimple_bb (use->stmt);
2427 enum machine_mode mem_mode;
2428 unsigned HOST_WIDE_INT cstepi;
2429
2430 /* If we insert the increment in any position other than the standard
2431 ones, we must ensure that it is incremented once per iteration.
2432 It must not be in an inner nested loop, or on one side of an if
2433 statement. */
2434 if (use_bb->loop_father != data->current_loop
2435 || !dominated_by_p (CDI_DOMINATORS, data->current_loop->latch, use_bb)
2436 || stmt_could_throw_p (use->stmt)
2437 || !cst_and_fits_in_hwi (step))
2438 return;
2439
2440 cstepi = int_cst_value (step);
2441
2442 mem_mode = TYPE_MODE (TREE_TYPE (*use->op_p));
2443 if (((USE_LOAD_PRE_INCREMENT (mem_mode)
2444 || USE_STORE_PRE_INCREMENT (mem_mode))
2445 && GET_MODE_SIZE (mem_mode) == cstepi)
2446 || ((USE_LOAD_PRE_DECREMENT (mem_mode)
2447 || USE_STORE_PRE_DECREMENT (mem_mode))
2448 && GET_MODE_SIZE (mem_mode) == -cstepi))
2449 {
2450 enum tree_code code = MINUS_EXPR;
2451 tree new_base;
2452 tree new_step = step;
2453
2454 if (POINTER_TYPE_P (TREE_TYPE (base)))
2455 {
2456 new_step = fold_build1 (NEGATE_EXPR, TREE_TYPE (step), step);
2457 code = POINTER_PLUS_EXPR;
2458 }
2459 else
2460 new_step = fold_convert (TREE_TYPE (base), new_step);
2461 new_base = fold_build2 (code, TREE_TYPE (base), base, new_step);
2462 add_candidate_1 (data, new_base, step, important, IP_BEFORE_USE, use,
2463 use->stmt);
2464 }
2465 if (((USE_LOAD_POST_INCREMENT (mem_mode)
2466 || USE_STORE_POST_INCREMENT (mem_mode))
2467 && GET_MODE_SIZE (mem_mode) == cstepi)
2468 || ((USE_LOAD_POST_DECREMENT (mem_mode)
2469 || USE_STORE_POST_DECREMENT (mem_mode))
2470 && GET_MODE_SIZE (mem_mode) == -cstepi))
2471 {
2472 add_candidate_1 (data, base, step, important, IP_AFTER_USE, use,
2473 use->stmt);
2474 }
2475 }
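
/* Illustration (hypothetical target with a post-increment load such
   as the ARM "ldr rD, [rA], #4"): for an SImode use *p with step 4,
   GET_MODE_SIZE (SImode) == cstepi, so an IP_AFTER_USE candidate
   based at p is added; if it is selected, the increment can later be
   folded into the memory access itself. */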
2476
2477 /* Adds a candidate BASE + STEP * i. Important field is set to IMPORTANT and
2478 position to POS. If USE is not NULL, the candidate is set as related to
2479 it. The candidate computation is scheduled on all available positions. */
2480
2481 static void
2482 add_candidate (struct ivopts_data *data,
2483 tree base, tree step, bool important, struct iv_use *use)
2484 {
2485 if (ip_normal_pos (data->current_loop))
2486 add_candidate_1 (data, base, step, important, IP_NORMAL, use, NULL);
2487 if (ip_end_pos (data->current_loop)
2488 && allow_ip_end_pos_p (data->current_loop))
2489 add_candidate_1 (data, base, step, important, IP_END, use, NULL);
2490
2491 if (use != NULL && use->type == USE_ADDRESS)
2492 add_autoinc_candidates (data, base, step, important, use);
2493 }
2494
2495 /* Adds standard iv candidates. */
2496
2497 static void
2498 add_standard_iv_candidates (struct ivopts_data *data)
2499 {
2500 add_candidate (data, integer_zero_node, integer_one_node, true, NULL);
2501
2502 /* The same for the long type if it is wider and still fast enough. */
2503 if (TYPE_PRECISION
2504 (long_integer_type_node) > TYPE_PRECISION (integer_type_node)
2505 && TYPE_PRECISION (long_integer_type_node) <= BITS_PER_WORD)
2506 add_candidate (data, build_int_cst (long_integer_type_node, 0),
2507 build_int_cst (long_integer_type_node, 1), true, NULL);
2508
2509 /* And likewise for the long long type, under the same conditions. */
2510 if (TYPE_PRECISION
2511 (long_long_integer_type_node) > TYPE_PRECISION (long_integer_type_node)
2512 && TYPE_PRECISION (long_long_integer_type_node) <= BITS_PER_WORD)
2513 add_candidate (data, build_int_cst (long_long_integer_type_node, 0),
2514 build_int_cst (long_long_integer_type_node, 1), true, NULL);
2515 }
2516
2517
2518 /* Adds candidates based on the old induction variable IV. */
2519
2520 static void
2521 add_old_iv_candidates (struct ivopts_data *data, struct iv *iv)
2522 {
2523 gimple phi;
2524 tree def;
2525 struct iv_cand *cand;
2526
2527 add_candidate (data, iv->base, iv->step, true, NULL);
2528
2529 /* The same, but with initial value zero. */
2530 if (POINTER_TYPE_P (TREE_TYPE (iv->base)))
2531 add_candidate (data, size_int (0), iv->step, true, NULL);
2532 else
2533 add_candidate (data, build_int_cst (TREE_TYPE (iv->base), 0),
2534 iv->step, true, NULL);
2535
2536 phi = SSA_NAME_DEF_STMT (iv->ssa_name);
2537 if (gimple_code (phi) == GIMPLE_PHI)
2538 {
2539 /* Additionally record the possibility of leaving the original iv
2540 untouched. */
2541 def = PHI_ARG_DEF_FROM_EDGE (phi, loop_latch_edge (data->current_loop));
2542 /* Don't add candidate if it's from another PHI node because
2543 it's an affine iv appearing in the form of PEELED_CHREC. */
2544 phi = SSA_NAME_DEF_STMT (def);
2545 if (gimple_code (phi) != GIMPLE_PHI)
2546 {
2547 cand = add_candidate_1 (data,
2548 iv->base, iv->step, true, IP_ORIGINAL, NULL,
2549 SSA_NAME_DEF_STMT (def));
2550 cand->var_before = iv->ssa_name;
2551 cand->var_after = def;
2552 }
2553 else
2554 gcc_assert (gimple_bb (phi) == data->current_loop->header);
2555 }
2556 }
2557
2558 /* Adds candidates based on the old induction variables. */
2559
2560 static void
2561 add_old_ivs_candidates (struct ivopts_data *data)
2562 {
2563 unsigned i;
2564 struct iv *iv;
2565 bitmap_iterator bi;
2566
2567 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
2568 {
2569 iv = ver_info (data, i)->iv;
2570 if (iv && iv->biv_p && !integer_zerop (iv->step))
2571 add_old_iv_candidates (data, iv);
2572 }
2573 }
2574
2575 /* Adds candidates based on the value of the induction variable IV and USE. */
2576
2577 static void
2578 add_iv_value_candidates (struct ivopts_data *data,
2579 struct iv *iv, struct iv_use *use)
2580 {
2581 unsigned HOST_WIDE_INT offset;
2582 tree base;
2583 tree basetype;
2584
2585 add_candidate (data, iv->base, iv->step, false, use);
2586
2587 /* The same, but with initial value zero. Make such a variable important,
2588 since it is generic enough that possibly many uses may be based
2589 on it. */
2590 basetype = TREE_TYPE (iv->base);
2591 if (POINTER_TYPE_P (basetype))
2592 basetype = sizetype;
2593 add_candidate (data, build_int_cst (basetype, 0),
2594 iv->step, true, use);
2595
2596 /* Third, try removing the constant offset. Make sure we even
2597 add a candidate for &a[0] vs. (T *)&a. */
2598 base = strip_offset (iv->base, &offset);
2599 if (offset
2600 || base != iv->base)
2601 add_candidate (data, base, iv->step, false, use);
2602 }
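
/* Illustration: for a use whose iv is {base = &a[4], step = 4}, this
   adds the candidates {&a[4], 4}, the important generic {0, 4}, and
   {&a[0], 4}, i.e. the base with the constant offset 16 stripped
   (assuming 4-byte elements). */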
2603
2604 /* Adds candidates based on the uses. */
2605
2606 static void
2607 add_derived_ivs_candidates (struct ivopts_data *data)
2608 {
2609 unsigned i;
2610
2611 for (i = 0; i < n_iv_uses (data); i++)
2612 {
2613 struct iv_use *use = iv_use (data, i);
2614
2615 if (!use)
2616 continue;
2617
2618 switch (use->type)
2619 {
2620 case USE_NONLINEAR_EXPR:
2621 case USE_COMPARE:
2622 case USE_ADDRESS:
2623 /* Just add the ivs based on the value of the iv used here. */
2624 add_iv_value_candidates (data, use->iv, use);
2625 break;
2626
2627 default:
2628 gcc_unreachable ();
2629 }
2630 }
2631 }
2632
2633 /* Record important candidates and add them to related_cands bitmaps
2634 if needed. */
2635
2636 static void
2637 record_important_candidates (struct ivopts_data *data)
2638 {
2639 unsigned i;
2640 struct iv_use *use;
2641
2642 for (i = 0; i < n_iv_cands (data); i++)
2643 {
2644 struct iv_cand *cand = iv_cand (data, i);
2645
2646 if (cand->important)
2647 bitmap_set_bit (data->important_candidates, i);
2648 }
2649
2650 data->consider_all_candidates = (n_iv_cands (data)
2651 <= CONSIDER_ALL_CANDIDATES_BOUND);
2652
2653 if (data->consider_all_candidates)
2654 {
2655 /* We will not need "related_cands" bitmaps in this case,
2656 so release them to decrease peak memory consumption. */
2657 for (i = 0; i < n_iv_uses (data); i++)
2658 {
2659 use = iv_use (data, i);
2660 BITMAP_FREE (use->related_cands);
2661 }
2662 }
2663 else
2664 {
2665 /* Add important candidates to the related_cands bitmaps. */
2666 for (i = 0; i < n_iv_uses (data); i++)
2667 bitmap_ior_into (iv_use (data, i)->related_cands,
2668 data->important_candidates);
2669 }
2670 }
2671
2672 /* Allocates the data structure mapping the (use, candidate) pairs to costs.
2673 If consider_all_candidates is true, we use a two-dimensional array; otherwise
2674 we allocate a small hash table for every use. */
2675
2676 static void
2677 alloc_use_cost_map (struct ivopts_data *data)
2678 {
2679 unsigned i, size, s;
2680
2681 for (i = 0; i < n_iv_uses (data); i++)
2682 {
2683 struct iv_use *use = iv_use (data, i);
2684
2685 if (data->consider_all_candidates)
2686 size = n_iv_cands (data);
2687 else
2688 {
2689 s = bitmap_count_bits (use->related_cands);
2690
2691 /* Round up to a power of two, so that computing the modulus is fast. */
2692 size = s ? (1 << ceil_log2 (s)) : 1;
2693 }
2694
2695 use->n_map_members = size;
2696 use->cost_map = XCNEWVEC (struct cost_pair, size);
2697 }
2698 }
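
/* Illustration of the sizing: with s == 5 related candidates the map
   gets size 8, so a candidate id maps to a slot with a mask instead
   of a division:

     slot = cand->id & (8 - 1);   equivalent to cand->id % 8

   set_use_iv_cost and get_use_iv_cost below rely on this, probing
   linearly (with wraparound) from that slot. */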
2699
2700 /* Returns a description of the computation cost of an expression whose
2701 runtime cost is RUNTIME and whose complexity is COMPLEXITY. */
2702
2703 static comp_cost
2704 new_cost (unsigned runtime, unsigned complexity)
2705 {
2706 comp_cost cost;
2707
2708 cost.cost = runtime;
2709 cost.complexity = complexity;
2710
2711 return cost;
2712 }
2713
2714 /* Adds costs COST1 and COST2. */
2715
2716 static comp_cost
2717 add_costs (comp_cost cost1, comp_cost cost2)
2718 {
2719 cost1.cost += cost2.cost;
2720 cost1.complexity += cost2.complexity;
2721
2722 return cost1;
2723 }
2724 /* Subtracts cost COST2 from COST1. */
2725
2726 static comp_cost
2727 sub_costs (comp_cost cost1, comp_cost cost2)
2728 {
2729 cost1.cost -= cost2.cost;
2730 cost1.complexity -= cost2.complexity;
2731
2732 return cost1;
2733 }
2734
2735 /* Returns a negative number if COST1 < COST2, a positive number if
2736 COST1 > COST2, and 0 if COST1 = COST2. */
2737
2738 static int
2739 compare_costs (comp_cost cost1, comp_cost cost2)
2740 {
2741 if (cost1.cost == cost2.cost)
2742 return cost1.complexity - cost2.complexity;
2743
2744 return cost1.cost - cost2.cost;
2745 }
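
/* Costs thus compare lexicographically: e.g. {cost = 4, complexity = 2}
   is less than {cost = 5, complexity = 0}; complexity only breaks ties
   between equal runtime costs. */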
2746
2747 /* Returns true if COST is infinite. */
2748
2749 static bool
2750 infinite_cost_p (comp_cost cost)
2751 {
2752 return cost.cost == INFTY;
2753 }
2754
2755 /* Sets the cost of the (USE, CANDIDATE) pair to COST, records that it depends
2756 on the invariants DEPENDS_ON, that the value used in expressing it
2757 is VALUE, and, in case of iv elimination, that the comparison operator is COMP. */
2758
2759 static void
2760 set_use_iv_cost (struct ivopts_data *data,
2761 struct iv_use *use, struct iv_cand *cand,
2762 comp_cost cost, bitmap depends_on, tree value,
2763 enum tree_code comp, int inv_expr_id)
2764 {
2765 unsigned i, s;
2766
2767 if (infinite_cost_p (cost))
2768 {
2769 BITMAP_FREE (depends_on);
2770 return;
2771 }
2772
2773 if (data->consider_all_candidates)
2774 {
2775 use->cost_map[cand->id].cand = cand;
2776 use->cost_map[cand->id].cost = cost;
2777 use->cost_map[cand->id].depends_on = depends_on;
2778 use->cost_map[cand->id].value = value;
2779 use->cost_map[cand->id].comp = comp;
2780 use->cost_map[cand->id].inv_expr_id = inv_expr_id;
2781 return;
2782 }
2783
2784 /* n_map_members is a power of two, so this computes modulo. */
2785 s = cand->id & (use->n_map_members - 1);
2786 for (i = s; i < use->n_map_members; i++)
2787 if (!use->cost_map[i].cand)
2788 goto found;
2789 for (i = 0; i < s; i++)
2790 if (!use->cost_map[i].cand)
2791 goto found;
2792
2793 gcc_unreachable ();
2794
2795 found:
2796 use->cost_map[i].cand = cand;
2797 use->cost_map[i].cost = cost;
2798 use->cost_map[i].depends_on = depends_on;
2799 use->cost_map[i].value = value;
2800 use->cost_map[i].comp = comp;
2801 use->cost_map[i].inv_expr_id = inv_expr_id;
2802 }
2803
2804 /* Gets cost of (USE, CANDIDATE) pair. */
2805
2806 static struct cost_pair *
2807 get_use_iv_cost (struct ivopts_data *data, struct iv_use *use,
2808 struct iv_cand *cand)
2809 {
2810 unsigned i, s;
2811 struct cost_pair *ret;
2812
2813 if (!cand)
2814 return NULL;
2815
2816 if (data->consider_all_candidates)
2817 {
2818 ret = use->cost_map + cand->id;
2819 if (!ret->cand)
2820 return NULL;
2821
2822 return ret;
2823 }
2824
2825 /* n_map_members is a power of two, so this computes modulo. */
2826 s = cand->id & (use->n_map_members - 1);
2827 for (i = s; i < use->n_map_members; i++)
2828 if (use->cost_map[i].cand == cand)
2829 return use->cost_map + i;
2830 else if (use->cost_map[i].cand == NULL)
2831 return NULL;
2832 for (i = 0; i < s; i++)
2833 if (use->cost_map[i].cand == cand)
2834 return use->cost_map + i;
2835 else if (use->cost_map[i].cand == NULL)
2836 return NULL;
2837
2838 return NULL;
2839 }
2840
2841 /* Returns an estimate of the cost of computing SEQ. */
2842
2843 static unsigned
2844 seq_cost (rtx seq, bool speed)
2845 {
2846 unsigned cost = 0;
2847 rtx set;
2848
2849 for (; seq; seq = NEXT_INSN (seq))
2850 {
2851 set = single_set (seq);
2852 if (set)
2853 cost += set_src_cost (SET_SRC (set), speed);
2854 else
2855 cost++;
2856 }
2857
2858 return cost;
2859 }
2860
2861 /* Produce DECL_RTL for object OBJ so it looks like it is stored in memory. */
2862 static rtx
2863 produce_memory_decl_rtl (tree obj, int *regno)
2864 {
2865 addr_space_t as = TYPE_ADDR_SPACE (TREE_TYPE (obj));
2866 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
2867 rtx x;
2868
2869 gcc_assert (obj);
2870 if (TREE_STATIC (obj) || DECL_EXTERNAL (obj))
2871 {
2872 const char *name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (obj));
2873 x = gen_rtx_SYMBOL_REF (address_mode, name);
2874 SET_SYMBOL_REF_DECL (x, obj);
2875 x = gen_rtx_MEM (DECL_MODE (obj), x);
2876 set_mem_addr_space (x, as);
2877 targetm.encode_section_info (obj, x, true);
2878 }
2879 else
2880 {
2881 x = gen_raw_REG (address_mode, (*regno)++);
2882 x = gen_rtx_MEM (DECL_MODE (obj), x);
2883 set_mem_addr_space (x, as);
2884 }
2885
2886 return x;
2887 }
2888
2889 /* Prepares decl_rtl for variables referred to in *EXPR_P. Callback for
2890 walk_tree. DATA contains the current fake register number. */
2891
2892 static tree
2893 prepare_decl_rtl (tree *expr_p, int *ws, void *data)
2894 {
2895 tree obj = NULL_TREE;
2896 rtx x = NULL_RTX;
2897 int *regno = (int *) data;
2898
2899 switch (TREE_CODE (*expr_p))
2900 {
2901 case ADDR_EXPR:
2902 for (expr_p = &TREE_OPERAND (*expr_p, 0);
2903 handled_component_p (*expr_p);
2904 expr_p = &TREE_OPERAND (*expr_p, 0))
2905 continue;
2906 obj = *expr_p;
2907 if (DECL_P (obj) && HAS_RTL_P (obj) && !DECL_RTL_SET_P (obj))
2908 x = produce_memory_decl_rtl (obj, regno);
2909 break;
2910
2911 case SSA_NAME:
2912 *ws = 0;
2913 obj = SSA_NAME_VAR (*expr_p);
2914 /* Defer handling of anonymous SSA_NAMEs to the expander. */
2915 if (!obj)
2916 return NULL_TREE;
2917 if (!DECL_RTL_SET_P (obj))
2918 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2919 break;
2920
2921 case VAR_DECL:
2922 case PARM_DECL:
2923 case RESULT_DECL:
2924 *ws = 0;
2925 obj = *expr_p;
2926
2927 if (DECL_RTL_SET_P (obj))
2928 break;
2929
2930 if (DECL_MODE (obj) == BLKmode)
2931 x = produce_memory_decl_rtl (obj, regno);
2932 else
2933 x = gen_raw_REG (DECL_MODE (obj), (*regno)++);
2934
2935 break;
2936
2937 default:
2938 break;
2939 }
2940
2941 if (x)
2942 {
2943 decl_rtl_to_reset.safe_push (obj);
2944 SET_DECL_RTL (obj, x);
2945 }
2946
2947 return NULL_TREE;
2948 }
2949
2950 /* Determines the cost of computing EXPR. */
2951
2952 static unsigned
2953 computation_cost (tree expr, bool speed)
2954 {
2955 rtx seq, rslt;
2956 tree type = TREE_TYPE (expr);
2957 unsigned cost;
2958 /* Avoid using hard regs in ways which may be unsupported. */
2959 int regno = LAST_VIRTUAL_REGISTER + 1;
2960 struct cgraph_node *node = cgraph_node::get (current_function_decl);
2961 enum node_frequency real_frequency = node->frequency;
2962
2963 node->frequency = NODE_FREQUENCY_NORMAL;
2964 crtl->maybe_hot_insn_p = speed;
2965 walk_tree (&expr, prepare_decl_rtl, &regno, NULL);
2966 start_sequence ();
2967 rslt = expand_expr (expr, NULL_RTX, TYPE_MODE (type), EXPAND_NORMAL);
2968 seq = get_insns ();
2969 end_sequence ();
2970 default_rtl_profile ();
2971 node->frequency = real_frequency;
2972
2973 cost = seq_cost (seq, speed);
2974 if (MEM_P (rslt))
2975 cost += address_cost (XEXP (rslt, 0), TYPE_MODE (type),
2976 TYPE_ADDR_SPACE (type), speed);
2977 else if (!REG_P (rslt))
2978 cost += set_src_cost (rslt, speed);
2979
2980 return cost;
2981 }
2982
2983 /* Returns variable containing the value of candidate CAND at statement AT. */
2984
2985 static tree
2986 var_at_stmt (struct loop *loop, struct iv_cand *cand, gimple stmt)
2987 {
2988 if (stmt_after_increment (loop, cand, stmt))
2989 return cand->var_after;
2990 else
2991 return cand->var_before;
2992 }
2993
2994 /* If A is (TYPE) BA and B is (TYPE) BB, and the types of BA and BB have the
2995 same precision, which is at least as wide as the precision of TYPE, stores
2996 BA in A and BB in B, and returns the type of BA. Otherwise, returns the
2997 type of A and B. */
2998
2999 static tree
3000 determine_common_wider_type (tree *a, tree *b)
3001 {
3002 tree wider_type = NULL;
3003 tree suba, subb;
3004 tree atype = TREE_TYPE (*a);
3005
3006 if (CONVERT_EXPR_P (*a))
3007 {
3008 suba = TREE_OPERAND (*a, 0);
3009 wider_type = TREE_TYPE (suba);
3010 if (TYPE_PRECISION (wider_type) < TYPE_PRECISION (atype))
3011 return atype;
3012 }
3013 else
3014 return atype;
3015
3016 if (CONVERT_EXPR_P (*b))
3017 {
3018 subb = TREE_OPERAND (*b, 0);
3019 if (TYPE_PRECISION (wider_type) != TYPE_PRECISION (TREE_TYPE (subb)))
3020 return atype;
3021 }
3022 else
3023 return atype;
3024
3025 *a = suba;
3026 *b = subb;
3027 return wider_type;
3028 }
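
/* Illustration (hypothetical names): if *A is (unsigned int) ba_long
   and *B is (unsigned int) bb_long, this stores ba_long to *A and
   bb_long to *B and returns the 64-bit type, so the subtraction done
   by the caller folds in the wide type before the final cast back to
   the unsigned 32-bit type. */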
3029
3030 /* Determines the expression by which USE is expressed from induction variable
3031 CAND at statement AT in LOOP. The expression is stored in a decomposed
3032 form into AFF. Returns false if USE cannot be expressed using CAND. */
3033
3034 static bool
3035 get_computation_aff (struct loop *loop,
3036 struct iv_use *use, struct iv_cand *cand, gimple at,
3037 struct aff_tree *aff)
3038 {
3039 tree ubase = use->iv->base;
3040 tree ustep = use->iv->step;
3041 tree cbase = cand->iv->base;
3042 tree cstep = cand->iv->step, cstep_common;
3043 tree utype = TREE_TYPE (ubase), ctype = TREE_TYPE (cbase);
3044 tree common_type, var;
3045 tree uutype;
3046 aff_tree cbase_aff, var_aff;
3047 widest_int rat;
3048
3049 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
3050 {
3051 /* We do not have enough precision to express the values of USE. */
3052 return false;
3053 }
3054
3055 var = var_at_stmt (loop, cand, at);
3056 uutype = unsigned_type_for (utype);
3057
3058 /* If the conversion is not a no-op, perform it. */
3059 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
3060 {
3061 cstep = fold_convert (uutype, cstep);
3062 cbase = fold_convert (uutype, cbase);
3063 var = fold_convert (uutype, var);
3064 }
3065
3066 if (!constant_multiple_of (ustep, cstep, &rat))
3067 return false;
3068
3069 /* In case both UBASE and CBASE are shortened to UUTYPE from some common
3070 type, we achieve better folding by computing their difference in this
3071 wider type, and casting the result to UUTYPE. We do not need to worry about
3072 overflow, as all the arithmetic will in the end be performed in UUTYPE
3073 anyway. */
3074 common_type = determine_common_wider_type (&ubase, &cbase);
3075
3076 /* use = ubase - ratio * cbase + ratio * var. */
3077 tree_to_aff_combination (ubase, common_type, aff);
3078 tree_to_aff_combination (cbase, common_type, &cbase_aff);
3079 tree_to_aff_combination (var, uutype, &var_aff);
3080
3081 /* We need to shift the value if we are after the increment. */
3082 if (stmt_after_increment (loop, cand, at))
3083 {
3084 aff_tree cstep_aff;
3085
3086 if (common_type != uutype)
3087 cstep_common = fold_convert (common_type, cstep);
3088 else
3089 cstep_common = cstep;
3090
3091 tree_to_aff_combination (cstep_common, common_type, &cstep_aff);
3092 aff_combination_add (&cbase_aff, &cstep_aff);
3093 }
3094
3095 aff_combination_scale (&cbase_aff, -rat);
3096 aff_combination_add (aff, &cbase_aff);
3097 if (common_type != uutype)
3098 aff_combination_convert (aff, uutype);
3099
3100 aff_combination_scale (&var_aff, rat);
3101 aff_combination_add (aff, &var_aff);
3102
3103 return true;
3104 }
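
/* Worked example: expressing a use with iv {base = 16, step = 4} from
   a candidate with iv {base = 0, step = 1} gives rat = 4, so

     use = ubase - rat * cbase + rat * var = 16 - 4 * 0 + 4 * var

   and AFF ends up as the affine combination 4 * var + 16 in the
   unsigned variant of the use type. */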
3105
3106 /* Return the type of USE. */
3107
3108 static tree
3109 get_use_type (struct iv_use *use)
3110 {
3111 tree base_type = TREE_TYPE (use->iv->base);
3112 tree type;
3113
3114 if (use->type == USE_ADDRESS)
3115 {
3116 /* The base_type may be a void pointer. Create a pointer type based on
3117 the mem_ref instead. */
3118 type = build_pointer_type (TREE_TYPE (*use->op_p));
3119 gcc_assert (TYPE_ADDR_SPACE (TREE_TYPE (type))
3120 == TYPE_ADDR_SPACE (TREE_TYPE (base_type)));
3121 }
3122 else
3123 type = base_type;
3124
3125 return type;
3126 }
3127
3128 /* Determines the expression by which USE is expressed from induction variable
3129 CAND at statement AT in LOOP. The computation is unshared. */
3130
3131 static tree
3132 get_computation_at (struct loop *loop,
3133 struct iv_use *use, struct iv_cand *cand, gimple at)
3134 {
3135 aff_tree aff;
3136 tree type = get_use_type (use);
3137
3138 if (!get_computation_aff (loop, use, cand, at, &aff))
3139 return NULL_TREE;
3140 unshare_aff_combination (&aff);
3141 return fold_convert (type, aff_combination_to_tree (&aff));
3142 }
3143
3144 /* Determines the expression by which USE is expressed from induction variable
3145 CAND in LOOP. The computation is unshared. */
3146
3147 static tree
3148 get_computation (struct loop *loop, struct iv_use *use, struct iv_cand *cand)
3149 {
3150 return get_computation_at (loop, use, cand, use->stmt);
3151 }
3152
3153 /* Adjust the cost COST for being in loop setup rather than loop body.
3154 If we're optimizing for space, the loop setup overhead is constant;
3155 if we're optimizing for speed, amortize it over the per-iteration cost. */
3156 static unsigned
3157 adjust_setup_cost (struct ivopts_data *data, unsigned cost)
3158 {
3159 if (cost == INFTY)
3160 return cost;
3161 else if (optimize_loop_for_speed_p (data->current_loop))
3162 return cost / avg_loop_niter (data->current_loop);
3163 else
3164 return cost;
3165 }
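
/* E.g. a setup cost of 10 in a loop averaging 5 iterations is charged
   as 2 when optimizing for speed, but stays 10 when optimizing for
   size. */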
3166
3167 /* Returns true if multiplying by RATIO is allowed in an address. Tests the
3168 validity for a memory reference accessing memory of mode MODE in
3169 address space AS. */
3170
3171
3172 bool
3173 multiplier_allowed_in_address_p (HOST_WIDE_INT ratio, enum machine_mode mode,
3174 addr_space_t as)
3175 {
3176 #define MAX_RATIO 128
3177 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mode;
3178 static vec<sbitmap> valid_mult_list;
3179 sbitmap valid_mult;
3180
3181 if (data_index >= valid_mult_list.length ())
3182 valid_mult_list.safe_grow_cleared (data_index + 1);
3183
3184 valid_mult = valid_mult_list[data_index];
3185 if (!valid_mult)
3186 {
3187 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3188 rtx reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3189 rtx reg2 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3190 rtx addr, scaled;
3191 HOST_WIDE_INT i;
3192
3193 valid_mult = sbitmap_alloc (2 * MAX_RATIO + 1);
3194 bitmap_clear (valid_mult);
3195 scaled = gen_rtx_fmt_ee (MULT, address_mode, reg1, NULL_RTX);
3196 addr = gen_rtx_fmt_ee (PLUS, address_mode, scaled, reg2);
3197 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3198 {
3199 XEXP (scaled, 1) = gen_int_mode (i, address_mode);
3200 if (memory_address_addr_space_p (mode, addr, as)
3201 || memory_address_addr_space_p (mode, scaled, as))
3202 bitmap_set_bit (valid_mult, i + MAX_RATIO);
3203 }
3204
3205 if (dump_file && (dump_flags & TDF_DETAILS))
3206 {
3207 fprintf (dump_file, " allowed multipliers:");
3208 for (i = -MAX_RATIO; i <= MAX_RATIO; i++)
3209 if (bitmap_bit_p (valid_mult, i + MAX_RATIO))
3210 fprintf (dump_file, " %d", (int) i);
3211 fprintf (dump_file, "\n");
3212 fprintf (dump_file, "\n");
3213 }
3214
3215 valid_mult_list[data_index] = valid_mult;
3216 }
3217
3218 if (ratio > MAX_RATIO || ratio < -MAX_RATIO)
3219 return false;
3220
3221 return bitmap_bit_p (valid_mult, ratio + MAX_RATIO);
3222 }
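
/* Illustration: on x86-like targets, whose addressing modes accept
   scale factors of 1, 2, 4 and 8, this returns true only for those
   ratios; any other multiplier has to be computed separately, which
   get_address_cost accounts for via mult_by_coeff_cost below. */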
3223
3224 /* Returns the cost of an address of shape symbol + var + OFFSET + RATIO * index.
3225 If SYMBOL_PRESENT is false, symbol is omitted. If VAR_PRESENT is false,
3226 variable is omitted. Compute the cost for a memory reference that accesses
3227 a memory location of mode MEM_MODE in address space AS.
3228
3229 MAY_AUTOINC is set to true if the autoincrement (increasing index by
3230 size of MEM_MODE / RATIO) is available. To make this determination, we
3231 look at the size of the increment to be made, which is given in CSTEP.
3232 CSTEP may be zero if the step is unknown.
3233 STMT_AFTER_INC is true iff the statement we're looking at is after the
3234 increment of the original biv.
3235
3236 TODO -- there must be some better way. This is all quite crude. */
3237
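/* Illustration of the shape being costed: with all four parts present
   the address corresponds to something like the x86 operand
   sym+16(%rbx,%rcx,4): symbol sym, var %rbx, OFFSET 16 and index
   %rcx with RATIO 4. The sixteen present/absent combinations are
   what the costs[2][2][2][2] table below caches per mode and address
   space. */
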
3238 enum ainc_type
3239 {
3240 AINC_PRE_INC, /* Pre increment. */
3241 AINC_PRE_DEC, /* Pre decrement. */
3242 AINC_POST_INC, /* Post increment. */
3243 AINC_POST_DEC, /* Post decrement. */
3244 AINC_NONE /* Also the number of auto increment types. */
3245 };
3246
3247 typedef struct address_cost_data_s
3248 {
3249 HOST_WIDE_INT min_offset, max_offset;
3250 unsigned costs[2][2][2][2];
3251 unsigned ainc_costs[AINC_NONE];
3252 } *address_cost_data;
3253
3254
3255 static comp_cost
3256 get_address_cost (bool symbol_present, bool var_present,
3257 unsigned HOST_WIDE_INT offset, HOST_WIDE_INT ratio,
3258 HOST_WIDE_INT cstep, enum machine_mode mem_mode,
3259 addr_space_t as, bool speed,
3260 bool stmt_after_inc, bool *may_autoinc)
3261 {
3262 enum machine_mode address_mode = targetm.addr_space.address_mode (as);
3263 static vec<address_cost_data> address_cost_data_list;
3264 unsigned int data_index = (int) as * MAX_MACHINE_MODE + (int) mem_mode;
3265 address_cost_data data;
3266 static bool has_preinc[MAX_MACHINE_MODE], has_postinc[MAX_MACHINE_MODE];
3267 static bool has_predec[MAX_MACHINE_MODE], has_postdec[MAX_MACHINE_MODE];
3268 unsigned cost, acost, complexity;
3269 enum ainc_type autoinc_type;
3270 bool offset_p, ratio_p, autoinc;
3271 HOST_WIDE_INT s_offset, autoinc_offset, msize;
3272 unsigned HOST_WIDE_INT mask;
3273 unsigned bits;
3274
3275 if (data_index >= address_cost_data_list.length ())
3276 address_cost_data_list.safe_grow_cleared (data_index + 1);
3277
3278 data = address_cost_data_list[data_index];
3279 if (!data)
3280 {
3281 HOST_WIDE_INT i;
3282 HOST_WIDE_INT rat, off = 0;
3283 int old_cse_not_expected, width;
3284 unsigned sym_p, var_p, off_p, rat_p, add_c;
3285 rtx seq, addr, base;
3286 rtx reg0, reg1;
3287
3288 data = (address_cost_data) xcalloc (1, sizeof (*data));
3289
3290 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3291
3292 width = GET_MODE_BITSIZE (address_mode) - 1;
3293 if (width > (HOST_BITS_PER_WIDE_INT - 1))
3294 width = HOST_BITS_PER_WIDE_INT - 1;
3295 addr = gen_rtx_fmt_ee (PLUS, address_mode, reg1, NULL_RTX);
3296
3297 for (i = width; i >= 0; i--)
3298 {
3299 off = -((unsigned HOST_WIDE_INT) 1 << i);
3300 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3301 if (memory_address_addr_space_p (mem_mode, addr, as))
3302 break;
3303 }
3304 data->min_offset = (i == -1? 0 : off);
3305
3306 for (i = width; i >= 0; i--)
3307 {
3308 off = ((unsigned HOST_WIDE_INT) 1 << i) - 1;
3309 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3310 if (memory_address_addr_space_p (mem_mode, addr, as))
3311 break;
3312 /* For some targets, like ARM Thumb-1, the offset should be naturally
3313 aligned. Try an aligned offset if address_mode is not QImode. */
3314 off = (address_mode == QImode)
3315 ? 0
3316 : ((unsigned HOST_WIDE_INT) 1 << i)
3317 - GET_MODE_SIZE (address_mode);
3318 if (off > 0)
3319 {
3320 XEXP (addr, 1) = gen_int_mode (off, address_mode);
3321 if (memory_address_addr_space_p (mem_mode, addr, as))
3322 break;
3323 }
3324 }
3325 if (i == -1)
3326 off = 0;
3327 data->max_offset = off;
3328
3329 if (dump_file && (dump_flags & TDF_DETAILS))
3330 {
3331 fprintf (dump_file, "get_address_cost:\n");
3332 fprintf (dump_file, " min offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3333 GET_MODE_NAME (mem_mode),
3334 data->min_offset);
3335 fprintf (dump_file, " max offset %s " HOST_WIDE_INT_PRINT_DEC "\n",
3336 GET_MODE_NAME (mem_mode),
3337 data->max_offset);
3338 }
3339
3340 rat = 1;
3341 for (i = 2; i <= MAX_RATIO; i++)
3342 if (multiplier_allowed_in_address_p (i, mem_mode, as))
3343 {
3344 rat = i;
3345 break;
3346 }
3347
3348 /* Compute the cost of various addressing modes. */
3349 acost = 0;
3350 reg0 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 1);
3351 reg1 = gen_raw_REG (address_mode, LAST_VIRTUAL_REGISTER + 2);
3352
3353 if (USE_LOAD_PRE_DECREMENT (mem_mode)
3354 || USE_STORE_PRE_DECREMENT (mem_mode))
3355 {
3356 addr = gen_rtx_PRE_DEC (address_mode, reg0);
3357 has_predec[mem_mode]
3358 = memory_address_addr_space_p (mem_mode, addr, as);
3359
3360 if (has_predec[mem_mode])
3361 data->ainc_costs[AINC_PRE_DEC]
3362 = address_cost (addr, mem_mode, as, speed);
3363 }
3364 if (USE_LOAD_POST_DECREMENT (mem_mode)
3365 || USE_STORE_POST_DECREMENT (mem_mode))
3366 {
3367 addr = gen_rtx_POST_DEC (address_mode, reg0);
3368 has_postdec[mem_mode]
3369 = memory_address_addr_space_p (mem_mode, addr, as);
3370
3371 if (has_postdec[mem_mode])
3372 data->ainc_costs[AINC_POST_DEC]
3373 = address_cost (addr, mem_mode, as, speed);
3374 }
3375 if (USE_LOAD_PRE_INCREMENT (mem_mode)
3376 || USE_STORE_PRE_INCREMENT (mem_mode))
3377 {
3378 addr = gen_rtx_PRE_INC (address_mode, reg0);
3379 has_preinc[mem_mode]
3380 = memory_address_addr_space_p (mem_mode, addr, as);
3381
3382 if (has_preinc[mem_mode])
3383 data->ainc_costs[AINC_PRE_INC]
3384 = address_cost (addr, mem_mode, as, speed);
3385 }
3386 if (USE_LOAD_POST_INCREMENT (mem_mode)
3387 || USE_STORE_POST_INCREMENT (mem_mode))
3388 {
3389 addr = gen_rtx_POST_INC (address_mode, reg0);
3390 has_postinc[mem_mode]
3391 = memory_address_addr_space_p (mem_mode, addr, as);
3392
3393 if (has_postinc[mem_mode])
3394 data->ainc_costs[AINC_POST_INC]
3395 = address_cost (addr, mem_mode, as, speed);
3396 }
3397 for (i = 0; i < 16; i++)
3398 {
3399 sym_p = i & 1;
3400 var_p = (i >> 1) & 1;
3401 off_p = (i >> 2) & 1;
3402 rat_p = (i >> 3) & 1;
3403
3404 addr = reg0;
3405 if (rat_p)
3406 addr = gen_rtx_fmt_ee (MULT, address_mode, addr,
3407 gen_int_mode (rat, address_mode));
3408
3409 if (var_p)
3410 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, reg1);
3411
3412 if (sym_p)
3413 {
3414 base = gen_rtx_SYMBOL_REF (address_mode, ggc_strdup (""));
3415 /* ??? We can run into trouble with some backends by presenting
3416 it with symbols which haven't been properly passed through
3417 targetm.encode_section_info. By setting the local bit, we
3418 enhance the probability of things working. */
3419 SYMBOL_REF_FLAGS (base) = SYMBOL_FLAG_LOCAL;
3420
3421 if (off_p)
3422 base = gen_rtx_fmt_e (CONST, address_mode,
3423 gen_rtx_fmt_ee
3424 (PLUS, address_mode, base,
3425 gen_int_mode (off, address_mode)));
3426 }
3427 else if (off_p)
3428 base = gen_int_mode (off, address_mode);
3429 else
3430 base = NULL_RTX;
3431
3432 if (base)
3433 addr = gen_rtx_fmt_ee (PLUS, address_mode, addr, base);
3434
3435 start_sequence ();
3436 /* To avoid splitting addressing modes, pretend that no cse will
3437 follow. */
3438 old_cse_not_expected = cse_not_expected;
3439 cse_not_expected = true;
3440 addr = memory_address_addr_space (mem_mode, addr, as);
3441 cse_not_expected = old_cse_not_expected;
3442 seq = get_insns ();
3443 end_sequence ();
3444
3445 acost = seq_cost (seq, speed);
3446 acost += address_cost (addr, mem_mode, as, speed);
3447
3448 if (!acost)
3449 acost = 1;
3450 data->costs[sym_p][var_p][off_p][rat_p] = acost;
3451 }
3452
3453 /* On some targets, it is quite expensive to load a symbol into a register,
3454 which makes addresses that contain symbols look much more expensive.
3455 However, the symbol will have to be loaded in any case before the
3456 loop (and quite likely we have it in a register already), so it does not
3457 make much sense to penalize them too heavily. So make some final
3458 tweaks for the SYMBOL_PRESENT modes:
3459
3460 If VAR_PRESENT is false, and the mode obtained by changing symbol to
3461 var is cheaper, use this mode with a small penalty.
3462 If VAR_PRESENT is true, check whether the mode with
3463 SYMBOL_PRESENT = false is cheaper even with the cost of the addition, and
3464 if this is the case, use it. */
3465 add_c = add_cost (speed, address_mode);
3466 for (i = 0; i < 8; i++)
3467 {
3468 var_p = i & 1;
3469 off_p = (i >> 1) & 1;
3470 rat_p = (i >> 2) & 1;
3471
3472 acost = data->costs[0][1][off_p][rat_p] + 1;
3473 if (var_p)
3474 acost += add_c;
3475
3476 if (acost < data->costs[1][var_p][off_p][rat_p])
3477 data->costs[1][var_p][off_p][rat_p] = acost;
3478 }
3479
3480 if (dump_file && (dump_flags & TDF_DETAILS))
3481 {
3482 fprintf (dump_file, "Address costs:\n");
3483
3484 for (i = 0; i < 16; i++)
3485 {
3486 sym_p = i & 1;
3487 var_p = (i >> 1) & 1;
3488 off_p = (i >> 2) & 1;
3489 rat_p = (i >> 3) & 1;
3490
3491 fprintf (dump_file, " ");
3492 if (sym_p)
3493 fprintf (dump_file, "sym + ");
3494 if (var_p)
3495 fprintf (dump_file, "var + ");
3496 if (off_p)
3497 fprintf (dump_file, "cst + ");
3498 if (rat_p)
3499 fprintf (dump_file, "rat * ");
3500
3501 acost = data->costs[sym_p][var_p][off_p][rat_p];
3502 fprintf (dump_file, "index costs %d\n", acost);
3503 }
3504 if (has_predec[mem_mode] || has_postdec[mem_mode]
3505 || has_preinc[mem_mode] || has_postinc[mem_mode])
3506 fprintf (dump_file, " May include autoinc/dec\n");
3507 fprintf (dump_file, "\n");
3508 }
3509
3510 address_cost_data_list[data_index] = data;
3511 }
3512
3513 bits = GET_MODE_BITSIZE (address_mode);
3514 mask = ~(~(unsigned HOST_WIDE_INT) 0 << (bits - 1) << 1);
3515 offset &= mask;
3516 if ((offset >> (bits - 1) & 1))
3517 offset |= ~mask;
3518 s_offset = offset;
3519
3520 autoinc = false;
3521 autoinc_type = AINC_NONE;
3522 msize = GET_MODE_SIZE (mem_mode);
3523 autoinc_offset = offset;
3524 if (stmt_after_inc)
3525 autoinc_offset += ratio * cstep;
3526 if (symbol_present || var_present || ratio != 1)
3527 autoinc = false;
3528 else
3529 {
3530 if (has_postinc[mem_mode] && autoinc_offset == 0
3531 && msize == cstep)
3532 autoinc_type = AINC_POST_INC;
3533 else if (has_postdec[mem_mode] && autoinc_offset == 0
3534 && msize == -cstep)
3535 autoinc_type = AINC_POST_DEC;
3536 else if (has_preinc[mem_mode] && autoinc_offset == msize
3537 && msize == cstep)
3538 autoinc_type = AINC_PRE_INC;
3539 else if (has_predec[mem_mode] && autoinc_offset == -msize
3540 && msize == -cstep)
3541 autoinc_type = AINC_PRE_DEC;
3542
3543 if (autoinc_type != AINC_NONE)
3544 autoinc = true;
3545 }
3546
3547 cost = 0;
3548 offset_p = (s_offset != 0
3549 && data->min_offset <= s_offset
3550 && s_offset <= data->max_offset);
3551 ratio_p = (ratio != 1
3552 && multiplier_allowed_in_address_p (ratio, mem_mode, as));
3553
3554 if (ratio != 1 && !ratio_p)
3555 cost += mult_by_coeff_cost (ratio, address_mode, speed);
3556
3557 if (s_offset && !offset_p && !symbol_present)
3558 cost += add_cost (speed, address_mode);
3559
3560 if (may_autoinc)
3561 *may_autoinc = autoinc;
3562 if (autoinc)
3563 acost = data->ainc_costs[autoinc_type];
3564 else
3565 acost = data->costs[symbol_present][var_present][offset_p][ratio_p];
3566 complexity = (symbol_present != 0) + (var_present != 0) + offset_p + ratio_p;
3567 return new_cost (cost + acost, complexity);
3568 }
3569
3570 /* Calculate the SPEED or size cost of shiftadd EXPR in MODE. MULT is
3571 the EXPR operand holding the shift. COST0 and COST1 are the costs for
3572 calculating the operands of EXPR. Returns true if successful, and returns
3573 the cost in COST. */
3574
3575 static bool
3576 get_shiftadd_cost (tree expr, enum machine_mode mode, comp_cost cost0,
3577 comp_cost cost1, tree mult, bool speed, comp_cost *cost)
3578 {
3579 comp_cost res;
3580 tree op1 = TREE_OPERAND (expr, 1);
3581 tree cst = TREE_OPERAND (mult, 1);
3582 tree multop = TREE_OPERAND (mult, 0);
3583 int m = exact_log2 (int_cst_value (cst));
3584 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
3585 int sa_cost;
3586 bool equal_p = false;
3587
3588 if (!(m >= 0 && m < maxm))
3589 return false;
3590
3591 if (operand_equal_p (op1, mult, 0))
3592 equal_p = true;
3593
3594 sa_cost = (TREE_CODE (expr) != MINUS_EXPR
3595 ? shiftadd_cost (speed, mode, m)
3596 : (equal_p
3597 ? shiftsub1_cost (speed, mode, m)
3598 : shiftsub0_cost (speed, mode, m)));
3599 res = new_cost (sa_cost, 0);
3600 res = add_costs (res, equal_p ? cost0 : cost1);
3601
3602 STRIP_NOPS (multop);
3603 if (!is_gimple_val (multop))
3604 res = add_costs (res, force_expr_to_var_cost (multop, speed));
3605
3606 *cost = res;
3607 return true;
3608 }
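
/* Illustration: for expr = a + b * 4, MULT is b * 4 and m == 2, so
   the cost charged is shiftadd_cost (a single shift-and-add, e.g. one
   x86 "lea (%rax,%rbx,4), %rcx"), rather than a full multiplication
   followed by an addition. */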
3609
3610 /* Estimates cost of forcing expression EXPR into a variable. */
3611
3612 static comp_cost
3613 force_expr_to_var_cost (tree expr, bool speed)
3614 {
3615 static bool costs_initialized = false;
3616 static unsigned integer_cost [2];
3617 static unsigned symbol_cost [2];
3618 static unsigned address_cost [2];
3619 tree op0, op1;
3620 comp_cost cost0, cost1, cost;
3621 enum machine_mode mode;
3622
3623 if (!costs_initialized)
3624 {
3625 tree type = build_pointer_type (integer_type_node);
3626 tree var, addr;
3627 rtx x;
3628 int i;
3629
3630 var = create_tmp_var_raw (integer_type_node, "test_var");
3631 TREE_STATIC (var) = 1;
3632 x = produce_memory_decl_rtl (var, NULL);
3633 SET_DECL_RTL (var, x);
3634
3635 addr = build1 (ADDR_EXPR, type, var);
3636
3637
3638 for (i = 0; i < 2; i++)
3639 {
3640 integer_cost[i] = computation_cost (build_int_cst (integer_type_node,
3641 2000), i);
3642
3643 symbol_cost[i] = computation_cost (addr, i) + 1;
3644
3645 address_cost[i]
3646 = computation_cost (fold_build_pointer_plus_hwi (addr, 2000), i) + 1;
3647 if (dump_file && (dump_flags & TDF_DETAILS))
3648 {
3649 fprintf (dump_file, "force_expr_to_var_cost %s costs:\n", i ? "speed" : "size");
3650 fprintf (dump_file, " integer %d\n", (int) integer_cost[i]);
3651 fprintf (dump_file, " symbol %d\n", (int) symbol_cost[i]);
3652 fprintf (dump_file, " address %d\n", (int) address_cost[i]);
3653 fprintf (dump_file, " other %d\n", (int) target_spill_cost[i]);
3654 fprintf (dump_file, "\n");
3655 }
3656 }
3657
3658 costs_initialized = true;
3659 }
3660
3661 STRIP_NOPS (expr);
3662
3663 if (SSA_VAR_P (expr))
3664 return no_cost;
3665
3666 if (is_gimple_min_invariant (expr))
3667 {
3668 if (TREE_CODE (expr) == INTEGER_CST)
3669 return new_cost (integer_cost [speed], 0);
3670
3671 if (TREE_CODE (expr) == ADDR_EXPR)
3672 {
3673 tree obj = TREE_OPERAND (expr, 0);
3674
3675 if (TREE_CODE (obj) == VAR_DECL
3676 || TREE_CODE (obj) == PARM_DECL
3677 || TREE_CODE (obj) == RESULT_DECL)
3678 return new_cost (symbol_cost [speed], 0);
3679 }
3680
3681 return new_cost (address_cost [speed], 0);
3682 }
3683
3684 switch (TREE_CODE (expr))
3685 {
3686 case POINTER_PLUS_EXPR:
3687 case PLUS_EXPR:
3688 case MINUS_EXPR:
3689 case MULT_EXPR:
3690 op0 = TREE_OPERAND (expr, 0);
3691 op1 = TREE_OPERAND (expr, 1);
3692 STRIP_NOPS (op0);
3693 STRIP_NOPS (op1);
3694 break;
3695
3696 CASE_CONVERT:
3697 case NEGATE_EXPR:
3698 op0 = TREE_OPERAND (expr, 0);
3699 STRIP_NOPS (op0);
3700 op1 = NULL_TREE;
3701 break;
3702
3703 default:
3704 /* Just an arbitrary value, FIXME. */
3705 return new_cost (target_spill_cost[speed], 0);
3706 }
3707
3708 if (op0 == NULL_TREE
3709 || TREE_CODE (op0) == SSA_NAME || CONSTANT_CLASS_P (op0))
3710 cost0 = no_cost;
3711 else
3712 cost0 = force_expr_to_var_cost (op0, speed);
3713
3714 if (op1 == NULL_TREE
3715 || TREE_CODE (op1) == SSA_NAME || CONSTANT_CLASS_P (op1))
3716 cost1 = no_cost;
3717 else
3718 cost1 = force_expr_to_var_cost (op1, speed);
3719
3720 mode = TYPE_MODE (TREE_TYPE (expr));
3721 switch (TREE_CODE (expr))
3722 {
3723 case POINTER_PLUS_EXPR:
3724 case PLUS_EXPR:
3725 case MINUS_EXPR:
3726 case NEGATE_EXPR:
3727 cost = new_cost (add_cost (speed, mode), 0);
3728 if (TREE_CODE (expr) != NEGATE_EXPR)
3729 {
3730 tree mult = NULL_TREE;
3731 comp_cost sa_cost;
3732 if (TREE_CODE (op1) == MULT_EXPR)
3733 mult = op1;
3734 else if (TREE_CODE (op0) == MULT_EXPR)
3735 mult = op0;
3736
3737 if (mult != NULL_TREE
3738 && cst_and_fits_in_hwi (TREE_OPERAND (mult, 1))
3739 && get_shiftadd_cost (expr, mode, cost0, cost1, mult,
3740 speed, &sa_cost))
3741 return sa_cost;
3742 }
3743 break;
3744
3745 CASE_CONVERT:
3746 {
3747 tree inner_mode, outer_mode;
3748 outer_mode = TREE_TYPE (expr);
3749 inner_mode = TREE_TYPE (op0);
3750 cost = new_cost (convert_cost (TYPE_MODE (outer_mode),
3751 TYPE_MODE (inner_mode), speed), 0);
3752 }
3753 break;
3754
3755 case MULT_EXPR:
3756 if (cst_and_fits_in_hwi (op0))
3757 cost = new_cost (mult_by_coeff_cost (int_cst_value (op0),
3758 mode, speed), 0);
3759 else if (cst_and_fits_in_hwi (op1))
3760 cost = new_cost (mult_by_coeff_cost (int_cst_value (op1),
3761 mode, speed), 0);
3762 else
3763 return new_cost (target_spill_cost [speed], 0);
3764 break;
3765
3766 default:
3767 gcc_unreachable ();
3768 }
3769
3770 cost = add_costs (cost, cost0);
3771 cost = add_costs (cost, cost1);
3772
3773 /* Bound the cost by target_spill_cost. The parts of complicated
3774 computations often are either loop invariant or at least can
3775 be shared between several iv uses, so letting this grow without
3776 limits would not give reasonable results. */
3777 if (cost.cost > (int) target_spill_cost [speed])
3778 cost.cost = target_spill_cost [speed];
3779
3780 return cost;
3781 }
3782
3783 /* Estimates cost of forcing EXPR into a variable. DEPENDS_ON is a set of the
3784 invariants the computation depends on. */
3785
3786 static comp_cost
3787 force_var_cost (struct ivopts_data *data,
3788 tree expr, bitmap *depends_on)
3789 {
3790 if (depends_on)
3791 {
3792 fd_ivopts_data = data;
3793 walk_tree (&expr, find_depends, depends_on, NULL);
3794 }
3795
3796 return force_expr_to_var_cost (expr, data->speed);
3797 }
3798
3799 /* Estimates cost of expressing address ADDR as var + symbol + offset. The
3800 value of the offset is added to OFFSET; SYMBOL_PRESENT and VAR_PRESENT are set
3801 to false if the corresponding part is missing. DEPENDS_ON is a set of the
3802 invariants the computation depends on. */
3803
3804 static comp_cost
3805 split_address_cost (struct ivopts_data *data,
3806 tree addr, bool *symbol_present, bool *var_present,
3807 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3808 {
3809 tree core;
3810 HOST_WIDE_INT bitsize;
3811 HOST_WIDE_INT bitpos;
3812 tree toffset;
3813 enum machine_mode mode;
3814 int unsignedp, volatilep;
3815
3816 core = get_inner_reference (addr, &bitsize, &bitpos, &toffset, &mode,
3817 &unsignedp, &volatilep, false);
3818
3819 if (toffset != 0
3820 || bitpos % BITS_PER_UNIT != 0
3821 || TREE_CODE (core) != VAR_DECL)
3822 {
3823 *symbol_present = false;
3824 *var_present = true;
3825 fd_ivopts_data = data;
3826 walk_tree (&addr, find_depends, depends_on, NULL);
3827 return new_cost (target_spill_cost[data->speed], 0);
3828 }
3829
3830 *offset += bitpos / BITS_PER_UNIT;
3831 if (TREE_STATIC (core)
3832 || DECL_EXTERNAL (core))
3833 {
3834 *symbol_present = true;
3835 *var_present = false;
3836 return no_cost;
3837 }
3838
3839 *symbol_present = false;
3840 *var_present = true;
3841 return no_cost;
3842 }
3843
3844 /* Estimates cost of expressing difference of addresses E1 - E2 as
3845 var + symbol + offset. The value of the offset is added to OFFSET;
3846 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3847 part is missing. DEPENDS_ON is a set of the invariants the computation
3848 depends on. */
3849
3850 static comp_cost
3851 ptr_difference_cost (struct ivopts_data *data,
3852 tree e1, tree e2, bool *symbol_present, bool *var_present,
3853 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3854 {
3855 HOST_WIDE_INT diff = 0;
3856 aff_tree aff_e1, aff_e2;
3857 tree type;
3858
3859 gcc_assert (TREE_CODE (e1) == ADDR_EXPR);
3860
3861 if (ptr_difference_const (e1, e2, &diff))
3862 {
3863 *offset += diff;
3864 *symbol_present = false;
3865 *var_present = false;
3866 return no_cost;
3867 }
3868
3869 if (integer_zerop (e2))
3870 return split_address_cost (data, TREE_OPERAND (e1, 0),
3871 symbol_present, var_present, offset, depends_on);
3872
3873 *symbol_present = false;
3874 *var_present = true;
3875
3876 type = signed_type_for (TREE_TYPE (e1));
3877 tree_to_aff_combination (e1, type, &aff_e1);
3878 tree_to_aff_combination (e2, type, &aff_e2);
3879 aff_combination_scale (&aff_e2, -1);
3880 aff_combination_add (&aff_e1, &aff_e2);
3881
3882 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3883 }
3884
3885 /* Estimates cost of expressing difference E1 - E2 as
3886 var + symbol + offset. The value of the offset is added to OFFSET;
3887 SYMBOL_PRESENT and VAR_PRESENT are set to false if the corresponding
3888 part is missing. DEPENDS_ON is a set of the invariants the computation
3889 depends on. */
3890
3891 static comp_cost
3892 difference_cost (struct ivopts_data *data,
3893 tree e1, tree e2, bool *symbol_present, bool *var_present,
3894 unsigned HOST_WIDE_INT *offset, bitmap *depends_on)
3895 {
3896 enum machine_mode mode = TYPE_MODE (TREE_TYPE (e1));
3897 unsigned HOST_WIDE_INT off1, off2;
3898 aff_tree aff_e1, aff_e2;
3899 tree type;
3900
3901 e1 = strip_offset (e1, &off1);
3902 e2 = strip_offset (e2, &off2);
3903 *offset += off1 - off2;
3904
3905 STRIP_NOPS (e1);
3906 STRIP_NOPS (e2);
3907
3908 if (TREE_CODE (e1) == ADDR_EXPR)
3909 return ptr_difference_cost (data, e1, e2, symbol_present, var_present,
3910 offset, depends_on);
3911 *symbol_present = false;
3912
3913 if (operand_equal_p (e1, e2, 0))
3914 {
3915 *var_present = false;
3916 return no_cost;
3917 }
3918
3919 *var_present = true;
3920
3921 if (integer_zerop (e2))
3922 return force_var_cost (data, e1, depends_on);
3923
3924 if (integer_zerop (e1))
3925 {
3926 comp_cost cost = force_var_cost (data, e2, depends_on);
3927 cost.cost += mult_by_coeff_cost (-1, mode, data->speed);
3928 return cost;
3929 }
3930
3931 type = signed_type_for (TREE_TYPE (e1));
3932 tree_to_aff_combination (e1, type, &aff_e1);
3933 tree_to_aff_combination (e2, type, &aff_e2);
3934 aff_combination_scale (&aff_e2, -1);
3935 aff_combination_add (&aff_e1, &aff_e2);
3936
3937 return force_var_cost (data, aff_combination_to_tree (&aff_e1), depends_on);
3938 }
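
/* Illustration: for e1 = &a[10] and e2 = &a[2] with 4-byte elements,
   the whole difference folds into *offset += 32 via ptr_difference_cost
   and no_cost is returned; only when the difference is not a
   compile-time constant do we fall back to force_var_cost on e1 - e2. */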
3939
3940 /* Returns true if AFF1 and AFF2 are identical. */
3941
3942 static bool
3943 compare_aff_trees (aff_tree *aff1, aff_tree *aff2)
3944 {
3945 unsigned i;
3946
3947 if (aff1->n != aff2->n)
3948 return false;
3949
3950 for (i = 0; i < aff1->n; i++)
3951 {
3952 if (aff1->elts[i].coef != aff2->elts[i].coef)
3953 return false;
3954
3955 if (!operand_equal_p (aff1->elts[i].val, aff2->elts[i].val, 0))
3956 return false;
3957 }
3958 return true;
3959 }
3960
3961 /* Stores EXPR in DATA->inv_expr_tab, and assigns it an inv_expr_id. */
3962
3963 static int
3964 get_expr_id (struct ivopts_data *data, tree expr)
3965 {
3966 struct iv_inv_expr_ent ent;
3967 struct iv_inv_expr_ent **slot;
3968
3969 ent.expr = expr;
3970 ent.hash = iterative_hash_expr (expr, 0);
3971 slot = data->inv_expr_tab->find_slot (&ent, INSERT);
3972 if (*slot)
3973 return (*slot)->id;
3974
3975 *slot = XNEW (struct iv_inv_expr_ent);
3976 (*slot)->expr = expr;
3977 (*slot)->hash = ent.hash;
3978 (*slot)->id = data->inv_expr_id++;
3979 return (*slot)->id;
3980 }
3981
3982 /* Returns the pseudo expr id if expression UBASE - RATIO * CBASE
3983 requires a new compiler-generated temporary. Returns -1 otherwise.
3984 ADDRESS_P is a flag indicating if the expression is for address
3985 computation. */
3986
3987 static int
3988 get_loop_invariant_expr_id (struct ivopts_data *data, tree ubase,
3989 tree cbase, HOST_WIDE_INT ratio,
3990 bool address_p)
3991 {
3992 aff_tree ubase_aff, cbase_aff;
3993 tree expr, ub, cb;
3994
3995 STRIP_NOPS (ubase);
3996 STRIP_NOPS (cbase);
3997 ub = ubase;
3998 cb = cbase;
3999
4000 if ((TREE_CODE (ubase) == INTEGER_CST)
4001 && (TREE_CODE (cbase) == INTEGER_CST))
4002 return -1;
4003
4004 /* Strips the constant part. */
4005 if (TREE_CODE (ubase) == PLUS_EXPR
4006 || TREE_CODE (ubase) == MINUS_EXPR
4007 || TREE_CODE (ubase) == POINTER_PLUS_EXPR)
4008 {
4009 if (TREE_CODE (TREE_OPERAND (ubase, 1)) == INTEGER_CST)
4010 ubase = TREE_OPERAND (ubase, 0);
4011 }
4012
4013 /* Strips the constant part. */
4014 if (TREE_CODE (cbase) == PLUS_EXPR
4015 || TREE_CODE (cbase) == MINUS_EXPR
4016 || TREE_CODE (cbase) == POINTER_PLUS_EXPR)
4017 {
4018 if (TREE_CODE (TREE_OPERAND (cbase, 1)) == INTEGER_CST)
4019 cbase = TREE_OPERAND (cbase, 0);
4020 }
4021
4022 if (address_p)
4023 {
4024 if (((TREE_CODE (ubase) == SSA_NAME)
4025 || (TREE_CODE (ubase) == ADDR_EXPR
4026 && is_gimple_min_invariant (ubase)))
4027 && (TREE_CODE (cbase) == INTEGER_CST))
4028 return -1;
4029
4030 if (((TREE_CODE (cbase) == SSA_NAME)
4031 || (TREE_CODE (cbase) == ADDR_EXPR
4032 && is_gimple_min_invariant (cbase)))
4033 && (TREE_CODE (ubase) == INTEGER_CST))
4034 return -1;
4035 }
4036
4037 if (ratio == 1)
4038 {
4039 if (operand_equal_p (ubase, cbase, 0))
4040 return -1;
4041
4042 if (TREE_CODE (ubase) == ADDR_EXPR
4043 && TREE_CODE (cbase) == ADDR_EXPR)
4044 {
4045 tree usym, csym;
4046
4047 usym = TREE_OPERAND (ubase, 0);
4048 csym = TREE_OPERAND (cbase, 0);
4049 if (TREE_CODE (usym) == ARRAY_REF)
4050 {
4051 tree ind = TREE_OPERAND (usym, 1);
4052 if (TREE_CODE (ind) == INTEGER_CST
4053 && tree_fits_shwi_p (ind)
4054 && tree_to_shwi (ind) == 0)
4055 usym = TREE_OPERAND (usym, 0);
4056 }
4057 if (TREE_CODE (csym) == ARRAY_REF)
4058 {
4059 tree ind = TREE_OPERAND (csym, 1);
4060 if (TREE_CODE (ind) == INTEGER_CST
4061 && tree_fits_shwi_p (ind)
4062 && tree_to_shwi (ind) == 0)
4063 csym = TREE_OPERAND (csym, 0);
4064 }
4065 if (operand_equal_p (usym, csym, 0))
4066 return -1;
4067 }
4068 /* Now do a more complex comparison. */
4069 tree_to_aff_combination (ubase, TREE_TYPE (ubase), &ubase_aff);
4070 tree_to_aff_combination (cbase, TREE_TYPE (cbase), &cbase_aff);
4071 if (compare_aff_trees (&ubase_aff, &cbase_aff))
4072 return -1;
4073 }
4074
4075 tree_to_aff_combination (ub, TREE_TYPE (ub), &ubase_aff);
4076 tree_to_aff_combination (cb, TREE_TYPE (cb), &cbase_aff);
4077
4078 aff_combination_scale (&cbase_aff, -1 * ratio);
4079 aff_combination_add (&ubase_aff, &cbase_aff);
4080 expr = aff_combination_to_tree (&ubase_aff);
4081 return get_expr_id (data, expr);
4082 }
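
/* A hypothetical example: for UBASE = &a[i] + 16, CBASE = &a[i] and
RATIO == 1, the constant part of UBASE is stripped, operand_equal_p
matches the stripped bases, and -1 is returned -- UBASE - CBASE folds
to the constant 16, so no compiler-generated temporary is needed.
With CBASE = &b[i] instead, the bases differ, and the combined
expression (&a[i] + 16) - &b[i] is assigned an id by get_expr_id. */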
4083
4084
4085
4086 /* Determines the cost of the computation by which USE is expressed
4087 from induction variable CAND. If ADDRESS_P is true, we just need
4088 to create an address from it, otherwise we want to get it into a
4089 register. A set of invariants we depend on is stored in
4090 DEPENDS_ON. AT is the statement at which the value is computed.
4091 If CAN_AUTOINC is nonnull, use it to record whether autoinc
4092 addressing is likely. */
4093
4094 static comp_cost
4095 get_computation_cost_at (struct ivopts_data *data,
4096 struct iv_use *use, struct iv_cand *cand,
4097 bool address_p, bitmap *depends_on, gimple at,
4098 bool *can_autoinc,
4099 int *inv_expr_id)
4100 {
4101 tree ubase = use->iv->base, ustep = use->iv->step;
4102 tree cbase, cstep;
4103 tree utype = TREE_TYPE (ubase), ctype;
4104 unsigned HOST_WIDE_INT cstepi, offset = 0;
4105 HOST_WIDE_INT ratio, aratio;
4106 bool var_present, symbol_present, stmt_is_after_inc;
4107 comp_cost cost;
4108 widest_int rat;
4109 bool speed = optimize_bb_for_speed_p (gimple_bb (at));
4110 enum machine_mode mem_mode = (address_p
4111 ? TYPE_MODE (TREE_TYPE (*use->op_p))
4112 : VOIDmode);
4113
4114 *depends_on = NULL;
4115
4116 /* Only consider real candidates. */
4117 if (!cand->iv)
4118 return infinite_cost;
4119
4120 cbase = cand->iv->base;
4121 cstep = cand->iv->step;
4122 ctype = TREE_TYPE (cbase);
4123
4124 if (TYPE_PRECISION (utype) > TYPE_PRECISION (ctype))
4125 {
4126 /* We do not have enough precision to express the values of the use. */
4127 return infinite_cost;
4128 }
4129
4130 if (address_p
4131 || (use->iv->base_object
4132 && cand->iv->base_object
4133 && POINTER_TYPE_P (TREE_TYPE (use->iv->base_object))
4134 && POINTER_TYPE_P (TREE_TYPE (cand->iv->base_object))))
4135 {
4136 /* Do not try to express the address of an object with a computation
4137 based on the address of a different object. This may cause problems
4138 in RTL-level alias analysis (which does not expect this to happen,
4139 as it is illegal in C), and would be unlikely to be useful
4140 anyway. */
4141 if (use->iv->base_object
4142 && cand->iv->base_object
4143 && !operand_equal_p (use->iv->base_object, cand->iv->base_object, 0))
4144 return infinite_cost;
4145 }
4146
4147 if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
4148 {
4149 /* TODO -- add direct handling of this case. */
4150 goto fallback;
4151 }
4152
4153 /* CSTEPI is subtracted from the offset in case the statement is after
4154 the increment. If the step is not constant, we use zero instead.
4155 This is a bit imprecise (there is the extra addition), but
4156 redundancy elimination is likely to transform the code so that
4157 it uses the value of the variable before the increment anyway,
4158 so this is not too unrealistic. */
4159 if (cst_and_fits_in_hwi (cstep))
4160 cstepi = int_cst_value (cstep);
4161 else
4162 cstepi = 0;
4163
4164 if (!constant_multiple_of (ustep, cstep, &rat))
4165 return infinite_cost;
4166
4167 if (wi::fits_shwi_p (rat))
4168 ratio = rat.to_shwi ();
4169 else
4170 return infinite_cost;
4171
4172 STRIP_NOPS (cbase);
4173 ctype = TREE_TYPE (cbase);
4174
4175 stmt_is_after_inc = stmt_after_increment (data->current_loop, cand, at);
4176
4177 /* use = ubase + ratio * (var - cbase). If either cbase is a constant
4178 or ratio == 1, it is better to handle this like
4179
4180 ubase - ratio * cbase + ratio * var
4181
4182 (this also holds in the case ratio == -1; TODO). */
4183
4184 if (cst_and_fits_in_hwi (cbase))
4185 {
4186 offset = - ratio * int_cst_value (cbase);
4187 cost = difference_cost (data,
4188 ubase, build_int_cst (utype, 0),
4189 &symbol_present, &var_present, &offset,
4190 depends_on);
4191 cost.cost /= avg_loop_niter (data->current_loop);
4192 }
4193 else if (ratio == 1)
4194 {
4195 tree real_cbase = cbase;
4196
4197 /* Check to see if any adjustment is needed. */
4198 if (cstepi == 0 && stmt_is_after_inc)
4199 {
4200 aff_tree real_cbase_aff;
4201 aff_tree cstep_aff;
4202
4203 tree_to_aff_combination (cbase, TREE_TYPE (real_cbase),
4204 &real_cbase_aff);
4205 tree_to_aff_combination (cstep, TREE_TYPE (cstep), &cstep_aff);
4206
4207 aff_combination_add (&real_cbase_aff, &cstep_aff);
4208 real_cbase = aff_combination_to_tree (&real_cbase_aff);
4209 }
4210
4211 cost = difference_cost (data,
4212 ubase, real_cbase,
4213 &symbol_present, &var_present, &offset,
4214 depends_on);
4215 cost.cost /= avg_loop_niter (data->current_loop);
4216 }
4217 else if (address_p
4218 && !POINTER_TYPE_P (ctype)
4219 && multiplier_allowed_in_address_p
4220 (ratio, mem_mode,
4221 TYPE_ADDR_SPACE (TREE_TYPE (utype))))
4222 {
4223 cbase
4224 = fold_build2 (MULT_EXPR, ctype, cbase, build_int_cst (ctype, ratio));
4225 cost = difference_cost (data,
4226 ubase, cbase,
4227 &symbol_present, &var_present, &offset,
4228 depends_on);
4229 cost.cost /= avg_loop_niter (data->current_loop);
4230 }
4231 else
4232 {
4233 cost = force_var_cost (data, cbase, depends_on);
4234 cost = add_costs (cost,
4235 difference_cost (data,
4236 ubase, build_int_cst (utype, 0),
4237 &symbol_present, &var_present,
4238 &offset, depends_on));
4239 cost.cost /= avg_loop_niter (data->current_loop);
4240 cost.cost += add_cost (data->speed, TYPE_MODE (ctype));
4241 }
4242
4243 if (inv_expr_id)
4244 {
4245 *inv_expr_id =
4246 get_loop_invariant_expr_id (data, ubase, cbase, ratio, address_p);
4247 /* Clear depends on. */
4248 if (*inv_expr_id != -1 && depends_on && *depends_on)
4249 bitmap_clear (*depends_on);
4250 }
4251
4252 /* If we are after the increment, the value of the candidate is higher by
4253 one iteration. */
4254 if (stmt_is_after_inc)
4255 offset -= ratio * cstepi;
4256
4257 /* Now the computation is in the shape symbol + var1 + const + ratio * var2.
4258 (The symbol/var1/const parts may be omitted.) If we are looking for an
4259 address, find the cost of addressing this. */
4260 if (address_p)
4261 return add_costs (cost,
4262 get_address_cost (symbol_present, var_present,
4263 offset, ratio, cstepi,
4264 mem_mode,
4265 TYPE_ADDR_SPACE (TREE_TYPE (utype)),
4266 speed, stmt_is_after_inc,
4267 can_autoinc));
4268
4269 /* Otherwise estimate the costs for computing the expression. */
4270 if (!symbol_present && !var_present && !offset)
4271 {
4272 if (ratio != 1)
4273 cost.cost += mult_by_coeff_cost (ratio, TYPE_MODE (ctype), speed);
4274 return cost;
4275 }
4276
4277 /* Symbol + offset should be compile-time computable, so consider them
4278 added once to the variable, if present. */
4279 if (var_present && (symbol_present || offset))
4280 cost.cost += adjust_setup_cost (data,
4281 add_cost (speed, TYPE_MODE (ctype)));
4282
4283 /* Having an offset does not affect the runtime cost if it is added to
4284 the symbol, but it increases complexity. */
4285 if (offset)
4286 cost.complexity++;
4287
4288 cost.cost += add_cost (speed, TYPE_MODE (ctype));
4289
4290 aratio = ratio > 0 ? ratio : -ratio;
4291 if (aratio != 1)
4292 cost.cost += mult_by_coeff_cost (aratio, TYPE_MODE (ctype), speed);
4293 return cost;
4294
4295 fallback:
4296 if (can_autoinc)
4297 *can_autoinc = false;
4298
4299 {
4300 /* Just get the expression, expand it and measure the cost. */
4301 tree comp = get_computation_at (data->current_loop, use, cand, at);
4302
4303 if (!comp)
4304 return infinite_cost;
4305
4306 if (address_p)
4307 comp = build_simple_mem_ref (comp);
4308
4309 return new_cost (computation_cost (comp, speed), 0);
4310 }
4311 }
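
/* As a concrete (hypothetical) example: for the address use a[i] of a
global array a, with ustep 4 and a candidate with cbase 0 and cstep 1,
constant_multiple_of yields ratio == 4, the cst_and_fits_in_hwi (cbase)
branch computes difference_cost (&a, 0) with SYMBOL_PRESENT set, and
the result has the shape symbol + ratio * var; get_address_cost then
prices the corresponding addressing mode, e.g. something like
a(,%reg,4) on i386. */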
4312
4313 /* Determines the cost of the computation by which USE is expressed
4314 from induction variable CAND. If ADDRESS_P is true, we just need
4315 to create an address from it, otherwise we want to get it into a
4316 register. A set of invariants we depend on is stored in
4317 DEPENDS_ON. If CAN_AUTOINC is nonnull, use it to record whether
4318 autoinc addressing is likely. */
4319
4320 static comp_cost
4321 get_computation_cost (struct ivopts_data *data,
4322 struct iv_use *use, struct iv_cand *cand,
4323 bool address_p, bitmap *depends_on,
4324 bool *can_autoinc, int *inv_expr_id)
4325 {
4326 return get_computation_cost_at (data,
4327 use, cand, address_p, depends_on, use->stmt,
4328 can_autoinc, inv_expr_id);
4329 }
4330
4331 /* Determines cost of basing replacement of USE on CAND in a generic
4332 expression. */
4333
4334 static bool
4335 determine_use_iv_cost_generic (struct ivopts_data *data,
4336 struct iv_use *use, struct iv_cand *cand)
4337 {
4338 bitmap depends_on;
4339 comp_cost cost;
4340 int inv_expr_id = -1;
4341
4342 /* The simple case first -- if we need to express the value of the preserved
4343 original biv, the cost is 0. This also prevents us from counting the
4344 cost of the increment twice -- once at this use and once in the cost of
4345 the candidate. */
4346 if (cand->pos == IP_ORIGINAL
4347 && cand->incremented_at == use->stmt)
4348 {
4349 set_use_iv_cost (data, use, cand, no_cost, NULL, NULL_TREE,
4350 ERROR_MARK, -1);
4351 return true;
4352 }
4353
4354 cost = get_computation_cost (data, use, cand, false, &depends_on,
4355 NULL, &inv_expr_id);
4356
4357 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4358 inv_expr_id);
4359
4360 return !infinite_cost_p (cost);
4361 }
4362
4363 /* Determines cost of basing replacement of USE on CAND in an address. */
4364
4365 static bool
4366 determine_use_iv_cost_address (struct ivopts_data *data,
4367 struct iv_use *use, struct iv_cand *cand)
4368 {
4369 bitmap depends_on;
4370 bool can_autoinc;
4371 int inv_expr_id = -1;
4372 comp_cost cost = get_computation_cost (data, use, cand, true, &depends_on,
4373 &can_autoinc, &inv_expr_id);
4374
4375 if (cand->ainc_use == use)
4376 {
4377 if (can_autoinc)
4378 cost.cost -= cand->cost_step;
4379 /* If we generated the candidate solely for exploiting autoincrement
4380 opportunities, and it turns out it can't be used, set the cost to
4381 infinity to make sure we ignore it. */
4382 else if (cand->pos == IP_AFTER_USE || cand->pos == IP_BEFORE_USE)
4383 cost = infinite_cost;
4384 }
4385 set_use_iv_cost (data, use, cand, cost, depends_on, NULL_TREE, ERROR_MARK,
4386 inv_expr_id);
4387
4388 return !infinite_cost_p (cost);
4389 }
4390
4391 /* Computes value of candidate CAND at position AT in iteration NITER, and
4392 stores it to VAL. */
4393
4394 static void
4395 cand_value_at (struct loop *loop, struct iv_cand *cand, gimple at, tree niter,
4396 aff_tree *val)
4397 {
4398 aff_tree step, delta, nit;
4399 struct iv *iv = cand->iv;
4400 tree type = TREE_TYPE (iv->base);
4401 /* Use an unsigned type for the step; note that unsigned_type_for
4402 handles pointer types as well, so there is no need to special-case
4403 POINTER_TYPE_P here. */
4404 tree steptype = unsigned_type_for (type);
4405
4406 tree_to_aff_combination (iv->step, TREE_TYPE (iv->step), &step);
4407 aff_combination_convert (&step, steptype);
4408 tree_to_aff_combination (niter, TREE_TYPE (niter), &nit);
4409 aff_combination_convert (&nit, steptype);
4410 aff_combination_mult (&nit, &step, &delta);
4411 if (stmt_after_increment (loop, cand, at))
4412 aff_combination_add (&delta, &step);
4413
4414 tree_to_aff_combination (iv->base, type, val);
4415 if (!POINTER_TYPE_P (type))
4416 aff_combination_convert (val, steptype);
4417 aff_combination_add (val, &delta);
4418 }
4419
4420 /* Returns period of induction variable iv. */
4421
4422 static tree
4423 iv_period (struct iv *iv)
4424 {
4425 tree step = iv->step, period, type;
4426 tree pow2div;
4427
4428 gcc_assert (step && TREE_CODE (step) == INTEGER_CST);
4429
4430 type = unsigned_type_for (TREE_TYPE (step));
4431 /* The period of the iv is lcm (step, type_range) / step - 1,
4432 i.e., N * type_range / step - 1. Since the type range is a power
4433 of two, N == step >> num_of_ending_zeros_binary (step),
4434 so the final result is
4435
4436 (type_range >> num_of_ending_zeros_binary (step)) - 1
4437
4438 */
4439 pow2div = num_ending_zeros (step);
4440
4441 period = build_low_bits_mask (type,
4442 (TYPE_PRECISION (type)
4443 - tree_to_uhwi (pow2div)));
4444
4445 return period;
4446 }
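
/* For example, for a 32-bit unsigned iv with step 12 (== 4 * 3),
num_ending_zeros is 2, so the period is (2^32 >> 2) - 1 == 0x3fffffff;
the iv takes period + 1 distinct values before it repeats. A sketch of
the same computation on plain integers (hypothetical helper, assuming
the shift stays below the HOST_WIDE_INT width):

unsigned HOST_WIDE_INT
iv_period_value (unsigned HOST_WIDE_INT step, unsigned precision)
{
unsigned tz = ctz_hwi (step);
return ((unsigned HOST_WIDE_INT) 1 << (precision - tz)) - 1;
}
*/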
4447
4448 /* Returns the comparison operator used when eliminating the iv USE. */
4449
4450 static enum tree_code
4451 iv_elimination_compare (struct ivopts_data *data, struct iv_use *use)
4452 {
4453 struct loop *loop = data->current_loop;
4454 basic_block ex_bb;
4455 edge exit;
4456
4457 ex_bb = gimple_bb (use->stmt);
4458 exit = EDGE_SUCC (ex_bb, 0);
4459 if (flow_bb_inside_loop_p (loop, exit->dest))
4460 exit = EDGE_SUCC (ex_bb, 1);
4461
4462 return (exit->flags & EDGE_TRUE_VALUE ? EQ_EXPR : NE_EXPR);
4463 }
4464
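/* Strips from EXP type conversions that change neither the value nor
whether the type of the value may wrap on overflow. */
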
4465 static tree
4466 strip_wrap_conserving_type_conversions (tree exp)
4467 {
4468 while (tree_ssa_useless_type_conversion (exp)
4469 && (nowrap_type_p (TREE_TYPE (exp))
4470 == nowrap_type_p (TREE_TYPE (TREE_OPERAND (exp, 0)))))
4471 exp = TREE_OPERAND (exp, 0);
4472 return exp;
4473 }
4474
4475 /* Walk the SSA form and check whether E == WHAT. Fairly simplistic; we
4476 only check for an exact match. */
4477
4478 static bool
4479 expr_equal_p (tree e, tree what)
4480 {
4481 gimple stmt;
4482 enum tree_code code;
4483
4484 e = strip_wrap_conserving_type_conversions (e);
4485 what = strip_wrap_conserving_type_conversions (what);
4486
4487 code = TREE_CODE (what);
4488 if (TREE_TYPE (e) != TREE_TYPE (what))
4489 return false;
4490
4491 if (operand_equal_p (e, what, 0))
4492 return true;
4493
4494 if (TREE_CODE (e) != SSA_NAME)
4495 return false;
4496
4497 stmt = SSA_NAME_DEF_STMT (e);
4498 if (gimple_code (stmt) != GIMPLE_ASSIGN
4499 || gimple_assign_rhs_code (stmt) != code)
4500 return false;
4501
4502 switch (get_gimple_rhs_class (code))
4503 {
4504 case GIMPLE_BINARY_RHS:
4505 if (!expr_equal_p (gimple_assign_rhs2 (stmt), TREE_OPERAND (what, 1)))
4506 return false;
4507 /* Fallthru. */
4508
4509 case GIMPLE_UNARY_RHS:
4510 case GIMPLE_SINGLE_RHS:
4511 return expr_equal_p (gimple_assign_rhs1 (stmt), TREE_OPERAND (what, 0));
4512 default:
4513 return false;
4514 }
4515 }
4516
4517 /* Returns true if we can prove that BASE - OFFSET does not overflow. For now,
4518 we only detect the situation that BASE = SOMETHING + OFFSET, where the
4519 calculation is performed in non-wrapping type.
4520
4521 TODO: More generally, we could test for the situation that
4522 BASE = SOMETHING + OFFSET' and OFFSET is between OFFSET' and zero.
4523 This would require knowing the sign of OFFSET.
4524
4525 Also, we only look for the first addition in the computation of BASE.
4526 More complex analysis would be better, but introducing it just for
4527 this optimization seems like overkill. */
4528
4529 static bool
4530 difference_cannot_overflow_p (tree base, tree offset)
4531 {
4532 enum tree_code code;
4533 tree e1, e2;
4534
4535 if (!nowrap_type_p (TREE_TYPE (base)))
4536 return false;
4537
4538 base = expand_simple_operations (base);
4539
4540 if (TREE_CODE (base) == SSA_NAME)
4541 {
4542 gimple stmt = SSA_NAME_DEF_STMT (base);
4543
4544 if (gimple_code (stmt) != GIMPLE_ASSIGN)
4545 return false;
4546
4547 code = gimple_assign_rhs_code (stmt);
4548 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4549 return false;
4550
4551 e1 = gimple_assign_rhs1 (stmt);
4552 e2 = gimple_assign_rhs2 (stmt);
4553 }
4554 else
4555 {
4556 code = TREE_CODE (base);
4557 if (get_gimple_rhs_class (code) != GIMPLE_BINARY_RHS)
4558 return false;
4559 e1 = TREE_OPERAND (base, 0);
4560 e2 = TREE_OPERAND (base, 1);
4561 }
4562
4563 /* TODO: deeper inspection may be necessary to prove the equality. */
4564 switch (code)
4565 {
4566 case PLUS_EXPR:
4567 return expr_equal_p (e1, offset) || expr_equal_p (e2, offset);
4568 case POINTER_PLUS_EXPR:
4569 return expr_equal_p (e2, offset);
4570
4571 default:
4572 return false;
4573 }
4574 }
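
/* For instance, if BASE is a pointer defined as p_0 = q + a
(a POINTER_PLUS_EXPR) and OFFSET is a, the POINTER_PLUS_EXPR case
matches expr_equal_p (e2, offset), and BASE - OFFSET == q cannot
overflow because the pointer arithmetic is performed in a
non-wrapping type. This is exactly the situation exploited by
iv_elimination_compare_lt below. */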
4575
4576 /* Tries to replace loop exit by one formulated in terms of a LT_EXPR
4577 comparison with CAND. NITER describes the number of iterations of
4578 the loops. If successful, the comparison in COMP_P is altered accordingly.
4579
4580 We aim to handle the following situation:
4581
4582 sometype *base, *p;
4583 int a, b, i;
4584
4585 i = a;
4586 p = p_0 = base + a;
4587
4588 do
4589 {
4590 bla (*p);
4591 p++;
4592 i++;
4593 }
4594 while (i < b);
4595
4596 Here, the number of iterations of the loop is (a + 1 > b) ? 0 : b - a - 1.
4597 We aim to optimize this to
4598
4599 p = p_0 = base + a;
4600 do
4601 {
4602 bla (*p);
4603 p++;
4604 }
4605 while (p < p_0 - a + b);
4606
4607 This preserves correctness, since the pointer arithmetic does not
4608 overflow. More precisely:
4609
4610 1) if a + 1 <= b, then p_0 - a + b is the final value of p, hence there is no
4611 overflow in computing it or the values of p.
4612 2) if a + 1 > b, then we need to verify that the expression p_0 - a does not
4613 overflow. To prove this, we use the fact that p_0 = base + a. */
4614
4615 static bool
4616 iv_elimination_compare_lt (struct ivopts_data *data,
4617 struct iv_cand *cand, enum tree_code *comp_p,
4618 struct tree_niter_desc *niter)
4619 {
4620 tree cand_type, a, b, mbz, nit_type = TREE_TYPE (niter->niter), offset;
4621 struct aff_tree nit, tmpa, tmpb;
4622 enum tree_code comp;
4623 HOST_WIDE_INT step;
4624
4625 /* We need to know that the candidate induction variable does not overflow.
4626 While more complex analysis may be used to prove this, for now just
4627 check that the variable appears in the original program and that it
4628 is computed in a type that guarantees no overflows. */
4629 cand_type = TREE_TYPE (cand->iv->base);
4630 if (cand->pos != IP_ORIGINAL || !nowrap_type_p (cand_type))
4631 return false;
4632
4633 /* Make sure that the loop iterates till the loop bound is hit, as otherwise
4634 the calculation of the BOUND could overflow, making the comparison
4635 invalid. */
4636 if (!data->loop_single_exit_p)
4637 return false;
4638
4639 /* We need to be able to decide whether candidate is increasing or decreasing
4640 in order to choose the right comparison operator. */
4641 if (!cst_and_fits_in_hwi (cand->iv->step))
4642 return false;
4643 step = int_cst_value (cand->iv->step);
4644
4645 /* Check that the number of iterations matches the expected pattern:
4646 a + 1 > b ? 0 : b - a - 1. */
4647 mbz = niter->may_be_zero;
4648 if (TREE_CODE (mbz) == GT_EXPR)
4649 {
4650 /* Handle a + 1 > b. */
4651 tree op0 = TREE_OPERAND (mbz, 0);
4652 if (TREE_CODE (op0) == PLUS_EXPR && integer_onep (TREE_OPERAND (op0, 1)))
4653 {
4654 a = TREE_OPERAND (op0, 0);
4655 b = TREE_OPERAND (mbz, 1);
4656 }
4657 else
4658 return false;
4659 }
4660 else if (TREE_CODE (mbz) == LT_EXPR)
4661 {
4662 tree op1 = TREE_OPERAND (mbz, 1);
4663
4664 /* Handle b < a + 1. */
4665 if (TREE_CODE (op1) == PLUS_EXPR && integer_onep (TREE_OPERAND (op1, 1)))
4666 {
4667 a = TREE_OPERAND (op1, 0);
4668 b = TREE_OPERAND (mbz, 0);
4669 }
4670 else
4671 return false;
4672 }
4673 else
4674 return false;
4675
4676 /* Expected number of iterations is B - A - 1. Check that it matches
4677 the actual number, i.e., that B - A - NITER = 1. */
4678 tree_to_aff_combination (niter->niter, nit_type, &nit);
4679 tree_to_aff_combination (fold_convert (nit_type, a), nit_type, &tmpa);
4680 tree_to_aff_combination (fold_convert (nit_type, b), nit_type, &tmpb);
4681 aff_combination_scale (&nit, -1);
4682 aff_combination_scale (&tmpa, -1);
4683 aff_combination_add (&tmpb, &tmpa);
4684 aff_combination_add (&tmpb, &nit);
4685 if (tmpb.n != 0 || tmpb.offset != 1)
4686 return false;
4687
4688 /* Finally, check that CAND->IV->BASE - CAND->IV->STEP * A does not
4689 overflow. */
4690 offset = fold_build2 (MULT_EXPR, TREE_TYPE (cand->iv->step),
4691 cand->iv->step,
4692 fold_convert (TREE_TYPE (cand->iv->step), a));
4693 if (!difference_cannot_overflow_p (cand->iv->base, offset))
4694 return false;
4695
4696 /* Determine the new comparison operator. */
4697 comp = step < 0 ? GT_EXPR : LT_EXPR;
4698 if (*comp_p == NE_EXPR)
4699 *comp_p = comp;
4700 else if (*comp_p == EQ_EXPR)
4701 *comp_p = invert_tree_comparison (comp, false);
4702 else
4703 gcc_unreachable ();
4704
4705 return true;
4706 }
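
/* To see the B - A - NITER == 1 check at work: with NITER = b - a - 1
(the expected pattern), the affine arithmetic above computes
tmpb = b + (-a) + (-niter) = b - a - (b - a - 1), which has no
remaining elements (tmpb.n == 0) and offset 1, so the check succeeds;
any other iteration count leaves a nonzero element or a different
offset and is rejected. */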
4707
4708 /* Check whether it is possible to express the condition in USE by comparison
4709 of candidate CAND. If so, store the value compared with to BOUND, and the
4710 comparison operator to COMP. */
4711
4712 static bool
4713 may_eliminate_iv (struct ivopts_data *data,
4714 struct iv_use *use, struct iv_cand *cand, tree *bound,
4715 enum tree_code *comp)
4716 {
4717 basic_block ex_bb;
4718 edge exit;
4719 tree period;
4720 struct loop *loop = data->current_loop;
4721 aff_tree bnd;
4722 struct tree_niter_desc *desc = NULL;
4723
4724 if (TREE_CODE (cand->iv->step) != INTEGER_CST)
4725 return false;
4726
4727 /* For now this works only for exits that dominate the loop latch.
4728 TODO: extend to other conditions inside the loop body. */
4729 ex_bb = gimple_bb (use->stmt);
4730 if (use->stmt != last_stmt (ex_bb)
4731 || gimple_code (use->stmt) != GIMPLE_COND
4732 || !dominated_by_p (CDI_DOMINATORS, loop->latch, ex_bb))
4733 return false;
4734
4735 exit = EDGE_SUCC (ex_bb, 0);
4736 if (flow_bb_inside_loop_p (loop, exit->dest))
4737 exit = EDGE_SUCC (ex_bb, 1);
4738 if (flow_bb_inside_loop_p (loop, exit->dest))
4739 return false;
4740
4741 desc = niter_for_exit (data, exit);
4742 if (!desc)
4743 return false;
4744
4745 /* Determine whether we can use the variable to test the exit condition.
4746 This is the case iff the period of the induction variable is greater
4747 than the number of iterations for which the exit condition is true. */
4748 period = iv_period (cand->iv);
4749
4750 /* If the number of iterations is constant, compare against it directly. */
4751 if (TREE_CODE (desc->niter) == INTEGER_CST)
4752 {
4753 /* See cand_value_at. */
4754 if (stmt_after_increment (loop, cand, use->stmt))
4755 {
4756 if (!tree_int_cst_lt (desc->niter, period))
4757 return false;
4758 }
4759 else
4760 {
4761 if (tree_int_cst_lt (period, desc->niter))
4762 return false;
4763 }
4764 }
4765
4766 /* If not, and if this is the only possible exit of the loop, see whether
4767 we can get a conservative estimate on the number of iterations of the
4768 entire loop and compare against that instead. */
4769 else
4770 {
4771 widest_int period_value, max_niter;
4772
4773 max_niter = desc->max;
4774 if (stmt_after_increment (loop, cand, use->stmt))
4775 max_niter += 1;
4776 period_value = wi::to_widest (period);
4777 if (wi::gtu_p (max_niter, period_value))
4778 {
4779 /* See if we can take advantage of inferred loop bound information. */
4780 if (data->loop_single_exit_p)
4781 {
4782 if (!max_loop_iterations (loop, &max_niter))
4783 return false;
4784 /* The loop bound is already adjusted by adding 1. */
4785 if (wi::gtu_p (max_niter, period_value))
4786 return false;
4787 }
4788 else
4789 return false;
4790 }
4791 }
4792
4793 cand_value_at (loop, cand, use->stmt, desc->niter, &bnd);
4794
4795 *bound = fold_convert (TREE_TYPE (cand->iv->base),
4796 aff_combination_to_tree (&bnd));
4797 *comp = iv_elimination_compare (data, use);
4798
4799 /* It is unlikely that computing the number of iterations using division
4800 would be more profitable than keeping the original induction variable. */
4801 if (expression_expensive_p (*bound))
4802 return false;
4803
4804 /* Sometimes it is possible to handle the situation that the number of
4805 iterations may be zero unless additional assumptions hold, by using
4806 < instead of != in the exit condition.
4807
4808 TODO: we could also calculate the value MAY_BE_ZERO ? 0 : NITER and
4809 base the exit condition on it. However, that is often too
4810 expensive. */
4811 if (!integer_zerop (desc->may_be_zero))
4812 return iv_elimination_compare_lt (data, cand, comp, desc);
4813
4814 return true;
4815 }
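
/* As a hypothetical example, in

for (i = 0; i < n; i++)
a[i] = 0;

with a pointer candidate p iterating over a, the exit test on i can be
replaced by comparing p against the BOUND computed by cand_value_at,
roughly p != &a[n] (or p < &a[n] when may_be_zero forces the
iv_elimination_compare_lt path), provided the candidate's period
covers the maximal number of iterations as checked above. */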
4816
4817 /* Calculates the cost of BOUND, if it is a PARM_DECL. A PARM_DECL must
4818 be copied, if it is used in the loop body and DATA->body_includes_call. */
4819
4820 static int
4821 parm_decl_cost (struct ivopts_data *data, tree bound)
4822 {
4823 tree sbound = bound;
4824 STRIP_NOPS (sbound);
4825
4826 if (TREE_CODE (sbound) == SSA_NAME
4827 && SSA_NAME_IS_DEFAULT_DEF (sbound)
4828 && TREE_CODE (SSA_NAME_VAR (sbound)) == PARM_DECL
4829 && data->body_includes_call)
4830 return COSTS_N_INSNS (1);
4831
4832 return 0;
4833 }
4834
4835 /* Determines cost of basing replacement of USE on CAND in a condition. */
4836
4837 static bool
4838 determine_use_iv_cost_condition (struct ivopts_data *data,
4839 struct iv_use *use, struct iv_cand *cand)
4840 {
4841 tree bound = NULL_TREE;
4842 struct iv *cmp_iv;
4843 bitmap depends_on_elim = NULL, depends_on_express = NULL, depends_on;
4844 comp_cost elim_cost, express_cost, cost, bound_cost;
4845 bool ok;
4846 int elim_inv_expr_id = -1, express_inv_expr_id = -1, inv_expr_id;
4847 tree *control_var, *bound_cst;
4848 enum tree_code comp = ERROR_MARK;
4849
4850 /* Only consider real candidates. */
4851 if (!cand->iv)
4852 {
4853 set_use_iv_cost (data, use, cand, infinite_cost, NULL, NULL_TREE,
4854 ERROR_MARK, -1);
4855 return false;
4856 }
4857
4858 /* Try iv elimination. */
4859 if (may_eliminate_iv (data, use, cand, &bound, &comp))
4860 {
4861 elim_cost = force_var_cost (data, bound, &depends_on_elim);
4862 if (elim_cost.cost == 0)
4863 elim_cost.cost = parm_decl_cost (data, bound);
4864 else if (TREE_CODE (bound) == INTEGER_CST)
4865 elim_cost.cost = 0;
4866 /* If we replace a loop condition 'i < n' with 'p < base + n',
4867 depends_on_elim will have 'base' and 'n' set, which implies
4868 that both 'base' and 'n' will be live during the loop. More likely,
4869 'base + n' will be loop invariant, resulting in only one live value
4870 during the loop. So in that case we clear depends_on_elim and set
4871 elim_inv_expr_id instead. */
4872 if (depends_on_elim && bitmap_count_bits (depends_on_elim) > 1)
4873 {
4874 elim_inv_expr_id = get_expr_id (data, bound);
4875 bitmap_clear (depends_on_elim);
4876 }
4877 /* The bound is a loop invariant, so it will only be computed
4878 once. */
4879 elim_cost.cost = adjust_setup_cost (data, elim_cost.cost);
4880 }
4881 else
4882 elim_cost = infinite_cost;
4883
4884 /* Try expressing the original giv. If it is compared with an invariant,
4885 note that we cannot get rid of it. */
4886 ok = extract_cond_operands (data, use->stmt, &control_var, &bound_cst,
4887 NULL, &cmp_iv);
4888 gcc_assert (ok);
4889
4890 /* When the condition is a comparison of the candidate IV against
4891 zero, prefer this IV.
4892
4893 TODO: The constant that we're subtracting from the cost should
4894 be target-dependent. This information should be added to the
4895 target costs for each backend. */
4896 if (!infinite_cost_p (elim_cost) /* Do not try to decrease infinite! */
4897 && integer_zerop (*bound_cst)
4898 && (operand_equal_p (*control_var, cand->var_after, 0)
4899 || operand_equal_p (*control_var, cand->var_before, 0)))
4900 elim_cost.cost -= 1;
4901
4902 express_cost = get_computation_cost (data, use, cand, false,
4903 &depends_on_express, NULL,
4904 &express_inv_expr_id);
4905 fd_ivopts_data = data;
4906 walk_tree (&cmp_iv->base, find_depends, &depends_on_express, NULL);
4907
4908 /* Count the cost of the original bound as well. */
4909 bound_cost = force_var_cost (data, *bound_cst, NULL);
4910 if (bound_cost.cost == 0)
4911 bound_cost.cost = parm_decl_cost (data, *bound_cst);
4912 else if (TREE_CODE (*bound_cst) == INTEGER_CST)
4913 bound_cost.cost = 0;
4914 express_cost.cost += bound_cost.cost;
4915
4916 /* Choose the better approach, preferring the eliminated IV. */
4917 if (compare_costs (elim_cost, express_cost) <= 0)
4918 {
4919 cost = elim_cost;
4920 depends_on = depends_on_elim;
4921 depends_on_elim = NULL;
4922 inv_expr_id = elim_inv_expr_id;
4923 }
4924 else
4925 {
4926 cost = express_cost;
4927 depends_on = depends_on_express;
4928 depends_on_express = NULL;
4929 bound = NULL_TREE;
4930 comp = ERROR_MARK;
4931 inv_expr_id = express_inv_expr_id;
4932 }
4933
4934 set_use_iv_cost (data, use, cand, cost, depends_on, bound, comp, inv_expr_id);
4935
4936 if (depends_on_elim)
4937 BITMAP_FREE (depends_on_elim);
4938 if (depends_on_express)
4939 BITMAP_FREE (depends_on_express);
4940
4941 return !infinite_cost_p (cost);
4942 }
4943
4944 /* Determines cost of basing replacement of USE on CAND. Returns false
4945 if USE cannot be based on CAND. */
4946
4947 static bool
4948 determine_use_iv_cost (struct ivopts_data *data,
4949 struct iv_use *use, struct iv_cand *cand)
4950 {
4951 switch (use->type)
4952 {
4953 case USE_NONLINEAR_EXPR:
4954 return determine_use_iv_cost_generic (data, use, cand);
4955
4956 case USE_ADDRESS:
4957 return determine_use_iv_cost_address (data, use, cand);
4958
4959 case USE_COMPARE:
4960 return determine_use_iv_cost_condition (data, use, cand);
4961
4962 default:
4963 gcc_unreachable ();
4964 }
4965 }
4966
4967 /* Return true if get_computation_cost indicates that autoincrement is
4968 a possibility for the pair of USE and CAND, false otherwise. */
4969
4970 static bool
4971 autoinc_possible_for_pair (struct ivopts_data *data, struct iv_use *use,
4972 struct iv_cand *cand)
4973 {
4974 bitmap depends_on;
4975 bool can_autoinc;
4976 comp_cost cost;
4977
4978 if (use->type != USE_ADDRESS)
4979 return false;
4980
4981 cost = get_computation_cost (data, use, cand, true, &depends_on,
4982 &can_autoinc, NULL);
4983
4984 BITMAP_FREE (depends_on);
4985
4986 return !infinite_cost_p (cost) && can_autoinc;
4987 }
4988
4989 /* Examine IP_ORIGINAL candidates to see if they are incremented next to a
4990 use that allows autoincrement, and set their AINC_USE if possible. */
4991
4992 static void
4993 set_autoinc_for_original_candidates (struct ivopts_data *data)
4994 {
4995 unsigned i, j;
4996
4997 for (i = 0; i < n_iv_cands (data); i++)
4998 {
4999 struct iv_cand *cand = iv_cand (data, i);
5000 struct iv_use *closest_before = NULL;
5001 struct iv_use *closest_after = NULL;
5002 if (cand->pos != IP_ORIGINAL)
5003 continue;
5004
5005 for (j = 0; j < n_iv_uses (data); j++)
5006 {
5007 struct iv_use *use = iv_use (data, j);
5008 unsigned uid = gimple_uid (use->stmt);
5009
5010 if (gimple_bb (use->stmt) != gimple_bb (cand->incremented_at))
5011 continue;
5012
5013 if (uid < gimple_uid (cand->incremented_at)
5014 && (closest_before == NULL
5015 || uid > gimple_uid (closest_before->stmt)))
5016 closest_before = use;
5017
5018 if (uid > gimple_uid (cand->incremented_at)
5019 && (closest_after == NULL
5020 || uid < gimple_uid (closest_after->stmt)))
5021 closest_after = use;
5022 }
5023
5024 if (closest_before != NULL
5025 && autoinc_possible_for_pair (data, closest_before, cand))
5026 cand->ainc_use = closest_before;
5027 else if (closest_after != NULL
5028 && autoinc_possible_for_pair (data, closest_after, cand))
5029 cand->ainc_use = closest_after;
5030 }
5031 }
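
/* For example, if an original biv is incremented immediately after a
memory access whose address it feeds, that access becomes
CLOSEST_BEFORE, and when autoinc_possible_for_pair confirms that the
increment can be folded into the access (e.g. via a post-increment
addressing mode on the target), the candidate's AINC_USE is set
accordingly. */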
5032
5033 /* Finds the candidates for the induction variables. */
5034
5035 static void
5036 find_iv_candidates (struct ivopts_data *data)
5037 {
5038 /* Add commonly used ivs. */
5039 add_standard_iv_candidates (data);
5040
5041 /* Add old induction variables. */
5042 add_old_ivs_candidates (data);
5043
5044 /* Add induction variables derived from uses. */
5045 add_derived_ivs_candidates (data);
5046
5047 set_autoinc_for_original_candidates (data);
5048
5049 /* Record the important candidates. */
5050 record_important_candidates (data);
5051 }
5052
5053 /* Determines costs of basing the use of the iv on an iv candidate. */
5054
5055 static void
5056 determine_use_iv_costs (struct ivopts_data *data)
5057 {
5058 unsigned i, j;
5059 struct iv_use *use;
5060 struct iv_cand *cand;
5061 bitmap to_clear = BITMAP_ALLOC (NULL);
5062
5063 alloc_use_cost_map (data);
5064
5065 for (i = 0; i < n_iv_uses (data); i++)
5066 {
5067 use = iv_use (data, i);
5068
5069 if (data->consider_all_candidates)
5070 {
5071 for (j = 0; j < n_iv_cands (data); j++)
5072 {
5073 cand = iv_cand (data, j);
5074 determine_use_iv_cost (data, use, cand);
5075 }
5076 }
5077 else
5078 {
5079 bitmap_iterator bi;
5080
5081 EXECUTE_IF_SET_IN_BITMAP (use->related_cands, 0, j, bi)
5082 {
5083 cand = iv_cand (data, j);
5084 if (!determine_use_iv_cost (data, use, cand))
5085 bitmap_set_bit (to_clear, j);
5086 }
5087
5088 /* Remove the candidates for which the cost is infinite from
5089 the list of related candidates. */
5090 bitmap_and_compl_into (use->related_cands, to_clear);
5091 bitmap_clear (to_clear);
5092 }
5093 }
5094
5095 BITMAP_FREE (to_clear);
5096
5097 if (dump_file && (dump_flags & TDF_DETAILS))
5098 {
5099 fprintf (dump_file, "Use-candidate costs:\n");
5100
5101 for (i = 0; i < n_iv_uses (data); i++)
5102 {
5103 use = iv_use (data, i);
5104
5105 fprintf (dump_file, "Use %d:\n", i);
5106 fprintf (dump_file, " cand\tcost\tcompl.\tdepends on\n");
5107 for (j = 0; j < use->n_map_members; j++)
5108 {
5109 if (!use->cost_map[j].cand
5110 || infinite_cost_p (use->cost_map[j].cost))
5111 continue;
5112
5113 fprintf (dump_file, " %d\t%d\t%d\t",
5114 use->cost_map[j].cand->id,
5115 use->cost_map[j].cost.cost,
5116 use->cost_map[j].cost.complexity);
5117 if (use->cost_map[j].depends_on)
5118 bitmap_print (dump_file,
5119 use->cost_map[j].depends_on, "", "");
5120 if (use->cost_map[j].inv_expr_id != -1)
5121 fprintf (dump_file, " inv_expr:%d", use->cost_map[j].inv_expr_id);
5122 fprintf (dump_file, "\n");
5123 }
5124
5125 fprintf (dump_file, "\n");
5126 }
5127 fprintf (dump_file, "\n");
5128 }
5129 }
5130
5131 /* Determines cost of the candidate CAND. */
5132
5133 static void
5134 determine_iv_cost (struct ivopts_data *data, struct iv_cand *cand)
5135 {
5136 comp_cost cost_base;
5137 unsigned cost, cost_step;
5138 tree base;
5139
5140 if (!cand->iv)
5141 {
5142 cand->cost = 0;
5143 return;
5144 }
5145
5146 /* There are two costs associated with the candidate -- its increment
5147 and its initialization. The second is almost negligible for any loop
5148 that rolls enough, so we take it into account only very little. */
5149
5150 base = cand->iv->base;
5151 cost_base = force_var_cost (data, base, NULL);
5152 /* It would be exceptional for the iv register to happen to be initialized
5153 with the proper value at no cost. In general, there will at least be a
5154 regcopy or a const set. */
5155 if (cost_base.cost == 0)
5156 cost_base.cost = COSTS_N_INSNS (1);
5157 cost_step = add_cost (data->speed, TYPE_MODE (TREE_TYPE (base)));
5158
5159 cost = cost_step + adjust_setup_cost (data, cost_base.cost);
5160
5161 /* Prefer the original ivs unless we may gain something by replacing them.
5162 The reason is to make debugging simpler; this is not relevant for
5163 artificial ivs created by other optimization passes. */
5164 if (cand->pos != IP_ORIGINAL
5165 || !SSA_NAME_VAR (cand->var_before)
5166 || DECL_ARTIFICIAL (SSA_NAME_VAR (cand->var_before)))
5167 cost++;
5168
5169 /* Prefer not to insert statements into the latch unless there are some
5170 already (so that we do not create unnecessary jumps). */
5171 if (cand->pos == IP_END
5172 && empty_block_p (ip_end_pos (data->current_loop)))
5173 cost++;
5174
5175 cand->cost = cost;
5176 cand->cost_step = cost_step;
5177 }
5178
5179 /* Determines costs of computation of the candidates. */
5180
5181 static void
5182 determine_iv_costs (struct ivopts_data *data)
5183 {
5184 unsigned i;
5185
5186 if (dump_file && (dump_flags & TDF_DETAILS))
5187 {
5188 fprintf (dump_file, "Candidate costs:\n");
5189 fprintf (dump_file, " cand\tcost\n");
5190 }
5191
5192 for (i = 0; i < n_iv_cands (data); i++)
5193 {
5194 struct iv_cand *cand = iv_cand (data, i);
5195
5196 determine_iv_cost (data, cand);
5197
5198 if (dump_file && (dump_flags & TDF_DETAILS))
5199 fprintf (dump_file, " %d\t%d\n", i, cand->cost);
5200 }
5201
5202 if (dump_file && (dump_flags & TDF_DETAILS))
5203 fprintf (dump_file, "\n");
5204 }
5205
5206 /* Calculates cost for having SIZE induction variables. */
5207
5208 static unsigned
5209 ivopts_global_cost_for_size (struct ivopts_data *data, unsigned size)
5210 {
5211 /* We add size to the cost, so that we prefer eliminating ivs
5212 if possible. */
5213 return size + estimate_reg_pressure_cost (size, data->regs_used, data->speed,
5214 data->body_includes_call);
5215 }
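
/* For instance, while size + regs_used stays comfortably below
target_avail_regs, estimate_reg_pressure_cost contributes nothing and
the result is just SIZE; as the pressure approaches and then exceeds
the available registers, the estimate grows by the target's register
cost and eventually its (much larger) spill cost per iv.
(Illustrative description only; the exact thresholds live in
estimate_reg_pressure_cost.) */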
5216
5217 /* For each size of the induction variable set determine the penalty. */
5218
5219 static void
5220 determine_set_costs (struct ivopts_data *data)
5221 {
5222 unsigned j, n;
5223 gimple phi;
5224 gimple_stmt_iterator psi;
5225 tree op;
5226 struct loop *loop = data->current_loop;
5227 bitmap_iterator bi;
5228
5229 if (dump_file && (dump_flags & TDF_DETAILS))
5230 {
5231 fprintf (dump_file, "Global costs:\n");
5232 fprintf (dump_file, " target_avail_regs %d\n", target_avail_regs);
5233 fprintf (dump_file, " target_clobbered_regs %d\n", target_clobbered_regs);
5234 fprintf (dump_file, " target_reg_cost %d\n", target_reg_cost[data->speed]);
5235 fprintf (dump_file, " target_spill_cost %d\n", target_spill_cost[data->speed]);
5236 }
5237
5238 n = 0;
5239 for (psi = gsi_start_phis (loop->header); !gsi_end_p (psi); gsi_next (&psi))
5240 {
5241 phi = gsi_stmt (psi);
5242 op = PHI_RESULT (phi);
5243
5244 if (virtual_operand_p (op))
5245 continue;
5246
5247 if (get_iv (data, op))
5248 continue;
5249
5250 n++;
5251 }
5252
5253 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
5254 {
5255 struct version_info *info = ver_info (data, j);
5256
5257 if (info->inv_id && info->has_nonlin_use)
5258 n++;
5259 }
5260
5261 data->regs_used = n;
5262 if (dump_file && (dump_flags & TDF_DETAILS))
5263 fprintf (dump_file, " regs_used %d\n", n);
5264
5265 if (dump_file && (dump_flags & TDF_DETAILS))
5266 {
5267 fprintf (dump_file, " cost for size:\n");
5268 fprintf (dump_file, " ivs\tcost\n");
5269 for (j = 0; j <= 2 * target_avail_regs; j++)
5270 fprintf (dump_file, " %d\t%d\n", j,
5271 ivopts_global_cost_for_size (data, j));
5272 fprintf (dump_file, "\n");
5273 }
5274 }
5275
5276 /* Returns true if A is a cheaper cost pair than B. */
5277
5278 static bool
5279 cheaper_cost_pair (struct cost_pair *a, struct cost_pair *b)
5280 {
5281 int cmp;
5282
5283 if (!a)
5284 return false;
5285
5286 if (!b)
5287 return true;
5288
5289 cmp = compare_costs (a->cost, b->cost);
5290 if (cmp < 0)
5291 return true;
5292
5293 if (cmp > 0)
5294 return false;
5295
5296 /* In case the costs are the same, prefer the cheaper candidate. */
5297 if (a->cand->cost < b->cand->cost)
5298 return true;
5299
5300 return false;
5301 }
5302
5303
5304 /* Returns the candidate by which USE is expressed in IVS. */
5305
5306 static struct cost_pair *
5307 iv_ca_cand_for_use (struct iv_ca *ivs, struct iv_use *use)
5308 {
5309 return ivs->cand_for_use[use->id];
5310 }
5311
5312 /* Computes the cost field of IVS structure. */
5313
5314 static void
5315 iv_ca_recount_cost (struct ivopts_data *data, struct iv_ca *ivs)
5316 {
5317 comp_cost cost = ivs->cand_use_cost;
5318
5319 cost.cost += ivs->cand_cost;
5320
5321 cost.cost += ivopts_global_cost_for_size (data,
5322 ivs->n_regs + ivs->num_used_inv_expr);
5323
5324 ivs->cost = cost;
5325 }
5326
5327 /* Remove the invariants in set INVS from set IVS. */
5328
5329 static void
5330 iv_ca_set_remove_invariants (struct iv_ca *ivs, bitmap invs)
5331 {
5332 bitmap_iterator bi;
5333 unsigned iid;
5334
5335 if (!invs)
5336 return;
5337
5338 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5339 {
5340 ivs->n_invariant_uses[iid]--;
5341 if (ivs->n_invariant_uses[iid] == 0)
5342 ivs->n_regs--;
5343 }
5344 }
5345
5346 /* Set USE not to be expressed by any candidate in IVS. */
5347
5348 static void
5349 iv_ca_set_no_cp (struct ivopts_data *data, struct iv_ca *ivs,
5350 struct iv_use *use)
5351 {
5352 unsigned uid = use->id, cid;
5353 struct cost_pair *cp;
5354
5355 cp = ivs->cand_for_use[uid];
5356 if (!cp)
5357 return;
5358 cid = cp->cand->id;
5359
5360 ivs->bad_uses++;
5361 ivs->cand_for_use[uid] = NULL;
5362 ivs->n_cand_uses[cid]--;
5363
5364 if (ivs->n_cand_uses[cid] == 0)
5365 {
5366 bitmap_clear_bit (ivs->cands, cid);
5367 /* Do not count the pseudocandidates. */
5368 if (cp->cand->iv)
5369 ivs->n_regs--;
5370 ivs->n_cands--;
5371 ivs->cand_cost -= cp->cand->cost;
5372
5373 iv_ca_set_remove_invariants (ivs, cp->cand->depends_on);
5374 }
5375
5376 ivs->cand_use_cost = sub_costs (ivs->cand_use_cost, cp->cost);
5377
5378 iv_ca_set_remove_invariants (ivs, cp->depends_on);
5379
5380 if (cp->inv_expr_id != -1)
5381 {
5382 ivs->used_inv_expr[cp->inv_expr_id]--;
5383 if (ivs->used_inv_expr[cp->inv_expr_id] == 0)
5384 ivs->num_used_inv_expr--;
5385 }
5386 iv_ca_recount_cost (data, ivs);
5387 }
5388
5389 /* Add invariants in set INVS to set IVS. */
5390
5391 static void
5392 iv_ca_set_add_invariants (struct iv_ca *ivs, bitmap invs)
5393 {
5394 bitmap_iterator bi;
5395 unsigned iid;
5396
5397 if (!invs)
5398 return;
5399
5400 EXECUTE_IF_SET_IN_BITMAP (invs, 0, iid, bi)
5401 {
5402 ivs->n_invariant_uses[iid]++;
5403 if (ivs->n_invariant_uses[iid] == 1)
5404 ivs->n_regs++;
5405 }
5406 }
5407
5408 /* Set cost pair for USE in set IVS to CP. */
5409
5410 static void
5411 iv_ca_set_cp (struct ivopts_data *data, struct iv_ca *ivs,
5412 struct iv_use *use, struct cost_pair *cp)
5413 {
5414 unsigned uid = use->id, cid;
5415
5416 if (ivs->cand_for_use[uid] == cp)
5417 return;
5418
5419 if (ivs->cand_for_use[uid])
5420 iv_ca_set_no_cp (data, ivs, use);
5421
5422 if (cp)
5423 {
5424 cid = cp->cand->id;
5425
5426 ivs->bad_uses--;
5427 ivs->cand_for_use[uid] = cp;
5428 ivs->n_cand_uses[cid]++;
5429 if (ivs->n_cand_uses[cid] == 1)
5430 {
5431 bitmap_set_bit (ivs->cands, cid);
5432 /* Do not count the pseudocandidates. */
5433 if (cp->cand->iv)
5434 ivs->n_regs++;
5435 ivs->n_cands++;
5436 ivs->cand_cost += cp->cand->cost;
5437
5438 iv_ca_set_add_invariants (ivs, cp->cand->depends_on);
5439 }
5440
5441 ivs->cand_use_cost = add_costs (ivs->cand_use_cost, cp->cost);
5442 iv_ca_set_add_invariants (ivs, cp->depends_on);
5443
5444 if (cp->inv_expr_id != -1)
5445 {
5446 ivs->used_inv_expr[cp->inv_expr_id]++;
5447 if (ivs->used_inv_expr[cp->inv_expr_id] == 1)
5448 ivs->num_used_inv_expr++;
5449 }
5450 iv_ca_recount_cost (data, ivs);
5451 }
5452 }
5453
5454 /* Extend set IVS by expressing USE by some of the candidates in it
5455 if possible. All important candidates will be considered
5456 if IMPORTANT_CANDIDATES is true. */
5457
5458 static void
5459 iv_ca_add_use (struct ivopts_data *data, struct iv_ca *ivs,
5460 struct iv_use *use, bool important_candidates)
5461 {
5462 struct cost_pair *best_cp = NULL, *cp;
5463 bitmap_iterator bi;
5464 bitmap cands;
5465 unsigned i;
5466
5467 gcc_assert (ivs->upto >= use->id);
5468
5469 if (ivs->upto == use->id)
5470 {
5471 ivs->upto++;
5472 ivs->bad_uses++;
5473 }
5474
5475 cands = (important_candidates ? data->important_candidates : ivs->cands);
5476 EXECUTE_IF_SET_IN_BITMAP (cands, 0, i, bi)
5477 {
5478 struct iv_cand *cand = iv_cand (data, i);
5479
5480 cp = get_use_iv_cost (data, use, cand);
5481
5482 if (cheaper_cost_pair (cp, best_cp))
5483 best_cp = cp;
5484 }
5485
5486 iv_ca_set_cp (data, ivs, use, best_cp);
5487 }
5488
5489 /* Get cost for assignment IVS. */
5490
5491 static comp_cost
5492 iv_ca_cost (struct iv_ca *ivs)
5493 {
5494 /* This was a conditional expression but it triggered a bug in
5495 Sun C 5.5. */
5496 if (ivs->bad_uses)
5497 return infinite_cost;
5498 else
5499 return ivs->cost;
5500 }
5501
5502 /* Returns true if all dependences of CP are among invariants in IVS. */
5503
5504 static bool
5505 iv_ca_has_deps (struct iv_ca *ivs, struct cost_pair *cp)
5506 {
5507 unsigned i;
5508 bitmap_iterator bi;
5509
5510 if (!cp->depends_on)
5511 return true;
5512
5513 EXECUTE_IF_SET_IN_BITMAP (cp->depends_on, 0, i, bi)
5514 {
5515 if (ivs->n_invariant_uses[i] == 0)
5516 return false;
5517 }
5518
5519 return true;
5520 }
5521
5522 /* Creates a change record expressing USE by NEW_CP instead of OLD_CP, and
5523 chains it before NEXT_CHANGE. */
5524
5525 static struct iv_ca_delta *
5526 iv_ca_delta_add (struct iv_use *use, struct cost_pair *old_cp,
5527 struct cost_pair *new_cp, struct iv_ca_delta *next_change)
5528 {
5529 struct iv_ca_delta *change = XNEW (struct iv_ca_delta);
5530
5531 change->use = use;
5532 change->old_cp = old_cp;
5533 change->new_cp = new_cp;
5534 change->next_change = next_change;
5535
5536 return change;
5537 }
5538
5539 /* Joins two lists of changes L1 and L2. Destructive -- old lists
5540 are rewritten. */
5541
5542 static struct iv_ca_delta *
5543 iv_ca_delta_join (struct iv_ca_delta *l1, struct iv_ca_delta *l2)
5544 {
5545 struct iv_ca_delta *last;
5546
5547 if (!l2)
5548 return l1;
5549
5550 if (!l1)
5551 return l2;
5552
5553 for (last = l1; last->next_change; last = last->next_change)
5554 continue;
5555 last->next_change = l2;
5556
5557 return l1;
5558 }
5559
5560 /* Reverse the list of changes DELTA, forming the inverse to it. */
5561
5562 static struct iv_ca_delta *
5563 iv_ca_delta_reverse (struct iv_ca_delta *delta)
5564 {
5565 struct iv_ca_delta *act, *next, *prev = NULL;
5566 struct cost_pair *tmp;
5567
5568 for (act = delta; act; act = next)
5569 {
5570 next = act->next_change;
5571 act->next_change = prev;
5572 prev = act;
5573
5574 tmp = act->old_cp;
5575 act->old_cp = act->new_cp;
5576 act->new_cp = tmp;
5577 }
5578
5579 return prev;
5580 }
5581
5582 /* Commit changes in DELTA to IVS. If FORWARD is false, the changes are
5583 reverted instead. */
5584
5585 static void
5586 iv_ca_delta_commit (struct ivopts_data *data, struct iv_ca *ivs,
5587 struct iv_ca_delta *delta, bool forward)
5588 {
5589 struct cost_pair *from, *to;
5590 struct iv_ca_delta *act;
5591
5592 if (!forward)
5593 delta = iv_ca_delta_reverse (delta);
5594
5595 for (act = delta; act; act = act->next_change)
5596 {
5597 from = act->old_cp;
5598 to = act->new_cp;
5599 gcc_assert (iv_ca_cand_for_use (ivs, act->use) == from);
5600 iv_ca_set_cp (data, ivs, act->use, to);
5601 }
5602
5603 if (!forward)
5604 iv_ca_delta_reverse (delta);
5605 }
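
/* The delta lists thus act as an undo log. The typical pattern, as
used by iv_ca_extend and iv_ca_narrow below, is to apply a delta
tentatively, measure, and revert:

iv_ca_delta_commit (data, ivs, *delta, true);
cost = iv_ca_cost (ivs);
iv_ca_delta_commit (data, ivs, *delta, false);
*/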
5606
5607 /* Returns true if CAND is used in IVS. */
5608
5609 static bool
5610 iv_ca_cand_used_p (struct iv_ca *ivs, struct iv_cand *cand)
5611 {
5612 return ivs->n_cand_uses[cand->id] > 0;
5613 }
5614
5615 /* Returns number of induction variable candidates in the set IVS. */
5616
5617 static unsigned
5618 iv_ca_n_cands (struct iv_ca *ivs)
5619 {
5620 return ivs->n_cands;
5621 }
5622
5623 /* Free the list of changes DELTA. */
5624
5625 static void
5626 iv_ca_delta_free (struct iv_ca_delta **delta)
5627 {
5628 struct iv_ca_delta *act, *next;
5629
5630 for (act = *delta; act; act = next)
5631 {
5632 next = act->next_change;
5633 free (act);
5634 }
5635
5636 *delta = NULL;
5637 }
5638
5639 /* Allocates new iv candidates assignment. */
5640
5641 static struct iv_ca *
5642 iv_ca_new (struct ivopts_data *data)
5643 {
5644 struct iv_ca *nw = XNEW (struct iv_ca);
5645
5646 nw->upto = 0;
5647 nw->bad_uses = 0;
5648 nw->cand_for_use = XCNEWVEC (struct cost_pair *, n_iv_uses (data));
5649 nw->n_cand_uses = XCNEWVEC (unsigned, n_iv_cands (data));
5650 nw->cands = BITMAP_ALLOC (NULL);
5651 nw->n_cands = 0;
5652 nw->n_regs = 0;
5653 nw->cand_use_cost = no_cost;
5654 nw->cand_cost = 0;
5655 nw->n_invariant_uses = XCNEWVEC (unsigned, data->max_inv_id + 1);
5656 nw->cost = no_cost;
5657 nw->used_inv_expr = XCNEWVEC (unsigned, data->inv_expr_id + 1);
5658 nw->num_used_inv_expr = 0;
5659
5660 return nw;
5661 }
5662
5663 /* Free memory occupied by the set IVS. */
5664
5665 static void
5666 iv_ca_free (struct iv_ca **ivs)
5667 {
5668 free ((*ivs)->cand_for_use);
5669 free ((*ivs)->n_cand_uses);
5670 BITMAP_FREE ((*ivs)->cands);
5671 free ((*ivs)->n_invariant_uses);
5672 free ((*ivs)->used_inv_expr);
5673 free (*ivs);
5674 *ivs = NULL;
5675 }
5676
5677 /* Dumps IVS to FILE. */
5678
5679 static void
5680 iv_ca_dump (struct ivopts_data *data, FILE *file, struct iv_ca *ivs)
5681 {
5682 const char *pref = " invariants ";
5683 unsigned i;
5684 comp_cost cost = iv_ca_cost (ivs);
5685
5686 fprintf (file, " cost: %d (complexity %d)\n", cost.cost, cost.complexity);
5687 fprintf (file, " cand_cost: %d\n cand_use_cost: %d (complexity %d)\n",
5688 ivs->cand_cost, ivs->cand_use_cost.cost, ivs->cand_use_cost.complexity);
5689 bitmap_print (file, ivs->cands, " candidates: ", "\n");
5690
5691 for (i = 0; i < ivs->upto; i++)
5692 {
5693 struct iv_use *use = iv_use (data, i);
5694 struct cost_pair *cp = iv_ca_cand_for_use (ivs, use);
5695 if (cp)
5696 fprintf (file, " use:%d --> iv_cand:%d, cost=(%d,%d)\n",
5697 use->id, cp->cand->id, cp->cost.cost, cp->cost.complexity);
5698 else
5699 fprintf (file, " use:%d --> ??\n", use->id);
5700 }
5701
5702 for (i = 1; i <= data->max_inv_id; i++)
5703 if (ivs->n_invariant_uses[i])
5704 {
5705 fprintf (file, "%s%d", pref, i);
5706 pref = ", ";
5707 }
5708 fprintf (file, "\n\n");
5709 }
5710
5711 /* Try changing the candidate in IVS to CAND for each use. Return the
5712 cost of the new set, and store the differences in DELTA. The number of
5713 induction variables in the new set is stored in N_IVS. When MIN_NCAND
5714 is true, the function tries to find a solution with minimal iv candidates. */
5715
5716 static comp_cost
5717 iv_ca_extend (struct ivopts_data *data, struct iv_ca *ivs,
5718 struct iv_cand *cand, struct iv_ca_delta **delta,
5719 unsigned *n_ivs, bool min_ncand)
5720 {
5721 unsigned i;
5722 comp_cost cost;
5723 struct iv_use *use;
5724 struct cost_pair *old_cp, *new_cp;
5725
5726 *delta = NULL;
5727 for (i = 0; i < ivs->upto; i++)
5728 {
5729 use = iv_use (data, i);
5730 old_cp = iv_ca_cand_for_use (ivs, use);
5731
5732 if (old_cp
5733 && old_cp->cand == cand)
5734 continue;
5735
5736 new_cp = get_use_iv_cost (data, use, cand);
5737 if (!new_cp)
5738 continue;
5739
5740 if (!min_ncand && !iv_ca_has_deps (ivs, new_cp))
5741 continue;
5742
5743 if (!min_ncand && !cheaper_cost_pair (new_cp, old_cp))
5744 continue;
5745
5746 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5747 }
5748
5749 iv_ca_delta_commit (data, ivs, *delta, true);
5750 cost = iv_ca_cost (ivs);
5751 if (n_ivs)
5752 *n_ivs = iv_ca_n_cands (ivs);
5753 iv_ca_delta_commit (data, ivs, *delta, false);
5754
5755 return cost;
5756 }
5757
5758 /* Try narrowing set IVS by removing CAND. Return the cost of
5759 the new set and store the differences in DELTA. START is
5760 the candidate with which we start narrowing. */
5761
5762 static comp_cost
5763 iv_ca_narrow (struct ivopts_data *data, struct iv_ca *ivs,
5764 struct iv_cand *cand, struct iv_cand *start,
5765 struct iv_ca_delta **delta)
5766 {
5767 unsigned i, ci;
5768 struct iv_use *use;
5769 struct cost_pair *old_cp, *new_cp, *cp;
5770 bitmap_iterator bi;
5771 struct iv_cand *cnd;
5772 comp_cost cost, best_cost, acost;
5773
5774 *delta = NULL;
5775 for (i = 0; i < n_iv_uses (data); i++)
5776 {
5777 use = iv_use (data, i);
5778
5779 old_cp = iv_ca_cand_for_use (ivs, use);
5780 if (old_cp->cand != cand)
5781 continue;
5782
5783 best_cost = iv_ca_cost (ivs);
5784 /* Start narrowing with START. */
5785 new_cp = get_use_iv_cost (data, use, start);
5786
5787 if (data->consider_all_candidates)
5788 {
5789 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, ci, bi)
5790 {
5791 if (ci == cand->id || (start && ci == start->id))
5792 continue;
5793
5794 cnd = iv_cand (data, ci);
5795
5796 cp = get_use_iv_cost (data, use, cnd);
5797 if (!cp)
5798 continue;
5799
5800 iv_ca_set_cp (data, ivs, use, cp);
5801 acost = iv_ca_cost (ivs);
5802
5803 if (compare_costs (acost, best_cost) < 0)
5804 {
5805 best_cost = acost;
5806 new_cp = cp;
5807 }
5808 }
5809 }
5810 else
5811 {
5812 EXECUTE_IF_AND_IN_BITMAP (use->related_cands, ivs->cands, 0, ci, bi)
5813 {
5814 if (ci == cand->id || (start && ci == start->id))
5815 continue;
5816
5817 cnd = iv_cand (data, ci);
5818
5819 cp = get_use_iv_cost (data, use, cnd);
5820 if (!cp)
5821 continue;
5822
5823 iv_ca_set_cp (data, ivs, use, cp);
5824 acost = iv_ca_cost (ivs);
5825
5826 if (compare_costs (acost, best_cost) < 0)
5827 {
5828 best_cost = acost;
5829 new_cp = cp;
5830 }
5831 }
5832 }
5833 /* Restore to old cp for use. */
5834 iv_ca_set_cp (data, ivs, use, old_cp);
5835
5836 if (!new_cp)
5837 {
5838 iv_ca_delta_free (delta);
5839 return infinite_cost;
5840 }
5841
5842 *delta = iv_ca_delta_add (use, old_cp, new_cp, *delta);
5843 }
5844
5845 iv_ca_delta_commit (data, ivs, *delta, true);
5846 cost = iv_ca_cost (ivs);
5847 iv_ca_delta_commit (data, ivs, *delta, false);
5848
5849 return cost;
5850 }
5851
5852 /* Try optimizing the set of candidates IVS by removing candidates other
5853 than EXCEPT_CAND from it. Return the cost of the new set, and store the
5854 differences in DELTA. */
5855
5856 static comp_cost
5857 iv_ca_prune (struct ivopts_data *data, struct iv_ca *ivs,
5858 struct iv_cand *except_cand, struct iv_ca_delta **delta)
5859 {
5860 bitmap_iterator bi;
5861 struct iv_ca_delta *act_delta, *best_delta;
5862 unsigned i;
5863 comp_cost best_cost, acost;
5864 struct iv_cand *cand;
5865
5866 best_delta = NULL;
5867 best_cost = iv_ca_cost (ivs);
5868
5869 EXECUTE_IF_SET_IN_BITMAP (ivs->cands, 0, i, bi)
5870 {
5871 cand = iv_cand (data, i);
5872
5873 if (cand == except_cand)
5874 continue;
5875
5876 acost = iv_ca_narrow (data, ivs, cand, except_cand, &act_delta);
5877
5878 if (compare_costs (acost, best_cost) < 0)
5879 {
5880 best_cost = acost;
5881 iv_ca_delta_free (&best_delta);
5882 best_delta = act_delta;
5883 }
5884 else
5885 iv_ca_delta_free (&act_delta);
5886 }
5887
5888 if (!best_delta)
5889 {
5890 *delta = NULL;
5891 return best_cost;
5892 }
5893
5894 /* Recurse to possibly remove other unnecessary ivs. */
5895 iv_ca_delta_commit (data, ivs, best_delta, true);
5896 best_cost = iv_ca_prune (data, ivs, except_cand, delta);
5897 iv_ca_delta_commit (data, ivs, best_delta, false);
5898 *delta = iv_ca_delta_join (best_delta, *delta);
5899 return best_cost;
5900 }
5901
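/* For illustration, pruning is greedy and cascading: removing one
   candidate can make another removable.  Starting from a hypothetical
   set {c1, c2, c3}, one call might proceed

       {c1, c2, c3}  ->  {c1, c3}  ->  {c1}

   where dropping c3 only became profitable once c2 was gone; the
   recursive call above is what discovers such second-order removals.
   EXCEPT_CAND, when non-NULL, is never considered for removal.  */
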
5902 /* Tries to extend the set IVS in the best possible way in order
5903 to express the USE. If ORIGINALP is true, prefer candidates from
5904 the original set of IVs, otherwise favor important candidates not
5905 based on any memory object. */
5906
5907 static bool
5908 try_add_cand_for (struct ivopts_data *data, struct iv_ca *ivs,
5909 struct iv_use *use, bool originalp)
5910 {
5911 comp_cost best_cost, act_cost;
5912 unsigned i;
5913 bitmap_iterator bi;
5914 struct iv_cand *cand;
5915 struct iv_ca_delta *best_delta = NULL, *act_delta;
5916 struct cost_pair *cp;
5917
5918 iv_ca_add_use (data, ivs, use, false);
5919 best_cost = iv_ca_cost (ivs);
5920
5921 cp = iv_ca_cand_for_use (ivs, use);
5922 if (!cp)
5923 {
5924 ivs->upto--;
5925 ivs->bad_uses--;
5926 iv_ca_add_use (data, ivs, use, true);
5927 best_cost = iv_ca_cost (ivs);
5928 cp = iv_ca_cand_for_use (ivs, use);
5929 }
5930 if (cp)
5931 {
5932 best_delta = iv_ca_delta_add (use, NULL, cp, NULL);
5933 iv_ca_set_no_cp (data, ivs, use);
5934 }
5935
5936 /* If ORIGINALP is true, try to find the original IV for the use. Otherwise
5937 first try important candidates not based on any memory object. Only if
5938 this fails, try the specific ones. Rationale -- in loops with many
5939 variables the best choice often is to use just one generic biv. If we
5940 added many ivs specific to the uses here, the optimization algorithm later
5941 would be likely to get stuck in a local minimum, thus causing us to create
5942 too many ivs. Going from few ivs to more seems more likely to be
5943 successful -- starting from few ivs, replacing an expensive use by a
5944 specific iv should always be a win. */
5945 EXECUTE_IF_SET_IN_BITMAP (data->important_candidates, 0, i, bi)
5946 {
5947 cand = iv_cand (data, i);
5948
5949 if (originalp && cand->pos != IP_ORIGINAL)
5950 continue;
5951
5952 if (!originalp && cand->iv->base_object != NULL_TREE)
5953 continue;
5954
5955 if (iv_ca_cand_used_p (ivs, cand))
5956 continue;
5957
5958 cp = get_use_iv_cost (data, use, cand);
5959 if (!cp)
5960 continue;
5961
5962 iv_ca_set_cp (data, ivs, use, cp);
5963 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL,
5964 true);
5965 iv_ca_set_no_cp (data, ivs, use);
5966 act_delta = iv_ca_delta_add (use, NULL, cp, act_delta);
5967
5968 if (compare_costs (act_cost, best_cost) < 0)
5969 {
5970 best_cost = act_cost;
5971
5972 iv_ca_delta_free (&best_delta);
5973 best_delta = act_delta;
5974 }
5975 else
5976 iv_ca_delta_free (&act_delta);
5977 }
5978
5979 if (infinite_cost_p (best_cost))
5980 {
5981 for (i = 0; i < use->n_map_members; i++)
5982 {
5983 cp = use->cost_map + i;
5984 cand = cp->cand;
5985 if (!cand)
5986 continue;
5987
5988 /* Already tried this. */
5989 if (cand->important)
5990 {
5991 if (originalp && cand->pos == IP_ORIGINAL)
5992 continue;
5993 if (!originalp && cand->iv->base_object == NULL_TREE)
5994 continue;
5995 }
5996
5997 if (iv_ca_cand_used_p (ivs, cand))
5998 continue;
5999
6000 act_delta = NULL;
6001 iv_ca_set_cp (data, ivs, use, cp);
6002 act_cost = iv_ca_extend (data, ivs, cand, &act_delta, NULL, true);
6003 iv_ca_set_no_cp (data, ivs, use);
6004 act_delta = iv_ca_delta_add (use, iv_ca_cand_for_use (ivs, use),
6005 cp, act_delta);
6006
6007 if (compare_costs (act_cost, best_cost) < 0)
6008 {
6009 best_cost = act_cost;
6010
6011 if (best_delta)
6012 iv_ca_delta_free (&best_delta);
6013 best_delta = act_delta;
6014 }
6015 else
6016 iv_ca_delta_free (&act_delta);
6017 }
6018 }
6019
6020 iv_ca_delta_commit (data, ivs, best_delta, true);
6021 iv_ca_delta_free (&best_delta);
6022
6023 return !infinite_cost_p (best_cost);
6024 }
6025
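/* For illustration, in a hypothetical loop with several address uses

       for (i = 0; i < n; i++)
         a[i] = b[i] + c[i];

   try_add_cand_for first offers every use the generic counter i (an
   important candidate with no base object) before considering per-array
   pointer ivs for a, b and c; the use-specific candidates are consulted
   only when no important candidate yields a finite cost.  */
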
6026 /* Finds an initial assignment of candidates to uses. */
6027
6028 static struct iv_ca *
6029 get_initial_solution (struct ivopts_data *data, bool originalp)
6030 {
6031 struct iv_ca *ivs = iv_ca_new (data);
6032 unsigned i;
6033
6034 for (i = 0; i < n_iv_uses (data); i++)
6035 if (!try_add_cand_for (data, ivs, iv_use (data, i), originalp))
6036 {
6037 iv_ca_free (&ivs);
6038 return NULL;
6039 }
6040
6041 return ivs;
6042 }
6043
6044 /* Tries to improve the set of induction variables IVS. */
6045
6046 static bool
6047 try_improve_iv_set (struct ivopts_data *data, struct iv_ca *ivs)
6048 {
6049 unsigned i, n_ivs;
6050 comp_cost acost, best_cost = iv_ca_cost (ivs);
6051 struct iv_ca_delta *best_delta = NULL, *act_delta, *tmp_delta;
6052 struct iv_cand *cand;
6053
6054 /* Try extending the set of induction variables by one. */
6055 for (i = 0; i < n_iv_cands (data); i++)
6056 {
6057 cand = iv_cand (data, i);
6058
6059 if (iv_ca_cand_used_p (ivs, cand))
6060 continue;
6061
6062 acost = iv_ca_extend (data, ivs, cand, &act_delta, &n_ivs, false);
6063 if (!act_delta)
6064 continue;
6065
6066 /* If we successfully added the candidate and the set is small enough,
6067 try optimizing it by removing other candidates. */
6068 if (n_ivs <= ALWAYS_PRUNE_CAND_SET_BOUND)
6069 {
6070 iv_ca_delta_commit (data, ivs, act_delta, true);
6071 acost = iv_ca_prune (data, ivs, cand, &tmp_delta);
6072 iv_ca_delta_commit (data, ivs, act_delta, false);
6073 act_delta = iv_ca_delta_join (act_delta, tmp_delta);
6074 }
6075
6076 if (compare_costs (acost, best_cost) < 0)
6077 {
6078 best_cost = acost;
6079 iv_ca_delta_free (&best_delta);
6080 best_delta = act_delta;
6081 }
6082 else
6083 iv_ca_delta_free (&act_delta);
6084 }
6085
6086 if (!best_delta)
6087 {
6088 /* Try removing the candidates from the set instead. */
6089 best_cost = iv_ca_prune (data, ivs, NULL, &best_delta);
6090
6091 /* Nothing more we can do. */
6092 if (!best_delta)
6093 return false;
6094 }
6095
6096 iv_ca_delta_commit (data, ivs, best_delta, true);
6097 gcc_assert (compare_costs (best_cost, iv_ca_cost (ivs)) == 0);
6098 iv_ca_delta_free (&best_delta);
6099 return true;
6100 }
6101
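/* For illustration, the driver below runs this as a plain hill climb:

       while (try_improve_iv_set (data, set))
         continue;

   Each successful step commits the single cheapest one-candidate change
   (an extension, possibly followed by pruning), so the search stops at
   the first set that no such move improves -- a local minimum of the
   cost function.  */
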
6102 /* Attempts to find the optimal set of induction variables. We use a simple
6103 greedy heuristic -- we try to replace at most one candidate in the selected
6104 solution and remove the unused ivs while this improves the cost. */
6105
6106 static struct iv_ca *
6107 find_optimal_iv_set_1 (struct ivopts_data *data, bool originalp)
6108 {
6109 struct iv_ca *set;
6110
6111 /* Get the initial solution. */
6112 set = get_initial_solution (data, originalp);
6113 if (!set)
6114 {
6115 if (dump_file && (dump_flags & TDF_DETAILS))
6116 fprintf (dump_file, "Unable to substitute for ivs, failed.\n");
6117 return NULL;
6118 }
6119
6120 if (dump_file && (dump_flags & TDF_DETAILS))
6121 {
6122 fprintf (dump_file, "Initial set of candidates:\n");
6123 iv_ca_dump (data, dump_file, set);
6124 }
6125
6126 while (try_improve_iv_set (data, set))
6127 {
6128 if (dump_file && (dump_flags & TDF_DETAILS))
6129 {
6130 fprintf (dump_file, "Improved to:\n");
6131 iv_ca_dump (data, dump_file, set);
6132 }
6133 }
6134
6135 return set;
6136 }
6137
6138 static struct iv_ca *
6139 find_optimal_iv_set (struct ivopts_data *data)
6140 {
6141 unsigned i;
6142 struct iv_ca *set, *origset;
6143 struct iv_use *use;
6144 comp_cost cost, origcost;
6145
6146 /* Determine the cost using a strategy that prefers the original IVs,
6147 then try again using a strategy that prefers candidates not based
6148 on any memory object, and keep the cheaper of the two sets. */
6149 origset = find_optimal_iv_set_1 (data, true);
6150 set = find_optimal_iv_set_1 (data, false);
6151
6152 if (!origset && !set)
6153 return NULL;
6154
6155 origcost = origset ? iv_ca_cost (origset) : infinite_cost;
6156 cost = set ? iv_ca_cost (set) : infinite_cost;
6157
6158 if (dump_file && (dump_flags & TDF_DETAILS))
6159 {
6160 fprintf (dump_file, "Original cost %d (complexity %d)\n\n",
6161 origcost.cost, origcost.complexity);
6162 fprintf (dump_file, "Final cost %d (complexity %d)\n\n",
6163 cost.cost, cost.complexity);
6164 }
6165
6166 /* Choose the one with the best cost. */
6167 if (compare_costs (origcost, cost) <= 0)
6168 {
6169 if (set)
6170 iv_ca_free (&set);
6171 set = origset;
6172 }
6173 else if (origset)
6174 iv_ca_free (&origset);
6175
6176 for (i = 0; i < n_iv_uses (data); i++)
6177 {
6178 use = iv_use (data, i);
6179 use->selected = iv_ca_cand_for_use (set, use)->cand;
6180 }
6181
6182 return set;
6183 }
6184
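/* For illustration, the two strategies can genuinely disagree.  For a
   hypothetical loop

       for (i = 0; i < n; i++)
         a[i] = 0;

   the ORIGINALP pass tends to keep the counter i and pay for the
   a + 4 * i address computation, while the other pass may replace
   everything with a single pointer iv stepping through a.  Costing both
   candidate sets and keeping the cheaper one avoids guessing which
   shape wins on the target.  */
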
6185 /* Creates a new induction variable corresponding to CAND. */
6186
6187 static void
6188 create_new_iv (struct ivopts_data *data, struct iv_cand *cand)
6189 {
6190 gimple_stmt_iterator incr_pos;
6191 tree base;
6192 bool after = false;
6193
6194 if (!cand->iv)
6195 return;
6196
6197 switch (cand->pos)
6198 {
6199 case IP_NORMAL:
6200 incr_pos = gsi_last_bb (ip_normal_pos (data->current_loop));
6201 break;
6202
6203 case IP_END:
6204 incr_pos = gsi_last_bb (ip_end_pos (data->current_loop));
6205 after = true;
6206 break;
6207
6208 case IP_AFTER_USE:
6209 after = true;
6210 /* fall through */
6211 case IP_BEFORE_USE:
6212 incr_pos = gsi_for_stmt (cand->incremented_at);
6213 break;
6214
6215 case IP_ORIGINAL:
6216 /* Mark that the iv is preserved. */
6217 name_info (data, cand->var_before)->preserve_biv = true;
6218 name_info (data, cand->var_after)->preserve_biv = true;
6219
6220 /* Rewrite the increment so that it uses var_before directly. */
6221 find_interesting_uses_op (data, cand->var_after)->selected = cand;
6222 return;
6223 }
6224
6225 gimple_add_tmp_var (cand->var_before);
6226
6227 base = unshare_expr (cand->iv->base);
6228
6229 create_iv (base, unshare_expr (cand->iv->step),
6230 cand->var_before, data->current_loop,
6231 &incr_pos, after, &cand->var_before, &cand->var_after);
6232 }
6233
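/* For illustration, roughly where the increment lands for the two
   block-based positions, sketched on a hypothetical counter i:

       IP_NORMAL                     IP_END
       loop:                         loop:
         ...uses of i...               ...uses of i...
         i += step;                    if (cond) { i += step; goto loop; }
         if (cond) goto loop;

   i.e. IP_NORMAL increments just before the exit test and IP_END on the
   latch edge only.  IP_BEFORE_USE and IP_AFTER_USE instead place the
   increment around CAND->incremented_at, and IP_ORIGINAL keeps the
   preexisting increment, merely marking the biv as preserved.  */
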
6234 /* Creates new induction variables described in SET. */
6235
6236 static void
6237 create_new_ivs (struct ivopts_data *data, struct iv_ca *set)
6238 {
6239 unsigned i;
6240 struct iv_cand *cand;
6241 bitmap_iterator bi;
6242
6243 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6244 {
6245 cand = iv_cand (data, i);
6246 create_new_iv (data, cand);
6247 }
6248
6249 if (dump_file && (dump_flags & TDF_DETAILS))
6250 {
6251 fprintf (dump_file, "\nSelected IV set:\n");
6252 EXECUTE_IF_SET_IN_BITMAP (set->cands, 0, i, bi)
6253 {
6254 cand = iv_cand (data, i);
6255 dump_cand (dump_file, cand);
6256 }
6257 fprintf (dump_file, "\n");
6258 }
6259 }
6260
6261 /* Rewrites USE (definition of iv used in a nonlinear expression)
6262 using candidate CAND. */
6263
6264 static void
6265 rewrite_use_nonlinear_expr (struct ivopts_data *data,
6266 struct iv_use *use, struct iv_cand *cand)
6267 {
6268 tree comp;
6269 tree op, tgt;
6270 gimple ass;
6271 gimple_stmt_iterator bsi;
6272
6273 /* An important special case -- if we are asked to express the value of
6274 the original iv by itself, just exit; there is no need to
6275 introduce a new computation (that might also need casting the
6276 variable to unsigned and back). */
6277 if (cand->pos == IP_ORIGINAL
6278 && cand->incremented_at == use->stmt)
6279 {
6280 enum tree_code stmt_code;
6281
6282 gcc_assert (is_gimple_assign (use->stmt));
6283 gcc_assert (gimple_assign_lhs (use->stmt) == cand->var_after);
6284
6285 /* Check whether we may leave the computation unchanged.
6286 This is the case only if it does not rely on other
6287 computations in the loop -- otherwise, the computation
6288 we rely upon may be removed in remove_unused_ivs,
6289 thus leading to an ICE. */
6290 stmt_code = gimple_assign_rhs_code (use->stmt);
6291 if (stmt_code == PLUS_EXPR
6292 || stmt_code == MINUS_EXPR
6293 || stmt_code == POINTER_PLUS_EXPR)
6294 {
6295 if (gimple_assign_rhs1 (use->stmt) == cand->var_before)
6296 op = gimple_assign_rhs2 (use->stmt);
6297 else if (gimple_assign_rhs2 (use->stmt) == cand->var_before)
6298 op = gimple_assign_rhs1 (use->stmt);
6299 else
6300 op = NULL_TREE;
6301 }
6302 else
6303 op = NULL_TREE;
6304
6305 if (op && expr_invariant_in_loop_p (data->current_loop, op))
6306 return;
6307 }
6308
6309 comp = get_computation (data->current_loop, use, cand);
6310 gcc_assert (comp != NULL_TREE);
6311
6312 switch (gimple_code (use->stmt))
6313 {
6314 case GIMPLE_PHI:
6315 tgt = PHI_RESULT (use->stmt);
6316
6317 /* If we should keep the biv, do not replace it. */
6318 if (name_info (data, tgt)->preserve_biv)
6319 return;
6320
6321 bsi = gsi_after_labels (gimple_bb (use->stmt));
6322 break;
6323
6324 case GIMPLE_ASSIGN:
6325 tgt = gimple_assign_lhs (use->stmt);
6326 bsi = gsi_for_stmt (use->stmt);
6327 break;
6328
6329 default:
6330 gcc_unreachable ();
6331 }
6332
6333 if (!valid_gimple_rhs_p (comp)
6334 || (gimple_code (use->stmt) != GIMPLE_PHI
6335 /* We can't allow re-allocating the stmt as it might still be
6336 pointed to. */
6337 && (get_gimple_rhs_num_ops (TREE_CODE (comp))
6338 >= gimple_num_ops (gsi_stmt (bsi)))))
6339 {
6340 comp = force_gimple_operand_gsi (&bsi, comp, true, NULL_TREE,
6341 true, GSI_SAME_STMT);
6342 if (POINTER_TYPE_P (TREE_TYPE (tgt)))
6343 {
6344 duplicate_ssa_name_ptr_info (comp, SSA_NAME_PTR_INFO (tgt));
6345 /* As this isn't a plain copy we have to reset alignment
6346 information. */
6347 if (SSA_NAME_PTR_INFO (comp))
6348 mark_ptr_info_alignment_unknown (SSA_NAME_PTR_INFO (comp));
6349 }
6350 }
6351
6352 if (gimple_code (use->stmt) == GIMPLE_PHI)
6353 {
6354 ass = gimple_build_assign (tgt, comp);
6355 gsi_insert_before (&bsi, ass, GSI_SAME_STMT);
6356
6357 bsi = gsi_for_stmt (use->stmt);
6358 remove_phi_node (&bsi, false);
6359 }
6360 else
6361 {
6362 gimple_assign_set_rhs_from_tree (&bsi, comp);
6363 use->stmt = gsi_stmt (bsi);
6364 }
6365 }
6366
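/* For illustration, suppose the selected candidate is a hypothetical
   pointer iv p with value a + 4 * i at the use point, and the nonlinear
   use computes x = 4 * i.  get_computation expresses the use in terms
   of p, so the rewrite above effectively turns

       x = 4 * i;     into     x = (unsigned long) p - (unsigned long) a;

   after which i itself may become dead and be cleaned up by
   remove_unused_ivs.  */
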
6367 /* Performs a peephole optimization to reorder the iv update statement with
6368 a mem ref to enable instruction combining in later phases. The mem ref uses
6369 the iv value before the update, so the reordering transformation requires
6370 adjustment of the offset. CAND is the selected IV_CAND.
6371
6372 Example:
6373
6374 t = MEM_REF (base, iv1, 8, 16); // base, index, stride, offset
6375 iv2 = iv1 + 1;
6376
6377 if (t < val) (1)
6378 goto L;
6379 goto Head;
6380
6381
6382 directly propagating t over to (1) would introduce an overlapping live range
6383 and thus increase register pressure. This peephole transforms it into:
6384
6385
6386 iv2 = iv1 + 1;
6387 t = MEM_REF (base, iv2, 8, 8);
6388 if (t < val)
6389 goto L;
6390 goto Head;
6391 */
6392
6393 static void
6394 adjust_iv_update_pos (struct iv_cand *cand, struct iv_use *use)
6395 {
6396 tree var_after;
6397 gimple iv_update, stmt;
6398 basic_block bb;
6399 gimple_stmt_iterator gsi, gsi_iv;
6400
6401 if (cand->pos != IP_NORMAL)
6402 return;
6403
6404 var_after = cand->var_after;
6405 iv_update = SSA_NAME_DEF_STMT (var_after);
6406
6407 bb = gimple_bb (iv_update);
6408 gsi = gsi_last_nondebug_bb (bb);
6409 stmt = gsi_stmt (gsi);
6410
6411 /* Only handle conditional statements for now. */
6412 if (gimple_code (stmt) != GIMPLE_COND)
6413 return;
6414
6415 gsi_prev_nondebug (&gsi);
6416 stmt = gsi_stmt (gsi);
6417 if (stmt != iv_update)
6418 return;
6419
6420 gsi_prev_nondebug (&gsi);
6421 if (gsi_end_p (gsi))
6422 return;
6423
6424 stmt = gsi_stmt (gsi);
6425 if (gimple_code (stmt) != GIMPLE_ASSIGN)
6426 return;
6427
6428 if (stmt != use->stmt)
6429 return;
6430
6431 if (TREE_CODE (gimple_assign_lhs (stmt)) != SSA_NAME)
6432 return;
6433
6434 if (dump_file && (dump_flags & TDF_DETAILS))
6435 {
6436 fprintf (dump_file, "Reordering\n");
6437 print_gimple_stmt (dump_file, iv_update, 0, 0);
6438 print_gimple_stmt (dump_file, use->stmt, 0, 0);
6439 fprintf (dump_file, "\n");
6440 }
6441
6442 gsi = gsi_for_stmt (use->stmt);
6443 gsi_iv = gsi_for_stmt (iv_update);
6444 gsi_move_before (&gsi_iv, &gsi);
6445
6446 cand->pos = IP_BEFORE_USE;
6447 cand->incremented_at = use->stmt;
6448 }
6449
6450 /* Rewrites USE (address that is an iv) using candidate CAND. */
6451
6452 static void
6453 rewrite_use_address (struct ivopts_data *data,
6454 struct iv_use *use, struct iv_cand *cand)
6455 {
6456 aff_tree aff;
6457 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6458 tree base_hint = NULL_TREE;
6459 tree ref, iv;
6460 bool ok;
6461
6462 adjust_iv_update_pos (cand, use);
6463 ok = get_computation_aff (data->current_loop, use, cand, use->stmt, &aff);
6464 gcc_assert (ok);
6465 unshare_aff_combination (&aff);
6466
6467 /* To avoid undefined overflow problems, all IV candidates use unsigned
6468 integer types. The drawback is that this makes it impossible for
6469 create_mem_ref to distinguish an IV that is based on a memory object
6470 from one that represents simply an offset.
6471
6472 To work around this problem, we pass a hint to create_mem_ref that
6473 indicates which variable (if any) in aff is an IV based on a memory
6474 object. Note that we only consider the candidate. If this is not
6475 based on an object, the base of the reference is in some subexpression
6476 of the use -- but these will use pointer types, so they are recognized
6477 by the create_mem_ref heuristics anyway. */
6478 if (cand->iv->base_object)
6479 base_hint = var_at_stmt (data->current_loop, cand, use->stmt);
6480
6481 iv = var_at_stmt (data->current_loop, cand, use->stmt);
6482 ref = create_mem_ref (&bsi, TREE_TYPE (*use->op_p), &aff,
6483 reference_alias_ptr_type (*use->op_p),
6484 iv, base_hint, data->speed);
6485 copy_ref_info (ref, *use->op_p);
6486 *use->op_p = ref;
6487 }
6488
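/* For illustration, given a hypothetical candidate p stepping by 4 and
   the address use *(a + 4 * i + 16), the affine combination built above
   reduces to p + 16, and create_mem_ref emits something like

       MEM[p + 16] = ...;

   choosing whatever base/index/offset form the target's addressing
   modes support.  BASE_HINT is needed only because the candidate's
   unsigned type hides whether p is based on a memory object.  */
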
6489 /* Rewrites USE (a condition one of whose arguments is an iv) using
6490 candidate CAND. */
6491
6492 static void
6493 rewrite_use_compare (struct ivopts_data *data,
6494 struct iv_use *use, struct iv_cand *cand)
6495 {
6496 tree comp, *var_p, op, bound;
6497 gimple_stmt_iterator bsi = gsi_for_stmt (use->stmt);
6498 enum tree_code compare;
6499 struct cost_pair *cp = get_use_iv_cost (data, use, cand);
6500 bool ok;
6501
6502 bound = cp->value;
6503 if (bound)
6504 {
6505 tree var = var_at_stmt (data->current_loop, cand, use->stmt);
6506 tree var_type = TREE_TYPE (var);
6507 gimple_seq stmts;
6508
6509 if (dump_file && (dump_flags & TDF_DETAILS))
6510 {
6511 fprintf (dump_file, "Replacing exit test: ");
6512 print_gimple_stmt (dump_file, use->stmt, 0, TDF_SLIM);
6513 }
6514 compare = cp->comp;
6515 bound = unshare_expr (fold_convert (var_type, bound));
6516 op = force_gimple_operand (bound, &stmts, true, NULL_TREE);
6517 if (stmts)
6518 gsi_insert_seq_on_edge_immediate (
6519 loop_preheader_edge (data->current_loop),
6520 stmts);
6521
6522 gimple_cond_set_lhs (use->stmt, var);
6523 gimple_cond_set_code (use->stmt, compare);
6524 gimple_cond_set_rhs (use->stmt, op);
6525 return;
6526 }
6527
6528 /* The induction variable elimination failed; just express the original
6529 giv. */
6530 comp = get_computation (data->current_loop, use, cand);
6531 gcc_assert (comp != NULL_TREE);
6532
6533 ok = extract_cond_operands (data, use->stmt, &var_p, NULL, NULL, NULL);
6534 gcc_assert (ok);
6535
6536 *var_p = force_gimple_operand_gsi (&bsi, comp, true, SSA_NAME_VAR (*var_p),
6537 true, GSI_SAME_STMT);
6538 }
6539
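/* For illustration, when elimination succeeded for a hypothetical exit
   test, the rewrite above turns

       if (i < n)     into     if (p != bound)

   where bound (e.g. base + 4 * n) is loop invariant and is computed
   once in the preheader by the gsi_insert_seq_on_edge_immediate call.
   Only when no usable bound was found is the original compare
   re-expressed via get_computation.  */
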
6540 /* Rewrites USE using candidate CAND. */
6541
6542 static void
6543 rewrite_use (struct ivopts_data *data, struct iv_use *use, struct iv_cand *cand)
6544 {
6545 switch (use->type)
6546 {
6547 case USE_NONLINEAR_EXPR:
6548 rewrite_use_nonlinear_expr (data, use, cand);
6549 break;
6550
6551 case USE_ADDRESS:
6552 rewrite_use_address (data, use, cand);
6553 break;
6554
6555 case USE_COMPARE:
6556 rewrite_use_compare (data, use, cand);
6557 break;
6558
6559 default:
6560 gcc_unreachable ();
6561 }
6562
6563 update_stmt (use->stmt);
6564 }
6565
6566 /* Rewrite the uses using the selected induction variables. */
6567
6568 static void
6569 rewrite_uses (struct ivopts_data *data)
6570 {
6571 unsigned i;
6572 struct iv_cand *cand;
6573 struct iv_use *use;
6574
6575 for (i = 0; i < n_iv_uses (data); i++)
6576 {
6577 use = iv_use (data, i);
6578 cand = use->selected;
6579 gcc_assert (cand);
6580
6581 rewrite_use (data, use, cand);
6582 }
6583 }
6584
6585 /* Removes the ivs that are not used after rewriting. */
6586
6587 static void
6588 remove_unused_ivs (struct ivopts_data *data)
6589 {
6590 unsigned j;
6591 bitmap_iterator bi;
6592 bitmap toremove = BITMAP_ALLOC (NULL);
6593
6594 /* Figure out an order in which to release SSA DEFs so that we don't
6595 release something that we'd have to propagate into a debug stmt
6596 afterwards. */
6597 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, j, bi)
6598 {
6599 struct version_info *info;
6600
6601 info = ver_info (data, j);
6602 if (info->iv
6603 && !integer_zerop (info->iv->step)
6604 && !info->inv_id
6605 && !info->iv->have_use_for
6606 && !info->preserve_biv)
6607 {
6608 bitmap_set_bit (toremove, SSA_NAME_VERSION (info->iv->ssa_name));
6609
6610 tree def = info->iv->ssa_name;
6611
6612 if (MAY_HAVE_DEBUG_STMTS && SSA_NAME_DEF_STMT (def))
6613 {
6614 imm_use_iterator imm_iter;
6615 use_operand_p use_p;
6616 gimple stmt;
6617 int count = 0;
6618
6619 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6620 {
6621 if (!gimple_debug_bind_p (stmt))
6622 continue;
6623
6624 /* We just want to determine whether to do nothing
6625 (count == 0), to substitute the computed
6626 expression into a single use of the SSA DEF by
6627 itself (count == 1), or to use a debug temp
6628 because the SSA DEF is used multiple times or as
6629 part of a larger expression (count > 1). */
6630 count++;
6631 if (gimple_debug_bind_get_value (stmt) != def)
6632 count++;
6633
6634 if (count > 1)
6635 BREAK_FROM_IMM_USE_STMT (imm_iter);
6636 }
6637
6638 if (!count)
6639 continue;
6640
6641 struct iv_use dummy_use;
6642 struct iv_cand *best_cand = NULL, *cand;
6643 unsigned i, best_pref = 0, cand_pref;
6644
6645 memset (&dummy_use, 0, sizeof (dummy_use));
6646 dummy_use.iv = info->iv;
6647 for (i = 0; i < n_iv_uses (data) && i < 64; i++)
6648 {
6649 cand = iv_use (data, i)->selected;
6650 if (cand == best_cand)
6651 continue;
6652 cand_pref = operand_equal_p (cand->iv->step,
6653 info->iv->step, 0)
6654 ? 4 : 0;
6655 cand_pref
6656 += TYPE_MODE (TREE_TYPE (cand->iv->base))
6657 == TYPE_MODE (TREE_TYPE (info->iv->base))
6658 ? 2 : 0;
6659 cand_pref
6660 += TREE_CODE (cand->iv->base) == INTEGER_CST
6661 ? 1 : 0;
6662 if (best_cand == NULL || best_pref < cand_pref)
6663 {
6664 best_cand = cand;
6665 best_pref = cand_pref;
6666 }
6667 }
6668
6669 if (!best_cand)
6670 continue;
6671
6672 tree comp = get_computation_at (data->current_loop,
6673 &dummy_use, best_cand,
6674 SSA_NAME_DEF_STMT (def));
6675 if (!comp)
6676 continue;
6677
6678 if (count > 1)
6679 {
6680 tree vexpr = make_node (DEBUG_EXPR_DECL);
6681 DECL_ARTIFICIAL (vexpr) = 1;
6682 TREE_TYPE (vexpr) = TREE_TYPE (comp);
6683 if (SSA_NAME_VAR (def))
6684 DECL_MODE (vexpr) = DECL_MODE (SSA_NAME_VAR (def));
6685 else
6686 DECL_MODE (vexpr) = TYPE_MODE (TREE_TYPE (vexpr));
6687 gimple def_temp = gimple_build_debug_bind (vexpr, comp, NULL);
6688 gimple_stmt_iterator gsi;
6689
6690 if (gimple_code (SSA_NAME_DEF_STMT (def)) == GIMPLE_PHI)
6691 gsi = gsi_after_labels (gimple_bb
6692 (SSA_NAME_DEF_STMT (def)));
6693 else
6694 gsi = gsi_for_stmt (SSA_NAME_DEF_STMT (def));
6695
6696 gsi_insert_before (&gsi, def_temp, GSI_SAME_STMT);
6697 comp = vexpr;
6698 }
6699
6700 FOR_EACH_IMM_USE_STMT (stmt, imm_iter, def)
6701 {
6702 if (!gimple_debug_bind_p (stmt))
6703 continue;
6704
6705 FOR_EACH_IMM_USE_ON_STMT (use_p, imm_iter)
6706 SET_USE (use_p, comp);
6707
6708 update_stmt (stmt);
6709 }
6710 }
6711 }
6712 }
6713
6714 release_defs_bitset (toremove);
6715
6716 BITMAP_FREE (toremove);
6717 }
6718
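/* For illustration, for a dead iv i_5 referenced by several debug binds,
   the count > 1 path above introduces one debug temp instead of
   duplicating the replacement expression:

       # DEBUG D#1 => <i_5 expressed via the kept iv>
       # DEBUG x => D#1 + 1
       # DEBUG y => D#1 * 2

   whereas a single bind of the form "# DEBUG x => i_5" simply receives
   the computed expression directly.  D#1, x and y are hypothetical
   names.  */
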
6719 /* Frees memory occupied by struct tree_niter_desc in *VALUE. Callback
6720 for hash_map::traverse. */
6721
6722 bool
6723 free_tree_niter_desc (edge const &, tree_niter_desc *const &value, void *)
6724 {
6725 free (value);
6726 return true;
6727 }
6728
6729 /* Frees data allocated by the optimization of a single loop. */
6730
6731 static void
6732 free_loop_data (struct ivopts_data *data)
6733 {
6734 unsigned i, j;
6735 bitmap_iterator bi;
6736 tree obj;
6737
6738 if (data->niters)
6739 {
6740 data->niters->traverse<void *, free_tree_niter_desc> (NULL);
6741 delete data->niters;
6742 data->niters = NULL;
6743 }
6744
6745 EXECUTE_IF_SET_IN_BITMAP (data->relevant, 0, i, bi)
6746 {
6747 struct version_info *info;
6748
6749 info = ver_info (data, i);
6750 free (info->iv);
6751 info->iv = NULL;
6752 info->has_nonlin_use = false;
6753 info->preserve_biv = false;
6754 info->inv_id = 0;
6755 }
6756 bitmap_clear (data->relevant);
6757 bitmap_clear (data->important_candidates);
6758
6759 for (i = 0; i < n_iv_uses (data); i++)
6760 {
6761 struct iv_use *use = iv_use (data, i);
6762
6763 free (use->iv);
6764 BITMAP_FREE (use->related_cands);
6765 for (j = 0; j < use->n_map_members; j++)
6766 if (use->cost_map[j].depends_on)
6767 BITMAP_FREE (use->cost_map[j].depends_on);
6768 free (use->cost_map);
6769 free (use);
6770 }
6771 data->iv_uses.truncate (0);
6772
6773 for (i = 0; i < n_iv_cands (data); i++)
6774 {
6775 struct iv_cand *cand = iv_cand (data, i);
6776
6777 free (cand->iv);
6778 if (cand->depends_on)
6779 BITMAP_FREE (cand->depends_on);
6780 free (cand);
6781 }
6782 data->iv_candidates.truncate (0);
6783
6784 if (data->version_info_size < num_ssa_names)
6785 {
6786 data->version_info_size = 2 * num_ssa_names;
6787 free (data->version_info);
6788 data->version_info = XCNEWVEC (struct version_info, data->version_info_size);
6789 }
6790
6791 data->max_inv_id = 0;
6792
6793 FOR_EACH_VEC_ELT (decl_rtl_to_reset, i, obj)
6794 SET_DECL_RTL (obj, NULL_RTX);
6795
6796 decl_rtl_to_reset.truncate (0);
6797
6798 data->inv_expr_tab->empty ();
6799 data->inv_expr_id = 0;
6800 }
6801
6802 /* Finalizes the data structures used by the iv optimization
6803 pass. */
6804
6805 static void
6806 tree_ssa_iv_optimize_finalize (struct ivopts_data *data)
6807 {
6808 free_loop_data (data);
6809 free (data->version_info);
6810 BITMAP_FREE (data->relevant);
6811 BITMAP_FREE (data->important_candidates);
6812
6813 decl_rtl_to_reset.release ();
6814 data->iv_uses.release ();
6815 data->iv_candidates.release ();
6816 delete data->inv_expr_tab;
6817 data->inv_expr_tab = NULL;
6818 }
6819
6820 /* Returns true if the loop body BODY includes any function calls. */
6821
6822 static bool
6823 loop_body_includes_call (basic_block *body, unsigned num_nodes)
6824 {
6825 gimple_stmt_iterator gsi;
6826 unsigned i;
6827
6828 for (i = 0; i < num_nodes; i++)
6829 for (gsi = gsi_start_bb (body[i]); !gsi_end_p (gsi); gsi_next (&gsi))
6830 {
6831 gimple stmt = gsi_stmt (gsi);
6832 if (is_gimple_call (stmt)
6833 && !is_inexpensive_builtin (gimple_call_fndecl (stmt)))
6834 return true;
6835 }
6836 return false;
6837 }
6838
6839 /* Optimizes the LOOP. Returns true if anything changed. */
6840
6841 static bool
6842 tree_ssa_iv_optimize_loop (struct ivopts_data *data, struct loop *loop)
6843 {
6844 bool changed = false;
6845 struct iv_ca *iv_ca;
6846 edge exit = single_dom_exit (loop);
6847 basic_block *body;
6848
6849 gcc_assert (!data->niters);
6850 data->current_loop = loop;
6851 data->speed = optimize_loop_for_speed_p (loop);
6852
6853 if (dump_file && (dump_flags & TDF_DETAILS))
6854 {
6855 fprintf (dump_file, "Processing loop %d\n", loop->num);
6856
6857 if (exit)
6858 {
6859 fprintf (dump_file, " single exit %d -> %d, exit condition ",
6860 exit->src->index, exit->dest->index);
6861 print_gimple_stmt (dump_file, last_stmt (exit->src), 0, TDF_SLIM);
6862 fprintf (dump_file, "\n");
6863 }
6864
6865 fprintf (dump_file, "\n");
6866 }
6867
6868 body = get_loop_body (loop);
6869 data->body_includes_call = loop_body_includes_call (body, loop->num_nodes);
6870 renumber_gimple_stmt_uids_in_blocks (body, loop->num_nodes);
6871 free (body);
6872
6873 data->loop_single_exit_p = exit != NULL && loop_only_exit_p (loop, exit);
6874
6875 /* For each ssa name, determines whether it behaves as an induction variable
6876 in some loop. */
6877 if (!find_induction_variables (data))
6878 goto finish;
6879
6880 /* Finds interesting uses (item 1). */
6881 find_interesting_uses (data);
6882 if (n_iv_uses (data) > MAX_CONSIDERED_USES)
6883 goto finish;
6884
6885 /* Finds candidates for the induction variables (item 2). */
6886 find_iv_candidates (data);
6887
6888 /* Calculates the costs (item 3, part 1). */
6889 determine_iv_costs (data);
6890 determine_use_iv_costs (data);
6891 determine_set_costs (data);
6892
6893 /* Find the optimal set of induction variables (item 3, part 2). */
6894 iv_ca = find_optimal_iv_set (data);
6895 if (!iv_ca)
6896 goto finish;
6897 changed = true;
6898
6899 /* Create the new induction variables (item 4, part 1). */
6900 create_new_ivs (data, iv_ca);
6901 iv_ca_free (&iv_ca);
6902
6903 /* Rewrite the uses (item 4, part 2). */
6904 rewrite_uses (data);
6905
6906 /* Remove the ivs that are unused after rewriting. */
6907 remove_unused_ivs (data);
6908
6909 /* We have changed the structure of induction variables; it might happen
6910 that definitions in the scev database refer to some of them that were
6911 eliminated. */
6912 scev_reset ();
6913
6914 finish:
6915 free_loop_data (data);
6916
6917 return changed;
6918 }
6919
6920 /* Main entry point. Optimizes induction variables in loops. */
6921
6922 void
6923 tree_ssa_iv_optimize (void)
6924 {
6925 struct loop *loop;
6926 struct ivopts_data data;
6927
6928 tree_ssa_iv_optimize_init (&data);
6929
6930 /* Optimize the loops starting with the innermost ones. */
6931 FOR_EACH_LOOP (loop, LI_FROM_INNERMOST)
6932 {
6933 if (dump_file && (dump_flags & TDF_DETAILS))
6934 flow_loop_dump (loop, dump_file, NULL, 1);
6935
6936 tree_ssa_iv_optimize_loop (&data, loop);
6937 }
6938
6939 tree_ssa_iv_optimize_finalize (&data);
6940 }